From 97da6a822a26080ff05f941f75f0d52e07833a53 Mon Sep 17 00:00:00 2001 From: Geoff Genz Date: Wed, 2 Nov 2022 09:56:02 -0600 Subject: [PATCH] Fix query_context.updated_copy method, add first examples --- .github/workflows/on_push.yml | 1 + CHANGELOG.md | 8 +++ clickhouse_connect/VERSION | 2 +- clickhouse_connect/driver/common.py | 9 +++- clickhouse_connect/driver/query.py | 9 ++-- clickhouse_connect/driverc/creaders.c | 6 +-- examples/pandas_examples.py | 36 ++++++++++++++ examples/params_example.py | 57 ++++++++++++++++++++++ tests/unit_tests/test_driver/test_query.py | 26 ++++++++++ 9 files changed, 145 insertions(+), 9 deletions(-) create mode 100644 examples/pandas_examples.py create mode 100644 examples/params_example.py create mode 100644 tests/unit_tests/test_driver/test_query.py diff --git a/.github/workflows/on_push.yml b/.github/workflows/on_push.yml index 149a4fad..f95072da 100644 --- a/.github/workflows/on_push.yml +++ b/.github/workflows/on_push.yml @@ -34,6 +34,7 @@ jobs: run: | pylint clickhouse_connect pylint tests + pylint examples tests: runs-on: ubuntu-latest diff --git a/CHANGELOG.md b/CHANGELOG.md index e7ff4265..770a9098 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ ## ClickHouse Connect ChangeLog + +### Release 0.3.6 2022-11-02 + +#### Bug Fix +* Update QueryContext.updated_copy method to preserve settings, parameters, etc. 
def dict_copy(source: Optional[Dict] = None, update: Optional[Dict] = None) -> Dict:
    """
    Build a new dictionary from an optional source dictionary plus optional overrides.

    The source is copied with its own (shallow) ``copy`` method, so neither argument is
    mutated and the caller can freely modify the result.  This is what allows a caller to
    merge override values without touching the dictionary it started from.

    :param source: Dictionary to copy.  ``None`` or an empty dictionary produces a new,
      empty dictionary
    :param update: Entries applied on top of the copy.  On a key collision the value from
      ``update`` wins.  ``None`` or an empty dictionary leaves the copy unchanged
    :return: The newly created dictionary
    """
    # Named `merged` rather than `copy` so the stdlib `copy` module name is not shadowed
    merged = source.copy() if source else {}
    if update:
        merged.update(update)
    return merged
+ dict_copy(self.parameters, parameters), + dict_copy(self.settings, settings), + dict_copy(self.query_formats, query_formats), + dict_copy(self.column_formats, column_formats), encoding if encoding else self.encoding, server_tz if server_tz else self.server_tz, use_none if use_none is not None else self.use_none) diff --git a/clickhouse_connect/driverc/creaders.c b/clickhouse_connect/driverc/creaders.c index b9d9ebb1..59602de2 100644 --- a/clickhouse_connect/driverc/creaders.c +++ b/clickhouse_connect/driverc/creaders.c @@ -967,9 +967,9 @@ static const char *__pyx_filename; static const char *__pyx_f[] = { "clickhouse_connect/driverc/creaders.pyx", - "cc_venv/lib/python3.9/site-packages/Cython/Includes/cpython/type.pxd", - "cc_venv/lib/python3.9/site-packages/Cython/Includes/cpython/bool.pxd", - "cc_venv/lib/python3.9/site-packages/Cython/Includes/cpython/complex.pxd", + "type.pxd", + "bool.pxd", + "complex.pxd", }; /*--- Type declarations ---*/ diff --git a/examples/pandas_examples.py b/examples/pandas_examples.py new file mode 100644 index 00000000..f7e78dae --- /dev/null +++ b/examples/pandas_examples.py @@ -0,0 +1,36 @@ +import pandas as pd +import clickhouse_connect + + +create_table_sql = """ +CREATE TABLE pandas_example +( + `timeseries` DateTime('UTC'), + `int_value` Int32, + `str_value` String, + `float_value` Float64 +) +ENGINE = MergeTree +ORDER BY timeseries +""" + + +def write_pandas_df(): + client = clickhouse_connect.get_client(host='localhost', port='8123', user_name='default', password = '') + client.command('DROP TABLE IF EXISTS pandas_example') + client.command(create_table_sql) + df = pd.DataFrame({'timeseries': ['04/03/2022 10:00:11', '05/03/2022 11:15:44', '06/03/2022 17:14:00'], + 'int_value': [16, 19, 11], + 'str_value': ['String One', 'String Two', 'A Third String'], + 'float_value': [2344.288, -73002.4444, 3.14159]}) + df['timeseries'] = pd.to_datetime(df['timeseries']) + client.insert_df('pandas_example', df) + result_df = 
def build_device_query(time_zone: str,
                       web_url: str,
                       client: str,
                       company_id: str = '',
                       device_id: str = '',
                       updated: bool = False,
                       start_time: datetime = None,
                       end_time: datetime = None):
    """
    Assemble the example device/session query and hand it to finalize_query together
    with the values to bind.

    The statement is select_template, followed by a WHERE clause that always filters on
    client and then adds one condition per optional argument that was supplied, followed
    by an ORDER BY on started_at.

    :param time_zone: Bound to the %(time_zone)s placeholders in select_template
    :param web_url: Bound to the %(web_url)s placeholder in select_template
    :param client: Required filter value for the client column
    :param company_id: When non-empty, adds a filter on the company_id column
    :param device_id: When non-empty, adds a filter on the dev_type column
    :param updated: When true, adds the literal condition "updated = true"
    :param start_time: Lower bound for started_at; only used when end_time is also given
    :param end_time: Upper bound for started_at; only used when start_time is also given
    :return: Whatever finalize_query returns for the assembled query and bound values
    """
    bound = {'time_zone': time_zone, 'web_url': web_url, 'client': client}
    conditions = [' WHERE client = %(client)s']

    if company_id:
        bound['company_id'] = company_id
        conditions.append(' AND company_id = %(company_id)s')
    if device_id:
        bound['device_id'] = device_id
        conditions.append(' AND dev_type = %(device_id)s')
    if updated:
        conditions.append(' AND updated = true')
    # The time window is applied only when both ends are provided
    if start_time and end_time:
        bound['start_time'] = start_time
        bound['end_time'] = end_time
        conditions.append(' AND started_at BETWEEN %(start_time)s AND %(end_time)s')

    sql = ''.join([select_template, *conditions, ' ORDER BY started_at ASC'])
    return finalize_query(sql, bound)
def test_copy_context():
    """
    Check that QueryContext.updated_copy merges overrides into the existing settings and
    parameters instead of discarding the values that were not overridden.
    """
    original = QueryContext(
        'SELECT source_ip FROM table WHERE user_id = %(user_id)s',
        settings={'max_bytes_for_external_group_by': 1024 * 1024 * 100,
                  'read_overflow_mode': 'throw'},
        parameters={'user_id': 'user_1'},
        query_formats={'IPv*': 'string'},
        use_none=True)

    # Baseline: the context reflects exactly what it was constructed with
    assert original.use_none is True
    assert original.final_query == "SELECT source_ip FROM table WHERE user_id = 'user_1'"
    assert original.query_formats['IPv*'] == 'string'
    assert original.settings['max_bytes_for_external_group_by'] == 104857600

    override_settings = {'max_bytes_for_external_group_by': 1024 * 1024 * 24,
                         'max_execution_time': 120}
    derived = original.updated_copy(settings=override_settings,
                                    parameters={'user_id': 'user_2'})

    # Untouched setting survives, new setting is added, overridden setting is replaced
    derived_settings = derived.settings
    assert derived_settings['read_overflow_mode'] == 'throw'
    assert derived_settings['max_execution_time'] == 120
    assert derived_settings['max_bytes_for_external_group_by'] == 25165824
    assert derived.final_query == "SELECT source_ip FROM table WHERE user_id = 'user_2'"