From 42adaa1efabed13a4b672a66fc6110a1eb82db8a Mon Sep 17 00:00:00 2001 From: Geoff Genz Date: Wed, 6 Jul 2022 05:23:36 -0600 Subject: [PATCH 01/25] Format control checkpoint --- clickhouse_connect/datatypes/base.py | 13 +- clickhouse_connect/datatypes/registry.py | 26 +++- clickhouse_connect/driver/native.py | 134 ++++++++++--------- clickhouse_connect/driver/transform.py | 57 ++++++++ tests/unit_tests/test_driver/test_formats.py | 11 ++ 5 files changed, 173 insertions(+), 68 deletions(-) create mode 100644 clickhouse_connect/driver/transform.py create mode 100644 tests/unit_tests/test_driver/test_formats.py diff --git a/clickhouse_connect/datatypes/base.py b/clickhouse_connect/datatypes/base.py index d6130e06..484de49e 100644 --- a/clickhouse_connect/datatypes/base.py +++ b/clickhouse_connect/datatypes/base.py @@ -1,4 +1,5 @@ import array +import threading from abc import abstractmethod, ABC from math import log from typing import NamedTuple, Dict, Type, Any, Sequence, MutableSequence, Optional, Union, Tuple @@ -15,7 +16,6 @@ class TypeDef(NamedTuple): wrappers: tuple = () keys: tuple = () values: tuple = () - format: str = None @property def arg_str(self): @@ -26,7 +26,7 @@ class ClickHouseType(ABC): """ Base class for all ClickHouseType objects. 
""" - __slots__ = 'nullable', 'low_card', 'wrappers', 'format', 'type_def', '__dict__' + __slots__ = 'nullable', 'low_card', 'wrappers', 'type_def', '__dict__' _ch_name = None _name_suffix = '' np_type = 'O' @@ -42,6 +42,13 @@ def __init_subclass__(cls, registered: bool = True): def build(cls: Type['ClickHouseType'], type_def: TypeDef): return cls(type_def) + @classmethod + def read_format(cls): + try: + return threading.local.ch_read_format(cls) + except AttributeError: + return 'native' + def __init__(self, type_def: TypeDef): """ Base class constructor that sets Nullable and LowCardinality wrappers and currently assigns the row_binary conversion @@ -51,8 +58,6 @@ def __init__(self, type_def: TypeDef): self.type_def = type_def self.wrappers = type_def.wrappers self.low_card = 'LowCardinality' in self.wrappers - if type_def.format: - self.format = type_def.format self.nullable = 'Nullable' in self.wrappers if self.nullable: self.from_row_binary = self._nullable_from_row_binary diff --git a/clickhouse_connect/datatypes/registry.py b/clickhouse_connect/datatypes/registry.py index a3b27bc4..3a11ffb3 100644 --- a/clickhouse_connect/datatypes/registry.py +++ b/clickhouse_connect/datatypes/registry.py @@ -1,8 +1,9 @@ import logging +import re -from typing import Tuple, Dict +from typing import Tuple, Dict, Sequence, Type, Optional from clickhouse_connect.datatypes.base import TypeDef, ClickHouseType, type_map -from clickhouse_connect.driver.exceptions import InternalError +from clickhouse_connect.driver.exceptions import InternalError, ProgrammingError from clickhouse_connect.driver.parser import parse_enum, parse_callable, parse_columns logger = logging.getLogger(__name__) @@ -57,3 +58,24 @@ def get_from_name(name: str) -> ClickHouseType: raise InternalError(err_str) from None type_cache[name] = ch_type return ch_type + + +def matching_types(fmt_map: Optional[Dict[str, str]]) -> Dict[Type[ClickHouseType], str]: + if not fmt_map: + return {} + matches = {} + for 
pattern, fmt in fmt_map.items(): + if '*' in pattern: + re_pattern = re.compile(pattern.replace('*', '.*'), re.IGNORECASE) + for type_name, ch_type in type_map.items(): + if re_pattern.match(type_name): + matches[ch_type] = fmt + else: + try: + matches[type_map[pattern]] = fmt + except KeyError: + pass + if not matches: + raise ProgrammingError(f'Unrecognized ClickHouse type {pattern} when setting formats') + return matches + diff --git a/clickhouse_connect/driver/native.py b/clickhouse_connect/driver/native.py index b166cd31..78ff5380 100644 --- a/clickhouse_connect/driver/native.py +++ b/clickhouse_connect/driver/native.py @@ -1,71 +1,81 @@ -from typing import Any, Sequence +import threading +from typing import Any, Sequence, Dict, Union from clickhouse_connect.datatypes import registry from clickhouse_connect.datatypes.base import ClickHouseType from clickhouse_connect.driver.common import read_leb128, read_leb128_str, write_leb128 from clickhouse_connect.driver.query import DataResult +from clickhouse_connect.driver.transform import DataTransform -# pylint: disable=too-many-locals -def parse_response(source: Sequence, use_none: bool = True) -> DataResult: - """ - Decodes the ClickHouse byte byte buffer response into rows of native Python data - :param source: A byte buffer or similar source - :param use_none: Use None values for ClickHouse NULLs (otherwise use zero/empty values) - :return: DataResult -- data matrix, column names, column types - """ - if not isinstance(source, memoryview): - source = memoryview(source) - loc = 0 - names = [] - col_types = [] - result = [] - total_size = len(source) - block = 0 - while loc < total_size: - result_block = [] - num_cols, loc = read_leb128(source, loc) - num_rows, loc = read_leb128(source, loc) - for col_num in range(num_cols): - name, loc = read_leb128_str(source, loc) - if block == 0: - names.append(name) - type_name, loc = read_leb128_str(source, loc) - if block == 0: - col_type = registry.get_from_name(type_name) 
- col_types.append(col_type) - else: - col_type = col_types[col_num] - column, loc = col_type.read_native_column(source, loc, num_rows, use_none=use_none) - result_block.append(column) - block += 1 - result.extend(list(zip(*result_block))) - return DataResult(result, tuple(names), tuple(col_types)) +class NativeTransform(DataTransform) + # pylint: disable=too-many-locals + def parse_response(self, source: Sequence, type_formats: Dict[str, str], + column_formats:Dict[str, Union[str, Dict[str, str]]]) -> DataResult: + """ + Decodes the ClickHouse byte byte buffer response into rows of native Python data + :param source: A byte buffer or similar source + :param column_formats: Use None values for ClickHouse NULLs (otherwise use zero/empty values) + :return: DataResult -- data matrix, column names, column types + """ + threading.local.ch_read_format = self.base_format.read_format + if not isinstance(source, memoryview): + source = memoryview(source) + loc = 0 + names = [] + col_types = [] + result = [] + total_size = len(source) + block = 0 + while loc < total_size: + result_block = [] + num_cols, loc = read_leb128(source, loc) + num_rows, loc = read_leb128(source, loc) + for col_num in range(num_cols): + name, loc = read_leb128_str(source, loc) + if block == 0: + names.append(name) + type_name, loc = read_leb128_str(source, loc) + if block == 0: + col_type = registry.get_from_name(type_name) + col_types.append(col_type) + else: + col_type = col_types[col_num] + col_fmt = column_formats.get(name, None) + if col_fmt: + if isinstance() + else: + self.base_format.read_overrides + column, loc = col_type.read_native_column(source, loc, num_rows) + result_block.append(column) + block += 1 + result.extend(list(zip(*result_block))) + return DataResult(result, tuple(names), tuple(col_types)) -def build_insert(data: Sequence[Sequence[Any]], *, column_names: Sequence[str], - column_type_names: Sequence[str] = None, - column_types: Sequence[ClickHouseType] = None, - 
column_oriented: bool = False): - """ - Encoding a dataset of Python sequences into native binary format - :param data: Matrix of rows and columns of data - :param column_names: Column names of the data to insert - :param column_type_names: Column type names of the data - :param column_types: Column types used to encode data in ClickHouse native format - :param column_oriented: If true the dataset does not need to be "pivoted" - :return: bytearray containing the dataset in ClickHouse native insert format - """ - if not column_types: - column_types = [registry.get_from_name(name) for name in column_type_names] - output = bytearray() - columns = data if column_oriented else tuple(zip(*data)) - write_leb128(len(columns), output) - write_leb128(len(columns[0]), output) - for col_name, col_type, column in zip(column_names, column_types, columns): - write_leb128(len(col_name), output) - output += col_name.encode() - write_leb128(len(col_type.name), output) - output += col_type.name.encode() - col_type.write_native_column(column, output) - return output + def build_insert(data: Sequence[Sequence[Any]], *, column_names: Sequence[str], + column_type_names: Sequence[str] = None, + column_types: Sequence[ClickHouseType] = None, + column_oriented: bool = False): + """ + Encoding a dataset of Python sequences into native binary format + :param data: Matrix of rows and columns of data + :param column_names: Column names of the data to insert + :param column_type_names: Column type names of the data + :param column_types: Column types used to encode data in ClickHouse native format + :param column_oriented: If true the dataset does not need to be "pivoted" + :return: bytearray containing the dataset in ClickHouse native insert format + """ + if not column_types: + column_types = [registry.get_from_name(name) for name in column_type_names] + output = bytearray() + columns = data if column_oriented else tuple(zip(*data)) + write_leb128(len(columns), output) + 
write_leb128(len(columns[0]), output) + for col_name, col_type, column in zip(column_names, column_types, columns): + write_leb128(len(col_name), output) + output += col_name.encode() + write_leb128(len(col_type.name), output) + output += col_type.name.encode() + col_type.write_native_column(column, output) + return output diff --git a/clickhouse_connect/driver/transform.py b/clickhouse_connect/driver/transform.py new file mode 100644 index 00000000..338a170c --- /dev/null +++ b/clickhouse_connect/driver/transform.py @@ -0,0 +1,57 @@ +from abc import ABC, abstractmethod +from typing import Sequence, Dict, Union, Type + +from clickhouse_connect.datatypes.base import ClickHouseType +from clickhouse_connect.datatypes.registry import matching_types +from clickhouse_connect.driver.query import DataResult + + +class FormatControl: + + def __init__(self, + default_formats: Dict[str, str] = None, + read_formats: Dict[str, str] = None, + write_formats: Dict[str, str] = None): + default_formats = matching_types(default_formats) + self.read_formats = default_formats.copy() + self.read_formats.update(matching_types(read_formats)) + self.write_formats = default_formats.copy() + self.write_formats.update(matching_types(write_formats)) + self.read_overrides = {} + self.write_overrides = {} + + def set_read_overrides(self, read_overrides: Dict[str, str]) -> None: + self.read_overrides = matching_types(read_overrides) + + def set_writes_overrides(self, write_overrides: Dict[str, str]) -> None: + self.write_overrides = matching_types(write_overrides) + + def read_format(self, ch_type: Type[ClickHouseType]) -> str: + return self.read_overrides.get(ch_type, self.read_formats.get(ch_type, 'native')) + + def write_format(self, ch_type: Type[ClickHouseType]) -> str: + return self.write_overrides.get(ch_type, self.write_formats.get(ch_type, 'native')) + + def clear_read_overrides(self): + self.read_overrides = {} + + def clear_write_override(self): + self.write_overrides = {} + + + +class 
QueryFormatter: + def __init__(self, + type_formats: Dict[str, str] = None, + column_formats: Dict[str, str] = None, + sub_column_formats: Dict[str, Dict[str, str]] = None): + pass + + +class DataTransform(ABC): + + def __init__(self, fmt_ctl: FormatControl): + self.base_format = fmt_ctl + + def parse_response(self, source: Sequence, type_formats: Dict[column_formats:Dict[str, Union[str, Dict[str, str]]]) -> DataResult: + pass diff --git a/tests/unit_tests/test_driver/test_formats.py b/tests/unit_tests/test_driver/test_formats.py new file mode 100644 index 00000000..1082326b --- /dev/null +++ b/tests/unit_tests/test_driver/test_formats.py @@ -0,0 +1,11 @@ +from clickhouse_connect.datatypes.network import IPv6 +from clickhouse_connect.datatypes.numeric import Int32 +from clickhouse_connect.datatypes.string import FixedString +from clickhouse_connect.driver.transform import FormatControl + + +def test_format_control(): + fmt_ctl = FormatControl(default_formats={'Int32': 'string'}, read_formats={'IP*': 'string'}) + assert fmt_ctl.read_format(IPv6) == 'string' + assert fmt_ctl.write_format(Int32) == 'string' + assert fmt_ctl.read_format(FixedString) == 'native' From 417cd96a5c521860ed8bd1913133190dd0094f76 Mon Sep 17 00:00:00 2001 From: Geoff Genz Date: Thu, 7 Jul 2022 14:50:26 -0600 Subject: [PATCH 02/25] Checkpoint on data format handling --- clickhouse_connect/datatypes/base.py | 21 +++-- clickhouse_connect/datatypes/format.py | 66 ++++++++++++++ clickhouse_connect/datatypes/registry.py | 24 +---- clickhouse_connect/driver/httpclient.py | 14 ++- clickhouse_connect/driver/native.py | 36 ++------ clickhouse_connect/driver/rowbinary.py | 87 +++++++++--------- clickhouse_connect/driver/transform.py | 88 ++++++++++--------- tests/unit_tests/test_driver/test_formats.py | 13 +-- .../test_driver/test_native_fuzz.py | 12 +-- .../test_driver/test_native_read.py | 7 +- .../test_driver/test_native_write.py | 5 +- 11 files changed, 202 insertions(+), 171 deletions(-) create mode 
100644 clickhouse_connect/datatypes/format.py diff --git a/clickhouse_connect/datatypes/base.py b/clickhouse_connect/datatypes/base.py index 18e9b957..794c17a3 100644 --- a/clickhouse_connect/datatypes/base.py +++ b/clickhouse_connect/datatypes/base.py @@ -8,6 +8,9 @@ write_uint64, low_card_version from clickhouse_connect.driver.exceptions import NotSupportedError +ch_read_formats = {} +ch_write_formats = {} + class TypeDef(NamedTuple): """ @@ -44,10 +47,13 @@ def build(cls: Type['ClickHouseType'], type_def: TypeDef): @classmethod def read_format(cls): - try: - return threading.local.ch_read_format(cls) - except AttributeError: - return 'native' + overrides = getattr(threading.local, 'ch_column_overrides', None) + if overrides and cls in overrides: + return overrides[cls] + overrides = getattr(threading.local, 'ch_query_overrides)', None) + if overrides and cls in overrides: + return overrides[cls] + return ch_read_formats.get(cls, 'native') def __init__(self, type_def: TypeDef): """ @@ -120,8 +126,8 @@ def read_native_data(self, source: Sequence, loc: int, num_rows: int, use_none=T :param source: Native protocol binary read buffer :param loc: Moving location for the read buffer :param num_rows: Number of rows expected in the column - :param use_none: Use the Python None type for ClickHouse nulls. Otherwise use the empty or zero type. Allows support for - pandas data frames that do not support None + :param use_none: Use the Python None type for ClickHouse nulls. Otherwise use the empty or zero type. 
+ Allows support for pandas data frames that do not support None :return: The decoded column plust the updated location pointer """ if self.low_card: @@ -143,7 +149,8 @@ def read_native_data(self, source: Sequence, loc: int, num_rows: int, use_none=T # delegate binary operations to their elements # pylint: disable=no-self-use - def _read_native_binary(self, _source: Sequence, _loc: int, _num_rows: int) -> Tuple[Union[Sequence, MutableSequence], int]: + def _read_native_binary(self, _source: Sequence, _loc: int, _num_rows: int) \ + -> Tuple[Union[Sequence, MutableSequence], int]: """ Lowest level read method for ClickHouseType native data columns :param _source: Native protocol binary read buffer diff --git a/clickhouse_connect/datatypes/format.py b/clickhouse_connect/datatypes/format.py new file mode 100644 index 00000000..c27856f5 --- /dev/null +++ b/clickhouse_connect/datatypes/format.py @@ -0,0 +1,66 @@ +import re + +from typing import Dict, Type, Sequence + +from clickhouse_connect.datatypes.base import ClickHouseType, type_map, ch_read_formats, ch_write_formats +from clickhouse_connect.driver import ProgrammingError + + +def set_default_formats(*args, **kwargs): + fmt_map = format_map(_convert_arguments(*args, **kwargs)) + ch_read_formats.update(fmt_map) + ch_write_formats.update(fmt_map) + + +def clear_all_formats(): + ch_read_formats.clear() + ch_write_formats.clear() + + +def clear_default_format(pattern: str): + for ch_type in _matching_types(pattern): + ch_read_formats.pop(ch_type, None) + ch_write_formats.pop(ch_type, None) + + +def clear_write_format(pattern: str): + for ch_type in _matching_types(pattern): + ch_write_formats.pop(ch_type, None) + + +def clear_read_format(pattern: str): + for ch_type in _matching_types(pattern): + ch_read_formats.pop(ch_type, None) + + +def format_map(fmt_map: Dict[str, str]) -> Dict[Type[ClickHouseType], str]: + if not fmt_map: + return {} + final_map = {} + for pattern, fmt in fmt_map.items(): + matches = 
_matching_types(pattern) + if not matches: + raise ProgrammingError(f'Unrecognized ClickHouse type {pattern} when setting formats') + for ch_type in matches: + final_map[ch_type] = fmt + return final_map + + +def _convert_arguments(*args, **kwargs) -> Dict[str, str]: + fmt_map = {} + try: + for x in range(0, len(args), 2): + fmt_map[args[x]] = args[x + 1] + except (IndexError, TypeError, ValueError): + raise ProgrammingError('Invalid type/format arguments for format method') + fmt_map.update(kwargs) + return fmt_map + + +def _matching_types(pattern: str) -> Sequence[Type[ClickHouseType]]: + if '*' in pattern: + re_pattern = re.compile(pattern.replace('*', '.*'), re.IGNORECASE) + return [ch_type for type_name, ch_type in type_map.items() if re_pattern.match(type_name)] + if pattern in type_map: + return [type_map[pattern]] + return [] diff --git a/clickhouse_connect/datatypes/registry.py b/clickhouse_connect/datatypes/registry.py index 3a11ffb3..af897884 100644 --- a/clickhouse_connect/datatypes/registry.py +++ b/clickhouse_connect/datatypes/registry.py @@ -1,7 +1,6 @@ import logging -import re -from typing import Tuple, Dict, Sequence, Type, Optional +from typing import Tuple, Dict, Type, Optional from clickhouse_connect.datatypes.base import TypeDef, ClickHouseType, type_map from clickhouse_connect.driver.exceptions import InternalError, ProgrammingError from clickhouse_connect.driver.parser import parse_enum, parse_callable, parse_columns @@ -58,24 +57,3 @@ def get_from_name(name: str) -> ClickHouseType: raise InternalError(err_str) from None type_cache[name] = ch_type return ch_type - - -def matching_types(fmt_map: Optional[Dict[str, str]]) -> Dict[Type[ClickHouseType], str]: - if not fmt_map: - return {} - matches = {} - for pattern, fmt in fmt_map.items(): - if '*' in pattern: - re_pattern = re.compile(pattern.replace('*', '.*'), re.IGNORECASE) - for type_name, ch_type in type_map.items(): - if re_pattern.match(type_name): - matches[ch_type] = fmt - else: - 
try: - matches[type_map[pattern]] = fmt - except KeyError: - pass - if not matches: - raise ProgrammingError(f'Unrecognized ClickHouse type {pattern} when setting formats') - return matches - diff --git a/clickhouse_connect/driver/httpclient.py b/clickhouse_connect/driver/httpclient.py index 14224d4f..d7aaec41 100644 --- a/clickhouse_connect/driver/httpclient.py +++ b/clickhouse_connect/driver/httpclient.py @@ -9,13 +9,13 @@ from requests.exceptions import RequestException from clickhouse_connect.datatypes import registry -from clickhouse_connect.driver import native -from clickhouse_connect.driver import rowbinary from clickhouse_connect.datatypes.base import ClickHouseType from clickhouse_connect.driver.client import Client from clickhouse_connect.driver.exceptions import DatabaseError, OperationalError, ProgrammingError from clickhouse_connect.driver.httpadapter import KeepAliveAdapter +from clickhouse_connect.driver.native import NativeTransform from clickhouse_connect.driver.query import QueryResult, DataResult, format_query_value +from clickhouse_connect.driver.rowbinary import RowBinaryTransform logger = logging.getLogger(__name__) columns_only_re = re.compile(r'LIMIT 0\s*$', re.IGNORECASE) @@ -110,15 +110,13 @@ def __init__(self, if data_format == 'native': self.read_format = self.write_format = 'Native' - self.build_insert = native.build_insert - self.parse_response = native.parse_response self.column_inserts = True + self.transform = NativeTransform() elif data_format in ('row_binary', 'rb'): self.read_format = 'RowBinaryWithNamesAndTypes' self.write_format = 'RowBinary' - self.build_insert = rowbinary.build_insert - self.parse_response = rowbinary.parse_response self.column_inserts = False + self.transform = RowBinaryTransform() self.session = session self.connect_timeout = connect_timeout self.read_timeout = send_receive_timeout @@ -169,7 +167,7 @@ def query(self, query: str, data_result = DataResult([], tuple(names), tuple(types)) else: response = 
self._raw_request(self._format_query(final_query), params, headers, retries=2) - data_result = self.parse_response(response.content, use_none) + data_result = self.transform.parse_response(response.content, use_none=use_none) summary = {} if 'X-ClickHouse-Summary' in response.headers: try: @@ -193,7 +191,7 @@ def data_insert(self, params = {'query': f"INSERT INTO {table} ({', '.join(column_names)}) FORMAT {self.write_format}", 'database': self.database} params.update(self._validate_settings(settings, True)) - insert_block = self.build_insert(data, column_types=column_types, column_names=column_names, + insert_block = self.transform.build_insert(data, column_types=column_types, column_names=column_names, column_oriented=column_oriented) response = self._raw_request(insert_block, params, headers) logger.debug('Insert response code: %d, content: %s', response.status_code, response.content) diff --git a/clickhouse_connect/driver/native.py b/clickhouse_connect/driver/native.py index 78ff5380..3c738350 100644 --- a/clickhouse_connect/driver/native.py +++ b/clickhouse_connect/driver/native.py @@ -1,24 +1,15 @@ -import threading -from typing import Any, Sequence, Dict, Union +from typing import Any, Sequence from clickhouse_connect.datatypes import registry from clickhouse_connect.datatypes.base import ClickHouseType from clickhouse_connect.driver.common import read_leb128, read_leb128_str, write_leb128 from clickhouse_connect.driver.query import DataResult -from clickhouse_connect.driver.transform import DataTransform +from clickhouse_connect.driver.transform import DataTransform, QueryContext -class NativeTransform(DataTransform) +class NativeTransform(DataTransform): # pylint: disable=too-many-locals - def parse_response(self, source: Sequence, type_formats: Dict[str, str], - column_formats:Dict[str, Union[str, Dict[str, str]]]) -> DataResult: - """ - Decodes the ClickHouse byte byte buffer response into rows of native Python data - :param source: A byte buffer or 
similar source - :param column_formats: Use None values for ClickHouse NULLs (otherwise use zero/empty values) - :return: DataResult -- data matrix, column names, column types - """ - threading.local.ch_read_format = self.base_format.read_format + def _transform_response(self, source: Sequence, context: QueryContext) -> DataResult: if not isinstance(source, memoryview): source = memoryview(source) loc = 0 @@ -41,31 +32,16 @@ def parse_response(self, source: Sequence, type_formats: Dict[str, str], col_types.append(col_type) else: col_type = col_types[col_num] - col_fmt = column_formats.get(name, None) - if col_fmt: - if isinstance() - else: - self.base_format.read_overrides - column, loc = col_type.read_native_column(source, loc, num_rows) + column, loc = col_type.read_native_column(source, loc, num_rows, use_none=context.use_none) result_block.append(column) block += 1 result.extend(list(zip(*result_block))) return DataResult(result, tuple(names), tuple(col_types)) - - def build_insert(data: Sequence[Sequence[Any]], *, column_names: Sequence[str], + def build_insert(self, data: Sequence[Sequence[Any]], *, column_names: Sequence[str], column_type_names: Sequence[str] = None, column_types: Sequence[ClickHouseType] = None, column_oriented: bool = False): - """ - Encoding a dataset of Python sequences into native binary format - :param data: Matrix of rows and columns of data - :param column_names: Column names of the data to insert - :param column_type_names: Column type names of the data - :param column_types: Column types used to encode data in ClickHouse native format - :param column_oriented: If true the dataset does not need to be "pivoted" - :return: bytearray containing the dataset in ClickHouse native insert format - """ if not column_types: column_types = [registry.get_from_name(name) for name in column_type_names] output = bytearray() diff --git a/clickhouse_connect/driver/rowbinary.py b/clickhouse_connect/driver/rowbinary.py index 701a8b9f..ef0cc65d 100644 
--- a/clickhouse_connect/driver/rowbinary.py +++ b/clickhouse_connect/driver/rowbinary.py @@ -1,60 +1,55 @@ import logging -from typing import Any, Sequence, Union +from typing import Any, Sequence from clickhouse_connect.datatypes import registry from clickhouse_connect.datatypes.base import ClickHouseType from clickhouse_connect.driver.common import read_leb128, read_leb128_str from clickhouse_connect.driver.exceptions import InterfaceError from clickhouse_connect.driver.query import DataResult +from clickhouse_connect.driver.transform import DataTransform, QueryContext logger = logging.getLogger(__name__) -def parse_response(source: Union[bytes, bytearray, memoryview], _use_none: bool = True) -> DataResult: - """ - Decodes the ClickHouse rowbinary format byte buffer response into rows of native Python data - :param source: A byte buffer or similar source - :param _use_none: Use None values for ClickHouse NULLs -- Using defaults is not supported by rowbinary, - so conversion of nulls for certain outputs (pandas/numpy arrays) will fail - :return: DataResult -- data matrix, column names, column types - """ - if not isinstance(source, memoryview): - source = memoryview(source) - response_size = len(source) - loc = 0 - num_columns, loc = read_leb128(source, loc) - names = [] - for _ in range(num_columns): - name, loc = read_leb128_str(source, loc) - names.append(name) - col_types = [] - for _ in range(num_columns): - col_type, loc = read_leb128_str(source, loc) - try: - col_types.append(registry.get_from_name(col_type)) - except KeyError: - raise InterfaceError(f'Unknown ClickHouse type returned for type {col_type}') from None - convs = tuple(t.from_row_binary for t in col_types) - result = [] - while loc < response_size: - row = [] - for conv in convs: - v, loc = conv(source, loc) - row.append(v) - result.append(row) - return DataResult(result, tuple(names), tuple(col_types)) +class RowBinaryTransform(DataTransform): + def _transform_response(self, source: Sequence, 
context: QueryContext) -> DataResult: + if not isinstance(source, memoryview): + source = memoryview(source) + response_size = len(source) + loc = 0 + num_columns, loc = read_leb128(source, loc) + names = [] + for _ in range(num_columns): + name, loc = read_leb128_str(source, loc) + names.append(name) + col_types = [] + for _ in range(num_columns): + col_type, loc = read_leb128_str(source, loc) + try: + col_types.append(registry.get_from_name(col_type)) + except KeyError: + raise InterfaceError(f'Unknown ClickHouse type returned for type {col_type}') from None + convs = tuple(t.from_row_binary for t in col_types) + result = [] + while loc < response_size: + row = [] + for conv in convs: + v, loc = conv(source, loc) + row.append(v) + result.append(row) + return DataResult(result, tuple(names), tuple(col_types)) -def build_insert(data: Sequence[Sequence[Any]], *, column_type_names: Sequence[str] = None, - column_types: Sequence[ClickHouseType] = None, column_oriented: bool = False, **_): - if not column_types: - column_types = [registry.get_from_name(name) for name in column_type_names] - convs = tuple(t.to_row_binary for t in column_types) - if column_oriented: - data = tuple(zip(*data)) - output = bytearray() - for row in data: - for (value, conv) in zip(row, convs): - conv(value, output) - return output + def build_insert(self, data: Sequence[Sequence[Any]], *, column_type_names: Sequence[str] = None, + column_types: Sequence[ClickHouseType] = None, column_oriented: bool = False, **_): + if not column_types: + column_types = [registry.get_from_name(name) for name in column_type_names] + convs = tuple(t.to_row_binary for t in column_types) + if column_oriented: + data = tuple(zip(*data)) + output = bytearray() + for row in data: + for (value, conv) in zip(row, convs): + conv(value, output) + return output diff --git a/clickhouse_connect/driver/transform.py b/clickhouse_connect/driver/transform.py index 338a170c..85cf79ae 100644 --- 
a/clickhouse_connect/driver/transform.py +++ b/clickhouse_connect/driver/transform.py @@ -1,57 +1,59 @@ +import threading from abc import ABC, abstractmethod -from typing import Sequence, Dict, Union, Type +from typing import Sequence, Dict, Union, Any, Optional from clickhouse_connect.datatypes.base import ClickHouseType -from clickhouse_connect.datatypes.registry import matching_types +from clickhouse_connect.datatypes.format import format_map from clickhouse_connect.driver.query import DataResult -class FormatControl: +class QueryContext: + def __init__(self, use_none: bool, type_formats: Optional[Dict[str, str]], + column_formats: Optional[Dict[str, Union[str, Dict[str, str]]]]): + self.query_overrides = format_map(type_formats) + self.use_none = use_none - def __init__(self, - default_formats: Dict[str, str] = None, - read_formats: Dict[str, str] = None, - write_formats: Dict[str, str] = None): - default_formats = matching_types(default_formats) - self.read_formats = default_formats.copy() - self.read_formats.update(matching_types(read_formats)) - self.write_formats = default_formats.copy() - self.write_formats.update(matching_types(write_formats)) - self.read_overrides = {} - self.write_overrides = {} + def __enter__(self): + if self.query_overrides: + threading.local.ch_query_overrides = self.query_overrides + return self - def set_read_overrides(self, read_overrides: Dict[str, str]) -> None: - self.read_overrides = matching_types(read_overrides) - - def set_writes_overrides(self, write_overrides: Dict[str, str]) -> None: - self.write_overrides = matching_types(write_overrides) - - def read_format(self, ch_type: Type[ClickHouseType]) -> str: - return self.read_overrides.get(ch_type, self.read_formats.get(ch_type, 'native')) - - def write_format(self, ch_type: Type[ClickHouseType]) -> str: - return self.write_overrides.get(ch_type, self.write_formats.get(ch_type, 'native')) - - def clear_read_overrides(self): - self.read_overrides = {} - - def 
clear_write_override(self): - self.write_overrides = {} - - - -class QueryFormatter: - def __init__(self, - type_formats: Dict[str, str] = None, - column_formats: Dict[str, str] = None, - sub_column_formats: Dict[str, Dict[str, str]] = None): - pass + def __exit__(self, exc_type, exc_val, exc_tb): + if self.query_overrides: + del threading.local.ch_query_overrides class DataTransform(ABC): - def __init__(self, fmt_ctl: FormatControl): - self.base_format = fmt_ctl + def parse_response(self, source: Sequence, type_formats: Dict[str, str] = None, use_none: bool = True, + column_formats: Dict[str, Union[str, Dict[str, str]]] = None) -> DataResult: + """ + Decodes the ClickHouse byte buffer response into rows of native Python data + :param source: A byte buffer or similar source + :param use_none: Use None python value for ClickHouse nulls (otherwise use type "zero value") + :param type_formats: Dictionary of ClickHouse type names/patterns and response formats + :param column_formats: Use None values for ClickHouse NULLs (otherwise use zero/empty values) + :return: DataResult -- data matrix, column names, column types + """ + with QueryContext(use_none, type_formats, column_formats) as query_context: + return self._transform_response(source, query_context) + + @abstractmethod + def build_insert(self, data: Sequence[Sequence[Any]], *, column_names: Sequence[str], + column_type_names: Sequence[str] = None, + column_types: Sequence[ClickHouseType] = None, + column_oriented: bool = False): + """ + Encodes a dataset of Python sequences into a ClickHouse format + :param data: Matrix of rows and columns of data + :param column_names: Column names of the data to insert + :param column_type_names: Column type names of the data + :param column_types: Column types used to encode data in ClickHouse native format + :param column_oriented: If true the dataset does not need to be "pivoted" + :return: bytearray containing the dataset in the appropriate format + """ + pass - def 
parse_response(self, source: Sequence, type_formats: Dict[column_formats:Dict[str, Union[str, Dict[str, str]]]) -> DataResult: + @abstractmethod + def _transform_response(self, source: Sequence, context: QueryContext) -> DataResult: pass diff --git a/tests/unit_tests/test_driver/test_formats.py b/tests/unit_tests/test_driver/test_formats.py index 1082326b..a03c8156 100644 --- a/tests/unit_tests/test_driver/test_formats.py +++ b/tests/unit_tests/test_driver/test_formats.py @@ -1,11 +1,12 @@ +from clickhouse_connect.datatypes.format import clear_all_formats, set_default_formats from clickhouse_connect.datatypes.network import IPv6 from clickhouse_connect.datatypes.numeric import Int32 from clickhouse_connect.datatypes.string import FixedString -from clickhouse_connect.driver.transform import FormatControl -def test_format_control(): - fmt_ctl = FormatControl(default_formats={'Int32': 'string'}, read_formats={'IP*': 'string'}) - assert fmt_ctl.read_format(IPv6) == 'string' - assert fmt_ctl.write_format(Int32) == 'string' - assert fmt_ctl.read_format(FixedString) == 'native' +def test_default_formats(): + clear_all_formats() + set_default_formats('Int32', 'string', 'IP*', 'string') + assert IPv6.read_format() == 'string' + assert Int32.read_format() == 'string' + assert FixedString.read_format() == 'native' diff --git a/tests/unit_tests/test_driver/test_native_fuzz.py b/tests/unit_tests/test_driver/test_native_fuzz.py index 6670db30..157f9019 100644 --- a/tests/unit_tests/test_driver/test_native_fuzz.py +++ b/tests/unit_tests/test_driver/test_native_fuzz.py @@ -1,13 +1,15 @@ import random from clickhouse_connect.datatypes.registry import get_from_name -from clickhouse_connect.driver.native import build_insert, parse_response +from clickhouse_connect.driver.native import NativeTransform from tests.helpers import random_columns, random_data TEST_RUNS = 200 TEST_COLUMNS = 12 MAX_DATA_ROWS = 100 +transform = NativeTransform() + # pylint: disable=duplicate-code def 
test_native_round_trips(): @@ -18,8 +20,8 @@ def test_native_round_trips(): col_names = ('row_id',) + col_names col_types = (get_from_name('UInt32'),) + col_types assert len(data) == data_rows - output = build_insert(data, column_names=col_names, column_types=col_types) - data_result = parse_response(output) + output = transform.build_insert(data, column_names=col_names, column_types=col_types) + data_result = transform.parse_response(output) assert data_result.column_names == col_names assert data_result.column_types == col_types dataset = data_result.result @@ -34,8 +36,8 @@ def test_native_small(): data = random_data(col_types, 2) col_names = ('row_id',) + col_names col_types = (get_from_name('UInt32'),) + col_types - output = build_insert(data, column_names=col_names, column_types=col_types) - data_result = parse_response(output) + output = transform.build_insert(data, column_names=col_names, column_types=col_types) + data_result = transform.parse_response(output) assert data_result.column_names == col_names assert data_result.column_types == col_types assert data_result.result == data diff --git a/tests/unit_tests/test_driver/test_native_read.py b/tests/unit_tests/test_driver/test_native_read.py index 720dec45..3a243a4d 100644 --- a/tests/unit_tests/test_driver/test_native_read.py +++ b/tests/unit_tests/test_driver/test_native_read.py @@ -2,7 +2,7 @@ from uuid import UUID from clickhouse_connect.datatypes import registry -from clickhouse_connect.driver.native import parse_response +from clickhouse_connect.driver.native import NativeTransform from tests.helpers import to_bytes from tests.unit_tests.test_driver.binary import NESTED_BINARY @@ -54,6 +54,9 @@ """ +parse_response = NativeTransform().parse_response + + def check_result(result, expected, row_num=0, col_num=0): result_set = result[0] row = result_set[row_num] @@ -93,5 +96,5 @@ def test_ip(): def test_nested(): - result = parse_response(to_bytes(NESTED_BINARY)) + result = parse_response 
(to_bytes(NESTED_BINARY)) check_result(result, [{'str1': 'one', 'int32': 5}, {'str1': 'two', 'int32': 55}], 2, 0) diff --git a/tests/unit_tests/test_driver/test_native_write.py b/tests/unit_tests/test_driver/test_native_write.py index 2242a3d9..3ee25684 100644 --- a/tests/unit_tests/test_driver/test_native_write.py +++ b/tests/unit_tests/test_driver/test_native_write.py @@ -1,5 +1,5 @@ from clickhouse_connect.datatypes.registry import get_from_name -from clickhouse_connect.driver.native import build_insert +from clickhouse_connect.driver.native import NativeTransform from tests.helpers import to_bytes from tests.unit_tests.test_driver.binary import NESTED_BINARY @@ -29,6 +29,9 @@ """ +build_insert = NativeTransform().build_insert + + def test_low_card_null(): data = [['three']] names = ['value'] From b882a66fb41549f45aee327ee17031807f224720 Mon Sep 17 00:00:00 2001 From: Geoff Genz Date: Thu, 7 Jul 2022 14:59:17 -0600 Subject: [PATCH 03/25] Lint fixes --- clickhouse_connect/datatypes/format.py | 6 +++--- clickhouse_connect/datatypes/registry.py | 2 +- clickhouse_connect/driver/transform.py | 3 +-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/clickhouse_connect/datatypes/format.py b/clickhouse_connect/datatypes/format.py index c27856f5..5b198e2c 100644 --- a/clickhouse_connect/datatypes/format.py +++ b/clickhouse_connect/datatypes/format.py @@ -3,7 +3,7 @@ from typing import Dict, Type, Sequence from clickhouse_connect.datatypes.base import ClickHouseType, type_map, ch_read_formats, ch_write_formats -from clickhouse_connect.driver import ProgrammingError +from clickhouse_connect.driver.exceptions import ProgrammingError def set_default_formats(*args, **kwargs): @@ -51,8 +51,8 @@ def _convert_arguments(*args, **kwargs) -> Dict[str, str]: try: for x in range(0, len(args), 2): fmt_map[args[x]] = args[x + 1] - except (IndexError, TypeError, ValueError): - raise ProgrammingError('Invalid type/format arguments for format method') + except (IndexError, 
TypeError, ValueError) as ex: + raise ProgrammingError('Invalid type/format arguments for format method') from ex fmt_map.update(kwargs) return fmt_map diff --git a/clickhouse_connect/datatypes/registry.py b/clickhouse_connect/datatypes/registry.py index af897884..1ab80e03 100644 --- a/clickhouse_connect/datatypes/registry.py +++ b/clickhouse_connect/datatypes/registry.py @@ -1,6 +1,6 @@ import logging -from typing import Tuple, Dict, Type, Optional +from typing import Tuple, Dict from clickhouse_connect.datatypes.base import TypeDef, ClickHouseType, type_map from clickhouse_connect.driver.exceptions import InternalError, ProgrammingError from clickhouse_connect.driver.parser import parse_enum, parse_callable, parse_columns diff --git a/clickhouse_connect/driver/transform.py b/clickhouse_connect/driver/transform.py index 85cf79ae..16f2250a 100644 --- a/clickhouse_connect/driver/transform.py +++ b/clickhouse_connect/driver/transform.py @@ -9,7 +9,7 @@ class QueryContext: def __init__(self, use_none: bool, type_formats: Optional[Dict[str, str]], - column_formats: Optional[Dict[str, Union[str, Dict[str, str]]]]): + _column_formats: Optional[Dict[str, Union[str, Dict[str, str]]]]): self.query_overrides = format_map(type_formats) self.use_none = use_none @@ -52,7 +52,6 @@ def build_insert(self, data: Sequence[Sequence[Any]], *, column_names: Sequence[ :param column_oriented: If true the dataset does not need to be "pivoted" :return: bytearray containing the dataset in the appropriate format """ - pass @abstractmethod def _transform_response(self, source: Sequence, context: QueryContext) -> DataResult: From a5387db94ca66ba54e81b1f68d8111436aab7951 Mon Sep 17 00:00:00 2001 From: Geoff Genz Date: Thu, 7 Jul 2022 15:02:44 -0600 Subject: [PATCH 04/25] Lint fixes --- clickhouse_connect/datatypes/registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clickhouse_connect/datatypes/registry.py b/clickhouse_connect/datatypes/registry.py index 
1ab80e03..a3b27bc4 100644 --- a/clickhouse_connect/datatypes/registry.py +++ b/clickhouse_connect/datatypes/registry.py @@ -2,7 +2,7 @@ from typing import Tuple, Dict from clickhouse_connect.datatypes.base import TypeDef, ClickHouseType, type_map -from clickhouse_connect.driver.exceptions import InternalError, ProgrammingError +from clickhouse_connect.driver.exceptions import InternalError from clickhouse_connect.driver.parser import parse_enum, parse_callable, parse_columns logger = logging.getLogger(__name__) From 6a498092f3bac1415f3656b3ade8845137bbabdf Mon Sep 17 00:00:00 2001 From: Geoff Genz Date: Sun, 10 Jul 2022 16:06:08 -0600 Subject: [PATCH 05/25] Format control checkpoint --- clickhouse_connect/cc_superset/datatypes.py | 10 ++-- clickhouse_connect/datatypes/__init__.py | 46 ---------------- clickhouse_connect/datatypes/base.py | 13 +++-- clickhouse_connect/datatypes/network.py | 54 +++++++++---------- clickhouse_connect/datatypes/numeric.py | 21 +++----- clickhouse_connect/datatypes/special.py | 7 +-- clickhouse_connect/datatypes/string.py | 16 +++--- tests/conftest.py | 8 +++ tests/integration_tests/docker-compose.yml | 2 +- tests/integration_tests/test_native_fuzz.py | 2 + .../test_sqlalchemy/test_ddl.py | 4 +- tests/unit_tests/test_driver/test_formats.py | 3 +- 12 files changed, 70 insertions(+), 116 deletions(-) diff --git a/clickhouse_connect/cc_superset/datatypes.py b/clickhouse_connect/cc_superset/datatypes.py index b2e7e3de..82b38727 100644 --- a/clickhouse_connect/cc_superset/datatypes.py +++ b/clickhouse_connect/cc_superset/datatypes.py @@ -2,7 +2,7 @@ from superset.utils.core import GenericDataType from clickhouse_connect.cc_sqlalchemy.datatypes.base import sqla_type_map -from clickhouse_connect.datatypes import fixed_string_format, uint64_format, ip_format, uuid_format +from clickhouse_connect.datatypes.format import set_default_formats type_mapping = ( (r'^(FLOAT|DECIMAL|INT|UINT)', GenericDataType.NUMERIC), @@ -16,10 +16,10 @@ def 
configure_types(): Monkey patch the Superset generic_type onto the clickhouse type, also set defaults for certain type formatting to be better compatible with superset """ - fixed_string_format('string', 'utf8') - uint64_format('signed') - ip_format('string') - uuid_format('string') + set_default_formats(FixedString='string', + IPv4='string', + UInt64='signed', + UUID='string') compiled = [(re.compile(pattern, re.IGNORECASE), gen_type) for pattern, gen_type in type_mapping] for name, sqla_type in sqla_type_map.items(): for pattern, gen_type in compiled: diff --git a/clickhouse_connect/datatypes/__init__.py b/clickhouse_connect/datatypes/__init__.py index b7896ba5..b9006515 100644 --- a/clickhouse_connect/datatypes/__init__.py +++ b/clickhouse_connect/datatypes/__init__.py @@ -8,7 +8,6 @@ import clickhouse_connect.datatypes.temporal import clickhouse_connect.datatypes.registry -from clickhouse_connect.driver.exceptions import ProgrammingError logger = logging.getLogger(__name__) @@ -22,48 +21,3 @@ except ImportError: logger.warning('Unable to connect optimized C driver functions, falling back to pure Python', exc_info=True) - -def fixed_string_format(fmt: str, encoding: str = 'utf8'): - if fmt == 'string': - dt_string.FixedString.format = 'string' - dt_string.FixedString.encoding = encoding - elif fmt == 'bytes': - dt_string.FixedString.format = 'bytes' - dt_string.FixedString.encoding = 'utf8' - else: - raise ProgrammingError(f'Unrecognized fixed string default format {fmt}') - - -def big_int_format(fmt: str): - if fmt in ('string', 'int'): - dt_numeric.BigInt.format = fmt - else: - raise ProgrammingError(f'Unrecognized Big Integer default format {fmt}') - - -def uint64_format(fmt: str): - if fmt == 'unsigned': - dt_numeric.UInt64.format = 'unsigned' - dt_numeric.UInt64._array_type = 'Q' - dt_numeric.UInt64.np_format = 'u8' - elif fmt == 'signed': - dt_numeric.UInt64.format = 'signed' - dt_numeric.UInt64._array_type = 'q' - dt_numeric.UInt64.np_format = 'i8' - 
else: - raise ProgrammingError(f'Unrecognized UInt64 default format {fmt}') - - -def uuid_format(fmt: str): - if fmt in ('uuid', 'string'): - dt_special.UUID.format = fmt - else: - raise ProgrammingError(f'Unrecognized UUID default format {fmt}') - - -def ip_format(fmt: str): - if fmt in ('string', 'ip'): - dt_network.IPv4.format = fmt - dt_network.IPv6.format = fmt - else: - raise ProgrammingError(f'Unrecognized IPv4/IPv6 default format {fmt}') diff --git a/clickhouse_connect/datatypes/base.py b/clickhouse_connect/datatypes/base.py index 794c17a3..9a57d396 100644 --- a/clickhouse_connect/datatypes/base.py +++ b/clickhouse_connect/datatypes/base.py @@ -1,5 +1,7 @@ import array import threading +import logging + from abc import abstractmethod, ABC from math import log from typing import NamedTuple, Dict, Type, Any, Sequence, MutableSequence, Optional, Union, Tuple @@ -8,6 +10,7 @@ write_uint64, low_card_version from clickhouse_connect.driver.exceptions import NotSupportedError +logger = logging.getLogger(__name__) ch_read_formats = {} ch_write_formats = {} @@ -97,14 +100,15 @@ def write_native_prefix(self, dest: MutableSequence): def read_native_prefix(self, source: Sequence, loc: int): """ - Read the low cardinality version. Like the write, this has to happen immediately for container classes + Read the low cardinality version. 
Like the write method, this has to happen immediately for container classes :param source: The native protocol binary read buffer :param loc: Moving location pointer for the read buffer :return: updated read pointer """ if self.low_card: v, loc = read_uint64(source, loc) - assert v == low_card_version + if v != low_card_version: + logger.warning(f'Unexpected low cardinality version {v} reading type {self.name}') return loc def read_native_column(self, source: Sequence, loc: int, num_rows: int, **kwargs) -> Tuple[Sequence, int]: @@ -296,7 +300,7 @@ def __init_subclass__(cls, registered: bool = True): super().__init_subclass__(registered) if cls._array_type in ('i', 'I') and int_size == 2: cls._array_type = 'L' if cls._array_type.isupper() else 'l' - if cls._array_type: + if isinstance(cls._array_type, str) and cls._array_type: cls._struct_type = '<' + cls._array_type def _read_native_binary(self, source: Sequence, loc: int, num_rows: int): @@ -318,7 +322,8 @@ def _write_native_binary(self, column: Union[Sequence, MutableSequence], dest: M class UnsupportedType(ClickHouseType, ABC, registered=False): """ - Base class for ClickHouse types that can't be serialized/deserialized into Python types. Mostly useful just for DDL statements + Base class for ClickHouse types that can't be serialized/deserialized into Python types. 
+ Mostly useful just for DDL statements """ def __init__(self, type_def: TypeDef): super().__init__(type_def) diff --git a/clickhouse_connect/datatypes/network.py b/clickhouse_connect/datatypes/network.py index 72ff31c8..b485e133 100644 --- a/clickhouse_connect/datatypes/network.py +++ b/clickhouse_connect/datatypes/network.py @@ -2,30 +2,29 @@ from ipaddress import IPv4Address, IPv6Address from typing import Union, MutableSequence, Sequence -from clickhouse_connect.datatypes.base import ArrayType, ClickHouseType, TypeDef +from clickhouse_connect.datatypes.base import ArrayType, ClickHouseType from clickhouse_connect.driver.common import write_array, array_column -from clickhouse_connect.driver.exceptions import ProgrammingError IPV4_V6_MASK = b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff' V6_NULL = bytes(b'\x00' * 16) +V4_NULL = IPv4Address(0) # pylint: disable=protected-access class IPv4(ArrayType): _array_type = 'I' - python_null = IPv4Address(0) - format = 'ip' - - def __init__(self, type_def: TypeDef): - super().__init__(type_def) - if self.format == 'string': - self.python_type = str - self.np_type = 'U' - elif self.format == 'ip': - self.python_type = IPv4Address - self.np_type = 'O' - else: - raise ProgrammingError('Unrecognized output format for IP4 type') + + @property + def python_type(self): + return str if self.read_format() == 'string' else IPv4Address + + @property + def np_type(self): + return 'U' if self.read_format() == 'string' else 'O' + + @property + def python_null(self): + return '' if self.read_format() == 'string' else V4_NULL def _from_row_binary(self, source: bytes, loc: int): ipv4 = IPv4Address.__new__(IPv4Address) @@ -41,7 +40,7 @@ def _to_row_binary(self, value: [int, IPv4Address, str], dest: bytearray): dest += value.to_bytes(4, 'little') def _read_native_binary(self, source: Sequence, loc: int, num_rows: int): - if self.format == 'string': + if self.read_format() == 'string': return self._from_native_str(source, loc, num_rows) 
return self._from_native_ip(source, loc, num_rows) @@ -76,19 +75,14 @@ def _write_native_binary(self, column: Union[Sequence, MutableSequence], dest: M # pylint: disable=protected-access class IPv6(ClickHouseType): - python_null = IPv6Address(0) - format = 'ip' - - def __init__(self, type_def: TypeDef): - super().__init__(type_def) - if self.format == 'string': - self.python_type = str - self.np_type = 'U' - elif self.format == 'ip': - self.python_type = IPv6Address - self.np_type = 'O' - else: - raise ProgrammingError('Unrecognized output format for IP6 type') + + @property + def python_type(self): + return str if self.read_format() == 'string' else IPv6Address + + @property + def python_null(self): + return '' if self.read_format() == 'string' else V6_NULL def _from_row_binary(self, source: Sequence, loc: int): end = loc + 16 @@ -116,7 +110,7 @@ def _to_row_binary(self, value: Union[str, IPv4Address, IPv6Address, bytes, byte dest += value def _read_native_binary(self, source: Sequence, loc: int, num_rows: int): - if self.format == 'string': + if self.read_format() == 'string': return self._read_native_str(source, loc, num_rows) return self._read_native_ip(source, loc, num_rows) diff --git a/clickhouse_connect/datatypes/numeric.py b/clickhouse_connect/datatypes/numeric.py index 73de49d4..1e65edf2 100644 --- a/clickhouse_connect/datatypes/numeric.py +++ b/clickhouse_connect/datatypes/numeric.py @@ -88,18 +88,14 @@ def _to_row_binary(self, value: int, dest: MutableSequence): class UInt64(ArrayType): - _array_type = 'Q' - np_type = 'u8' - format = 'unsigned' - def __init__(self, type_def: TypeDef): - super().__init__(type_def) - if self.format == 'unsigned': - self._array_type = 'Q' - self.np_type = 'u8' - else: - self._array_type = 'q' - self.np_type = 'i8' + @property + def _array_type(self): + return 'Q' if self.read_format() == 'unsigned' else 'q' + + @property + def np_type(self): + return 'u8' if self.read_format() == 'unsigned' else 'q' def 
_from_row_binary(self, source: Sequence, loc: int): return suf(' Date: Sun, 10 Jul 2022 16:37:49 -0600 Subject: [PATCH 06/25] Fix lint --- clickhouse_connect/datatypes/base.py | 12 +++++++++++- clickhouse_connect/datatypes/string.py | 11 ++++++----- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/clickhouse_connect/datatypes/base.py b/clickhouse_connect/datatypes/base.py index 9a57d396..3b8fbfd3 100644 --- a/clickhouse_connect/datatypes/base.py +++ b/clickhouse_connect/datatypes/base.py @@ -58,6 +58,16 @@ def read_format(cls): return overrides[cls] return ch_read_formats.get(cls, 'native') + @classmethod + def write_format(cls): + overrides = getattr(threading.local, 'ch_column_overrides', None) + if overrides and cls in overrides: + return overrides[cls] + overrides = getattr(threading.local, 'ch_write_overrides)', None) + if overrides and cls in overrides: + return overrides[cls] + return ch_write_formats.get(cls, 'native') + def __init__(self, type_def: TypeDef): """ Base class constructor that sets Nullable and LowCardinality wrappers and currently assigns the row_binary conversion @@ -108,7 +118,7 @@ def read_native_prefix(self, source: Sequence, loc: int): if self.low_card: v, loc = read_uint64(source, loc) if v != low_card_version: - logger.warning(f'Unexpected low cardinality version {v} reading type {self.name}') + logger.warning(f'Unexpected low cardinality version %d reading type %s', v, self.name) return loc def read_native_column(self, source: Sequence, loc: int, num_rows: int, **kwargs) -> Tuple[Sequence, int]: diff --git a/clickhouse_connect/datatypes/string.py b/clickhouse_connect/datatypes/string.py index c6ec1151..3ad823bf 100644 --- a/clickhouse_connect/datatypes/string.py +++ b/clickhouse_connect/datatypes/string.py @@ -88,22 +88,23 @@ def __init__(self, type_def: TypeDef): pass self._name_suffix = type_def.arg_str self._empty_bytes = bytes(b'\x00' * self._byte_size) + self.to_row_binary = self._to_rb_internal @property def 
python_null(self): return self._empty_bytes if self.read_format() == 'bytes' else '' - @property - def _to_row_binary(self): - return self._to_row_binary_bytes if self.read_format() == 'bytes' else self._to_row_binary_str - def _from_row_binary(self, source: Sequence, loc: int): return bytes(source[loc:loc + self._byte_size]), loc + self._byte_size @staticmethod - def _to_row_binary_bytes(value: Sequence, dest: MutableSequence): + def _to_row_binary(value: Sequence, dest: MutableSequence): dest += value + @property + def _to_rb_internal(self): + return self._to_row_binary_str if self.write_format() == 'string' else self._to_row_binary + def _to_row_binary_str(self, value, dest: bytearray): value = str.encode(value, self.encoding) dest += value From 3a5ae3b6db0f0563fb3a9e3b93defb986ae62648 Mon Sep 17 00:00:00 2001 From: Geoff Genz Date: Sun, 10 Jul 2022 16:44:39 -0600 Subject: [PATCH 07/25] Fix lint --- clickhouse_connect/datatypes/__init__.py | 1 - clickhouse_connect/datatypes/base.py | 2 +- clickhouse_connect/datatypes/string.py | 3 +-- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/clickhouse_connect/datatypes/__init__.py b/clickhouse_connect/datatypes/__init__.py index b9006515..941a2a5d 100644 --- a/clickhouse_connect/datatypes/__init__.py +++ b/clickhouse_connect/datatypes/__init__.py @@ -20,4 +20,3 @@ dt_string.FixedString._read_native_bytes = creaders.read_fixed_string_bytes except ImportError: logger.warning('Unable to connect optimized C driver functions, falling back to pure Python', exc_info=True) - diff --git a/clickhouse_connect/datatypes/base.py b/clickhouse_connect/datatypes/base.py index 3b8fbfd3..bdc95d86 100644 --- a/clickhouse_connect/datatypes/base.py +++ b/clickhouse_connect/datatypes/base.py @@ -118,7 +118,7 @@ def read_native_prefix(self, source: Sequence, loc: int): if self.low_card: v, loc = read_uint64(source, loc) if v != low_card_version: - logger.warning(f'Unexpected low cardinality version %d reading type %s', v, 
self.name) + logger.warning('Unexpected low cardinality version %d reading type %s', v, self.name) return loc def read_native_column(self, source: Sequence, loc: int, num_rows: int, **kwargs) -> Tuple[Sequence, int]: diff --git a/clickhouse_connect/datatypes/string.py b/clickhouse_connect/datatypes/string.py index 3ad823bf..aab098aa 100644 --- a/clickhouse_connect/datatypes/string.py +++ b/clickhouse_connect/datatypes/string.py @@ -97,8 +97,7 @@ def python_null(self): def _from_row_binary(self, source: Sequence, loc: int): return bytes(source[loc:loc + self._byte_size]), loc + self._byte_size - @staticmethod - def _to_row_binary(value: Sequence, dest: MutableSequence): + def _to_row_binary(self, value: Sequence, dest: MutableSequence): dest += value @property From a7ecf1d287ecd7057548f8f524980b583852dceb Mon Sep 17 00:00:00 2001 From: Geoff Genz Date: Mon, 25 Jul 2022 16:00:11 -0600 Subject: [PATCH 08/25] Format checkpoint --- clickhouse_connect/datatypes/base.py | 37 ++++++++++++++------ clickhouse_connect/datatypes/format.py | 37 +++++++++++++++----- clickhouse_connect/datatypes/network.py | 2 ++ clickhouse_connect/datatypes/numeric.py | 4 ++- clickhouse_connect/datatypes/special.py | 4 ++- clickhouse_connect/datatypes/string.py | 18 ++-------- tests/unit_tests/test_driver/test_formats.py | 8 ++++- 7 files changed, 72 insertions(+), 38 deletions(-) diff --git a/clickhouse_connect/datatypes/base.py b/clickhouse_connect/datatypes/base.py index bdc95d86..ef925da9 100644 --- a/clickhouse_connect/datatypes/base.py +++ b/clickhouse_connect/datatypes/base.py @@ -35,7 +35,10 @@ class ClickHouseType(ABC): __slots__ = 'nullable', 'low_card', 'wrappers', 'type_def', '__dict__' _ch_name = None _name_suffix = '' - np_type = 'O' + _encoding = 'utf8' + np_type = 'O' # Default to Numpy Object type + valid_formats = 'native' + python_null = 0 python_type = None @@ -49,24 +52,22 @@ def build(cls: Type['ClickHouseType'], type_def: TypeDef): return cls(type_def) @classmethod - def 
read_format(cls): + def _active_format(cls, fmt_map: Dict[Type['ClickHouseType'], str]): overrides = getattr(threading.local, 'ch_column_overrides', None) if overrides and cls in overrides: return overrides[cls] overrides = getattr(threading.local, 'ch_query_overrides)', None) if overrides and cls in overrides: return overrides[cls] - return ch_read_formats.get(cls, 'native') + return fmt_map.get(cls, 'native') + + @classmethod + def read_format(cls): + return cls._active_format(ch_read_formats) @classmethod def write_format(cls): - overrides = getattr(threading.local, 'ch_column_overrides', None) - if overrides and cls in overrides: - return overrides[cls] - overrides = getattr(threading.local, 'ch_write_overrides)', None) - if overrides and cls in overrides: - return overrides[cls] - return ch_write_formats.get(cls, 'native') + return cls._active_format(ch_write_formats) def __init__(self, type_def: TypeDef): """ @@ -98,6 +99,16 @@ def name(self): name = f'{wrapper}({name})' return name + @property + def encoding(self): + override = getattr(threading.local, 'ch_column_encoding', None) + if override: + return override + override = getattr(threading.local, 'ch_query_encoding', None) + if override: + return override + return self._encoding + def write_native_prefix(self, dest: MutableSequence): """ This is something of a hack, as the only "prefix" currently used is for the LowCardinality version. 
Because of the @@ -304,6 +315,7 @@ class ArrayType(ClickHouseType, ABC, registered=False): _signed = True _array_type = None _struct_type = None + valid_formats = 'string', 'native' python_type = int def __init_subclass__(cls, registered: bool = True): @@ -314,7 +326,10 @@ def __init_subclass__(cls, registered: bool = True): cls._struct_type = '<' + cls._array_type def _read_native_binary(self, source: Sequence, loc: int, num_rows: int): - return array_column(self._array_type, source, loc, num_rows) + column, loc = array_column(self._array_type, source, loc, num_rows) + if self.read_format() == 'string': + column = [str(x) for x in column] + return column, loc def _write_native_binary(self, column: Union[Sequence, MutableSequence], dest: MutableSequence): if column and self.nullable: diff --git a/clickhouse_connect/datatypes/format.py b/clickhouse_connect/datatypes/format.py index 5b198e2c..b643e80b 100644 --- a/clickhouse_connect/datatypes/format.py +++ b/clickhouse_connect/datatypes/format.py @@ -6,6 +6,10 @@ from clickhouse_connect.driver.exceptions import ProgrammingError +def set_encoding(encoding: str): + ClickHouseType._encoding = encoding + + def set_default_formats(*args, **kwargs): fmt_map = format_map(_convert_arguments(*args, **kwargs)) ch_read_formats.update(fmt_map) @@ -23,11 +27,21 @@ def clear_default_format(pattern: str): ch_write_formats.pop(ch_type, None) +def set_write_format(pattern: str, fmt: str): + for ch_type in _matching_types(pattern): + ch_write_formats[ch_type] = fmt + + def clear_write_format(pattern: str): for ch_type in _matching_types(pattern): ch_write_formats.pop(ch_type, None) +def set_read_format(pattern: str, fmt: str): + for ch_type in _matching_types(pattern): + ch_read_formats[ch_type] = fmt + + def clear_read_format(pattern: str): for ch_type in _matching_types(pattern): ch_read_formats.pop(ch_type, None) @@ -38,10 +52,7 @@ def format_map(fmt_map: Dict[str, str]) -> Dict[Type[ClickHouseType], str]: return {} final_map = {} 
for pattern, fmt in fmt_map.items():
-        matches = _matching_types(pattern)
-        if not matches:
-            raise ProgrammingError(f'Unrecognized ClickHouse type {pattern} when setting formats')
-        for ch_type in matches:
+        for ch_type in _matching_types(pattern, fmt):
             final_map[ch_type] = fmt
     return final_map
 
@@ -57,10 +68,18 @@ def _convert_arguments(*args, **kwargs) -> Dict[str, str]:
     return fmt_map
 
 
-def _matching_types(pattern: str) -> Sequence[Type[ClickHouseType]]:
+def _matching_types(pattern: str, fmt: str = None) -> Sequence[Type[ClickHouseType]]:
     if '*' in pattern:
         re_pattern = re.compile(pattern.replace('*', '.*'), re.IGNORECASE)
-        return [ch_type for type_name, ch_type in type_map.items() if re_pattern.match(type_name)]
-    if pattern in type_map:
-        return [type_map[pattern]]
-    return []
+        matches = [ch_type for type_name, ch_type in type_map.items() if re_pattern.match(type_name)]
+    elif pattern in type_map:
+        matches = [type_map[pattern]]
+    else:
+        matches = []
+    if not matches:
+        raise ProgrammingError(f'Unrecognized ClickHouse type {pattern} when setting formats')
+    if fmt:
+        invalid = [ch_type.__name__ for ch_type in matches if fmt not in ch_type.valid_formats]
+        if invalid:
+            raise ProgrammingError(f"{fmt} is not a valid format for ClickHouse types {','.join(invalid)}.")
+    return matches
diff --git a/clickhouse_connect/datatypes/network.py b/clickhouse_connect/datatypes/network.py
index b485e133..503e2211 100644
--- a/clickhouse_connect/datatypes/network.py
+++ b/clickhouse_connect/datatypes/network.py
@@ -13,6 +13,7 @@
 # pylint: disable=protected-access
 class IPv4(ArrayType):
     _array_type = 'I'
+    valid_formats = 'string', 'native'
 
     @property
     def python_type(self):
@@ -75,6 +76,7 @@ def _write_native_binary(self, column: Union[Sequence, MutableSequence], dest: M
 
 # pylint: disable=protected-access
 class IPv6(ClickHouseType):
+    valid_formats = 'string', 'native'
 
     @property
     def python_type(self):
diff --git a/clickhouse_connect/datatypes/numeric.py 
b/clickhouse_connect/datatypes/numeric.py index 1e65edf2..7e4750d6 100644 --- a/clickhouse_connect/datatypes/numeric.py +++ b/clickhouse_connect/datatypes/numeric.py @@ -88,6 +88,7 @@ def _to_row_binary(self, value: int, dest: MutableSequence): class UInt64(ArrayType): + valid_formats = 'signed', 'native' @property def _array_type(self): @@ -108,6 +109,7 @@ def _to_row_binary(self, value: int, dest: MutableSequence): class BigInt(ClickHouseType, registered=False): _signed = True _byte_size = 0 + valid_formats = 'string', 'native' def _read_native_binary(self, source: Sequence, loc: int, num_rows: int): signed = self._signed @@ -133,7 +135,7 @@ def _write_native_binary(self, column: Union[Sequence, MutableSequence], dest: M signed = self._signed empty = bytes(b'\x00' * sz) ext = dest.extend - if isinstance(first, str): + if isinstance(first, str) or self.write_format() == 'string': if self.nullable: for x in column: if x: diff --git a/clickhouse_connect/datatypes/special.py b/clickhouse_connect/datatypes/special.py index d50b31cc..a9bfb924 100644 --- a/clickhouse_connect/datatypes/special.py +++ b/clickhouse_connect/datatypes/special.py @@ -9,6 +9,8 @@ class UUID(ClickHouseType): + valid_formats = 'string', 'native' + @property def python_null(self): return PYUUID(int=0) if self.read_format() == 'uuid' else '' @@ -68,7 +70,7 @@ def _read_native_str(source: Sequence, loc: int, num_rows: int): def _write_native_binary(self, column: Union[Sequence, MutableSequence], dest: MutableSequence): first = self._first_value(column) empty = empty_uuid_b - if isinstance(first, str): + if isinstance(first, str) or self.write_format() == 'string': for v in column: if v: x = int(v, 16) diff --git a/clickhouse_connect/datatypes/string.py b/clickhouse_connect/datatypes/string.py index aab098aa..45c0ef78 100644 --- a/clickhouse_connect/datatypes/string.py +++ b/clickhouse_connect/datatypes/string.py @@ -5,16 +5,8 @@ class String(ClickHouseType): - encoding = 'utf8' python_null = '' - 
def __init__(self, type_def: TypeDef): - super().__init__(type_def) - try: - self.encoding = type_def.values[0] - except IndexError: - pass - def _from_row_binary(self, source, loc): length, loc = read_leb128(source, loc) return str(source[loc:loc + length], self.encoding), loc + length @@ -77,22 +69,18 @@ def _write_native_binary(self, column: Union[Sequence, MutableSequence], dest: M class FixedString(ClickHouseType): - encoding = 'utf8' + valid_formats = 'string', 'native' def __init__(self, type_def: TypeDef): super().__init__(type_def) self._byte_size = type_def.values[0] - try: - self.encoding = type_def.values[1] - except IndexError: - pass self._name_suffix = type_def.arg_str self._empty_bytes = bytes(b'\x00' * self._byte_size) self.to_row_binary = self._to_rb_internal @property def python_null(self): - return self._empty_bytes if self.read_format() == 'bytes' else '' + return self._empty_bytes if self.read_format() == 'native' else '' def _from_row_binary(self, source: Sequence, loc: int): return bytes(source[loc:loc + self._byte_size]), loc + self._byte_size @@ -140,7 +128,7 @@ def _write_native_binary(self, column: Union[Sequence, MutableSequence], dest: M str_enc = str.encode enc = self.encoding first = self._first_value(column) - if isinstance(first, str): + if isinstance(first, str) or self.write_format() == 'string': if self.nullable: for x in column: if x is None: diff --git a/tests/unit_tests/test_driver/test_formats.py b/tests/unit_tests/test_driver/test_formats.py index 3ddf9979..eee76926 100644 --- a/tests/unit_tests/test_driver/test_formats.py +++ b/tests/unit_tests/test_driver/test_formats.py @@ -1,4 +1,4 @@ -from clickhouse_connect.datatypes.format import set_default_formats +from clickhouse_connect.datatypes.format import set_default_formats, set_write_format from clickhouse_connect.datatypes.network import IPv6 from clickhouse_connect.datatypes.numeric import Int32 from clickhouse_connect.datatypes.string import FixedString @@ -9,3 +9,9 @@ 
def test_default_formats(): assert IPv6.read_format() == 'string' assert Int32.read_format() == 'string' assert FixedString.read_format() == 'native' + + +def test_fixed_str_format(): + set_write_format('FixedString', 'string') + + From f2a429953f977cf141172aeaa49f1df2bbf63a55 Mon Sep 17 00:00:00 2001 From: Geoff Genz Date: Mon, 25 Jul 2022 19:54:44 -0600 Subject: [PATCH 09/25] Fix named tuples --- clickhouse_connect/datatypes/container.py | 3 +- clickhouse_connect/datatypes/format.py | 4 +- clickhouse_connect/datatypes/registry.py | 3 ++ clickhouse_connect/driver/client.py | 6 +-- clickhouse_connect/driver/parser.py | 66 +++++++++++------------ clickhouse_connect/driver/query.py | 5 +- tests/integration_tests/conftest.py | 3 ++ tests/integration_tests/test_formats.py | 16 ++++-- tests/unit_tests/test_chtypes.py | 5 ++ 9 files changed, 64 insertions(+), 47 deletions(-) diff --git a/clickhouse_connect/datatypes/container.py b/clickhouse_connect/datatypes/container.py index 6b492015..382eacdc 100644 --- a/clickhouse_connect/datatypes/container.py +++ b/clickhouse_connect/datatypes/container.py @@ -84,11 +84,12 @@ def write_native_data(self, column: Sequence, dest: MutableSequence): class Tuple(ClickHouseType): - _slots = 'element_types', 'from_rb_funcs', 'to_rb_funcs' + _slots = 'element_names', 'element_types', 'from_rb_funcs', 'to_rb_funcs' python_type = tuple def __init__(self, type_def: TypeDef): super().__init__(type_def) + self.element_names = type_def.keys self.element_types = [get_from_name(name) for name in type_def.values] self.from_rb_funcs = tuple((t.from_row_binary for t in self.element_types)) self.to_rb_funcs = tuple((t.to_row_binary for t in self.element_types)) diff --git a/clickhouse_connect/datatypes/format.py b/clickhouse_connect/datatypes/format.py index b643e80b..9cbab1f9 100644 --- a/clickhouse_connect/datatypes/format.py +++ b/clickhouse_connect/datatypes/format.py @@ -6,8 +6,8 @@ from clickhouse_connect.driver.exceptions import 
ProgrammingError -def set_encoding(encoding: str): - ClickHouseType._encoding = encoding +def default_encoding(encoding: str): + ClickHouseType._encoding = encoding # pylint: disable=protected-access def set_default_formats(*args, **kwargs): diff --git a/clickhouse_connect/datatypes/registry.py b/clickhouse_connect/datatypes/registry.py index a3b27bc4..47da6e05 100644 --- a/clickhouse_connect/datatypes/registry.py +++ b/clickhouse_connect/datatypes/registry.py @@ -32,6 +32,9 @@ def parse_name(name: str) -> Tuple[str, str, TypeDef]: elif base.startswith('Nested'): keys, values = parse_columns(base[6:]) base = 'Nested' + elif base.startswith('Tuple'): + keys, values = parse_columns(base[5:]) + base = 'Tuple' else: try: base, values, _ = parse_callable(base) diff --git a/clickhouse_connect/driver/client.py b/clickhouse_connect/driver/client.py index 0c48ae82..86109144 100644 --- a/clickhouse_connect/driver/client.py +++ b/clickhouse_connect/driver/client.py @@ -131,17 +131,17 @@ def query_arrow(self, settings: Optional[Dict[str, Any]] = None, use_strings: bool = True): """ - Query method using the ClickHouse ArrowStream format to return a PyArrow result + Query method using the ClickHouse Arrow format to return a PyArrow table :param query: Query statement/format string :param parameters: Optional dictionary used to format the query :param settings: Optional dictionary of ClickHouse settings (key/string values) :param use_strings: Convert ClickHouse String type to Arrow string type (instead of binary) - :return: Tuple of the PyArrow schema and a single record batch + :return: PyArrow.Table """ arrow_settings = {} if not settings else settings.copy() if 'output_format_arrow_string_as_string' not in arrow_settings: arrow_settings['output_format_arrow_string_as_string'] = '1' if use_strings else '0' - return to_arrow(self.raw_query(query, parameters, arrow_settings, 'ArrowStream')) + return to_arrow(self.raw_query(query, parameters, arrow_settings, 'Arrow')) 
@abstractmethod def command(self, diff --git a/clickhouse_connect/driver/parser.py b/clickhouse_connect/driver/parser.py index b74d31dd..76ce66b1 100644 --- a/clickhouse_connect/driver/parser.py +++ b/clickhouse_connect/driver/parser.py @@ -124,44 +124,42 @@ def parse_columns(expr: str): names = [] columns = [] pos = 1 - in_column = False + named = False level = 0 - name = [] - column = '' + label = '' in_str = False while True: char = expr[pos] pos += 1 - if in_column: - if in_str: - column += '' - if "'" == char: - in_str = False - elif char == '\\' and expr[pos] == "'" and expr[pos:pos + 4] != "' = " and expr[pos:pos + 2] != "')": - column += expr[pos] - pos += 1 - else: - if level == 0: - if char == ',': - columns.append(column) - column = '' - in_column = False - continue - if char == ')': - columns.append(column) - break - if char == "'" and (not column or 'Enum' in column): - in_str = True - if char == '(': - level += 1 - elif char == ')': - level -= 1 - column += char - elif char == ' ': - if name: - names.append(''.join(name)) - name = [] - in_column = True + if in_str: + if "'" == char: + in_str = False + elif char == '\\' and expr[pos] == "'" and expr[pos:pos + 4] != "' = " and expr[pos:pos + 2] != "')": + label += expr[pos] + pos += 1 else: - name.append(char) + if level == 0: + if char == ' ': + if label: + names.append(label) + label = '' + named = True + char = '' + elif char == ',': + columns.append(label) + if not named: + names.append('') + named = False + label = '' + continue + elif char == ')': + columns.append(label) + break + if char == "'" and (not label or 'Enum' in label): + in_str = True + elif char == '(': + level += 1 + elif char == ')': + level -= 1 + label += char return tuple(names), tuple(columns) diff --git a/clickhouse_connect/driver/query.py b/clickhouse_connect/driver/query.py index c7c2cd37..f37bf974 100644 --- a/clickhouse_connect/driver/query.py +++ b/clickhouse_connect/driver/query.py @@ -115,6 +115,5 @@ def 
from_pandas_df(df: 'pa.DataFrame'): def to_arrow(content: bytes): check_arrow() - buf = pyarrow.BufferReader(content) - schema = pyarrow.read_schema(buf) - return schema, pyarrow.read_record_batch(buf, schema) + reader = pyarrow.RecordBatchFileReader(content) + return reader.read_all() diff --git a/tests/integration_tests/conftest.py b/tests/integration_tests/conftest.py index b9e17313..58396f68 100644 --- a/tests/integration_tests/conftest.py +++ b/tests/integration_tests/conftest.py @@ -69,6 +69,9 @@ def test_client_fixture(test_config: TestConfig, test_db: str) -> Iterator[Clien if test_config.use_docker: run_cmd(['docker-compose', '-f', compose_file, 'down', '-v']) sys.stderr.write('Starting docker compose') + pull_result = run_cmd(['docker-compose', '-f', compose_file, 'pull']) + if pull_result[0]: + raise Exception(f'Failed to pull latest docker image(s): {pull_result[2]}') up_result = run_cmd(['docker-compose', '-f', compose_file, 'up', '-d']) if up_result[0]: raise Exception(f'Failed to start docker: {up_result[2]}') diff --git a/tests/integration_tests/test_formats.py b/tests/integration_tests/test_formats.py index c7abc03f..4b344003 100644 --- a/tests/integration_tests/test_formats.py +++ b/tests/integration_tests/test_formats.py @@ -7,12 +7,20 @@ def test_arrow(test_client: Client): if not HAS_ARROW: pytest.skip('PyArrow package not available') - arrow_schema, arrow_batch = test_client.query_arrow('SELECT database, name, total_rows FROM system.tables') + arrow_table = test_client.query_arrow('SELECT database, name, total_rows FROM system.tables') + arrow_schema = arrow_table.schema assert arrow_schema.field(0).name == 'database' - assert arrow_schema.field(2).type.id == 8 + assert arrow_schema.field(1).type.id == 13 assert arrow_schema.field(2).type.bit_width == 64 - assert arrow_batch.num_rows > 20 - assert len(arrow_batch.columns) == 3 + assert arrow_table.num_rows > 20 + assert len(arrow_table.columns) == 3 + + arrow_table = 
test_client.query_arrow('SELECT number from system.numbers LIMIT 500', + settings={'max_block_size': 50}) + arrow_schema = arrow_table.schema + assert arrow_schema.field(0).name == 'number' + assert arrow_schema.field(0).type.id == 8 + assert arrow_table.num_rows == 500 def test_numpy(test_client: Client): diff --git a/tests/unit_tests/test_chtypes.py b/tests/unit_tests/test_chtypes.py index fd096ab7..df66018c 100644 --- a/tests/unit_tests/test_chtypes.py +++ b/tests/unit_tests/test_chtypes.py @@ -36,3 +36,8 @@ def test_nested_parse(): nested_name = f'Nested({nest})' nested_type = gfn(nested_name) assert nested_type.name == nested_name + + +def test_named_tuple(): + pass + From 9adc694d52ddd112f9df70dc35dd724d3d3bc72f Mon Sep 17 00:00:00 2001 From: Geoff Genz Date: Tue, 26 Jul 2022 04:35:49 -0600 Subject: [PATCH 10/25] Named tuple tweaks and test --- clickhouse_connect/datatypes/container.py | 5 ++++- clickhouse_connect/driver/parser.py | 7 +++---- tests/integration_tests/test_formats.py | 4 ++-- tests/unit_tests/test_chtypes.py | 6 ++++-- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/clickhouse_connect/datatypes/container.py b/clickhouse_connect/datatypes/container.py index 382eacdc..84a7ea47 100644 --- a/clickhouse_connect/datatypes/container.py +++ b/clickhouse_connect/datatypes/container.py @@ -93,7 +93,10 @@ def __init__(self, type_def: TypeDef): self.element_types = [get_from_name(name) for name in type_def.values] self.from_rb_funcs = tuple((t.from_row_binary for t in self.element_types)) self.to_rb_funcs = tuple((t.to_row_binary for t in self.element_types)) - self._name_suffix = type_def.arg_str + if self.element_names: + self._name_suffix = f"({', '.join(k + ' ' + str(v) for k, v in zip(type_def.keys, type_def.values))})" + else: + self._name_suffix = type_def.arg_str def _from_row_binary(self, source: bytes, loc: int): values = [] diff --git a/clickhouse_connect/driver/parser.py b/clickhouse_connect/driver/parser.py index 
76ce66b1..0064302c 100644 --- a/clickhouse_connect/driver/parser.py +++ b/clickhouse_connect/driver/parser.py @@ -117,7 +117,8 @@ def parse_enum(expr) -> Tuple[Tuple[str], Tuple[int]]: def parse_columns(expr: str): """ - Parse a ClickHouse column list of the form (col1 String, col2 Array(Tuple(String, Int32))) + Parse a ClickHouse column list of the form (col1 String, col2 Array(Tuple(String, Int32))). This also handles + unnamed columns (such as Tuple definitions). Mixed named and unnamed columns are not currently supported. :param expr: ClickHouse enum expression/arguments :return: Parallel tuples of column types and column types (strings) """ @@ -140,15 +141,13 @@ def parse_columns(expr: str): else: if level == 0: if char == ' ': - if label: + if label and not named: names.append(label) label = '' named = True char = '' elif char == ',': columns.append(label) - if not named: - names.append('') named = False label = '' continue diff --git a/tests/integration_tests/test_formats.py b/tests/integration_tests/test_formats.py index 4b344003..28ddf82c 100644 --- a/tests/integration_tests/test_formats.py +++ b/tests/integration_tests/test_formats.py @@ -7,10 +7,10 @@ def test_arrow(test_client: Client): if not HAS_ARROW: pytest.skip('PyArrow package not available') - arrow_table = test_client.query_arrow('SELECT database, name, total_rows FROM system.tables') + arrow_table = test_client.query_arrow('SELECT database, name, total_rows FROM system.tables', use_strings=False) arrow_schema = arrow_table.schema assert arrow_schema.field(0).name == 'database' - assert arrow_schema.field(1).type.id == 13 + assert arrow_schema.field(1).type.id == 14 assert arrow_schema.field(2).type.bit_width == 64 assert arrow_table.num_rows > 20 assert len(arrow_table.columns) == 3 diff --git a/tests/unit_tests/test_chtypes.py b/tests/unit_tests/test_chtypes.py index df66018c..140cdb1d 100644 --- a/tests/unit_tests/test_chtypes.py +++ b/tests/unit_tests/test_chtypes.py @@ -39,5 +39,7 @@ def 
test_nested_parse(): def test_named_tuple(): - pass - + tuple_type = gfn('Tuple(Int64, String)') + assert tuple_type.name == 'Tuple(Int64, String)' + tuple_type = gfn('Tuple(key Int64, value String)') + assert tuple_type.name == 'Tuple(key Int64, value String)' From 454e5151c71a565453bc08e216ee6d7a54184fd0 Mon Sep 17 00:00:00 2001 From: Geoff Genz Date: Tue, 26 Jul 2022 05:09:50 -0600 Subject: [PATCH 11/25] Fix lint --- .../{test_formats.py => test_data_libraries.py} | 1 - tests/unit_tests/test_driver/test_formats.py | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) rename tests/integration_tests/{test_formats.py => test_data_libraries.py} (99%) diff --git a/tests/integration_tests/test_formats.py b/tests/integration_tests/test_data_libraries.py similarity index 99% rename from tests/integration_tests/test_formats.py rename to tests/integration_tests/test_data_libraries.py index 28ddf82c..c2c2c81b 100644 --- a/tests/integration_tests/test_formats.py +++ b/tests/integration_tests/test_data_libraries.py @@ -14,7 +14,6 @@ def test_arrow(test_client: Client): assert arrow_schema.field(2).type.bit_width == 64 assert arrow_table.num_rows > 20 assert len(arrow_table.columns) == 3 - arrow_table = test_client.query_arrow('SELECT number from system.numbers LIMIT 500', settings={'max_block_size': 50}) arrow_schema = arrow_table.schema diff --git a/tests/unit_tests/test_driver/test_formats.py b/tests/unit_tests/test_driver/test_formats.py index eee76926..a9d79b92 100644 --- a/tests/unit_tests/test_driver/test_formats.py +++ b/tests/unit_tests/test_driver/test_formats.py @@ -13,5 +13,4 @@ def test_default_formats(): def test_fixed_str_format(): set_write_format('FixedString', 'string') - - + assert FixedString.write_format() == 'string' From 24552563be4ce055a6611eb5b20a93d10eb6a04e Mon Sep 17 00:00:00 2001 From: Geoff Genz Date: Wed, 27 Jul 2022 17:29:25 -0600 Subject: [PATCH 12/25] JSON support checkpoint --- README.md | 4 +- .../cc_sqlalchemy/sql/__init__.py | 4 +- 
clickhouse_connect/datatypes/container.py | 53 ++++++++++++++++--- clickhouse_connect/driver/client.py | 25 +++++++++ clickhouse_connect/driver/common.py | 8 ++- clickhouse_connect/driver/httpclient.py | 9 +++- clickhouse_connect/driver/parser.py | 5 +- clickhouse_connect/json_impl.py | 17 ++++++ tests/integration_tests/conftest.py | 2 + tests/integration_tests/test_native.py | 33 +++++++++++- tests/integration_tests/test_native_fuzz.py | 3 +- 11 files changed, 143 insertions(+), 20 deletions(-) create mode 100644 clickhouse_connect/json_impl.py diff --git a/README.md b/README.md index 14f06747..dc27a96b 100644 --- a/README.md +++ b/README.md @@ -104,8 +104,8 @@ Create a ClickHouse client using the `clickhouse_connect.driver.create_client(.. Native format is preferred for performance reasons * `query_limit:int` LIMIT value added to all queries. Defaults to 5,000 rows. Unlimited queries are not supported to prevent crashing the driver -* `connect_timeout:int` HTTP connection timeout in seconds -* `send_receive_timeout:int` HTTP read timeout in seconds +* `connect_timeout:int` HTTP connection timeout in seconds. Default 10 seconds. +* `send_receive_timeout:int` HTTP read timeout in seconds. Default 300 seconds. * `client_name:str` HTTP User-Agent header. Defaults to `clickhouse-connect` * `verify:bool` For HTTPS connections, validate the ClickHouse server TLS certificate, including matching hostname, expiration, and signed by a trusted Certificate Authority. Defaults to True. 
diff --git a/clickhouse_connect/cc_sqlalchemy/sql/__init__.py b/clickhouse_connect/cc_sqlalchemy/sql/__init__.py index 19af42e1..e4040382 100644 --- a/clickhouse_connect/cc_sqlalchemy/sql/__init__.py +++ b/clickhouse_connect/cc_sqlalchemy/sql/__init__.py @@ -1,11 +1,11 @@ -import re from typing import Optional from sqlalchemy import Table from sqlalchemy.sql.compiler import RESERVED_WORDS +from clickhouse_connect.driver.common import identifier_re + reserved_words = RESERVED_WORDS | set('index') -identifier_re = re.compile(r'^[a-zA-Z_][0-9a-zA-Z_]*$') def quote_id(v: str) -> str: diff --git a/clickhouse_connect/datatypes/container.py b/clickhouse_connect/datatypes/container.py index 84a7ea47..d3e01ce1 100644 --- a/clickhouse_connect/datatypes/container.py +++ b/clickhouse_connect/datatypes/container.py @@ -1,9 +1,11 @@ import array -from typing import Dict, Sequence, MutableSequence +from typing import Dict, Sequence, MutableSequence, Any -from clickhouse_connect.datatypes.base import UnsupportedType, ClickHouseType, TypeDef -from clickhouse_connect.driver.common import read_leb128, to_leb128, array_column, must_swap +from clickhouse_connect.datatypes.base import UnsupportedType, ClickHouseType, TypeDef, EMPTY_TYPE_DEF +from clickhouse_connect.datatypes.string import String +from clickhouse_connect.driver.common import read_leb128, to_leb128, array_column, must_swap, write_uint64 from clickhouse_connect.datatypes.registry import get_from_name +from clickhouse_connect.json_impl import json_impl class Array(ClickHouseType): @@ -116,9 +118,16 @@ def read_native_prefix(self, source: Sequence, loc: int): def read_native_data(self, source: Sequence, loc: int, num_rows: int, use_none = True): columns = [] + e_names = self.element_names for e_type in self.element_types: column, loc = e_type.read_native_data(source, loc, num_rows, use_none) columns.append(column) + if e_names and self.read_format != 'tuple': + dicts = [{} for _ in range(num_rows)] + for ix, x in 
enumerate(dicts): + for n, key in enumerate(e_names): + x[key] = columns[n][ix] + return dicts, loc return tuple(zip(*columns)), loc def write_native_prefix(self, dest: MutableSequence): @@ -239,13 +248,41 @@ def write_native_data(self, column: Sequence, dest: MutableSequence): self.tuple_array.write_native_data(data, dest) -class Object(UnsupportedType): +class JSON(ClickHouseType): python_type = dict - def __init__(self, type_def): - super().__init__(type_def) - self._name_suffix = type_def.arg_str + def _to_row_binary(self, value: Any, dest: MutableSequence): + value = bytes(json_impl.dumps(value)) + dest += to_leb128(len(value)) + value + def _from_row_binary(self, source: Sequence, loc: int): + length, loc = read_leb128(source, loc) + return json_impl.loads(str(source[loc:loc + length])), loc + length + + def write_native_prefix(self, dest: MutableSequence): + dest.append(0x01) -class JSON(UnsupportedType): + def write_native_data(self, column: Sequence, dest: MutableSequence): + app = dest.append + to_json = json_impl.dumps + for x in column: + v = to_json(x) + sz = len(v) + while True: + b = sz & 0x7f + sz >>= 7 + if sz == 0: + app(b) + break + app(0x80 | b) + dest += v + + +class Object(JSON): python_type = dict + + def __init__(self, type_def): + if type_def.values[0].lower() != "'json'": + raise NotImplementedError('Only json Object type is currently supported') + super().__init__(type_def) + self._name_suffix = type_def.arg_str diff --git a/clickhouse_connect/driver/client.py b/clickhouse_connect/driver/client.py index 86109144..0079201d 100644 --- a/clickhouse_connect/driver/client.py +++ b/clickhouse_connect/driver/client.py @@ -69,6 +69,14 @@ def _prep_query(self, query: str, parameters: Optional[Dict[str, Any]] = None): query += f' LIMIT {self.limit}' return query + @abstractmethod + def client_setting(self, name, value): + """ + Set a clickhouse setting for the client after initialization + :param name: Setting name + :param value: Setting value + 
""" + @abstractmethod def query(self, query: str, @@ -245,6 +253,23 @@ def normalize_table(self, table: str, database: Optional[str]) -> Tuple[str, str full_name = f'{database}.{name}' return table, database, full_name + def min_version(self, version_str: str) -> bool: + """ + Determine whether the connected server is at least the submitted version + :param version_str: Version string consisting of up to 4 integers delimited by dots + :return: 1 if the version_str is greater than the server_version, 0 if equal, -1 if less than + """ + server_parts = [int(x) for x in self.server_version.split('.')] + server_parts.extend([0] * (4 - len(server_parts))) + version_parts = [int(x) for x in version_str.split('.')] + version_parts.extend([0] * (4 - len(version_parts))) + for x, y in zip(server_parts, version_parts): + if x > y: + return True + if x < y: + return False + return True + def table_columns(self, table: str, database: str) -> Tuple[ColumnDef]: """ Return complete column definitions for a ClickHouse table diff --git a/clickhouse_connect/driver/common.py b/clickhouse_connect/driver/common.py index f1ae9c97..4b448796 100644 --- a/clickhouse_connect/driver/common.py +++ b/clickhouse_connect/driver/common.py @@ -1,5 +1,6 @@ import array import sys +import re from typing import Tuple, Sequence, MutableSequence @@ -9,6 +10,8 @@ low_card_version = 1 array_map = {1: 'b', 2: 'h', 4: 'i', 8: 'q'} +decimal_prec = {32: 9, 64: 18, 128: 38, 256: 79} +identifier_re = re.compile('^[a-zA-Z_][0-9a-zA-Z_]*$') if int_size == 2: array_map[4] = 'l' @@ -166,4 +169,7 @@ def decimal_size(prec: int): return 256 -decimal_prec = {32: 9, 64: 18, 128: 38, 256: 79} +def unescape_identifier(x: str) -> str: + if x.startswith('`') and x.endswith('`'): + return x[1:-1] + return x \ No newline at end of file diff --git a/clickhouse_connect/driver/httpclient.py b/clickhouse_connect/driver/httpclient.py index d7aaec41..fb6b081d 100644 --- a/clickhouse_connect/driver/httpclient.py +++ 
b/clickhouse_connect/driver/httpclient.py @@ -47,7 +47,7 @@ def __init__(self, data_format: str = 'native', query_limit: int = 5000, connect_timeout: int = 10, - send_receive_timeout=60, + send_receive_timeout = 300, client_name: str = 'clickhouse-connect', send_progress: bool = True, verify: bool = True, @@ -143,6 +143,11 @@ def _format_query(self, query: str) -> str: query += f' FORMAT {self.read_format}' return query + def client_setting(self, name, value): + if isinstance(value, bool): + value = '1' if value else '0' + self.session.params[name] = str(value) + def query(self, query: str, parameters: Optional[Dict[str, Any]] = None, settings: Dict[str, Any] = None, @@ -192,7 +197,7 @@ def data_insert(self, 'database': self.database} params.update(self._validate_settings(settings, True)) insert_block = self.transform.build_insert(data, column_types=column_types, column_names=column_names, - column_oriented=column_oriented) + column_oriented=column_oriented) response = self._raw_request(insert_block, params, headers) logger.debug('Insert response code: %d, content: %s', response.status_code, response.content) diff --git a/clickhouse_connect/driver/parser.py b/clickhouse_connect/driver/parser.py index 0064302c..1815734c 100644 --- a/clickhouse_connect/driver/parser.py +++ b/clickhouse_connect/driver/parser.py @@ -2,6 +2,9 @@ # pylint: disable=too-many-branches +from clickhouse_connect.driver.common import unescape_identifier + + def parse_callable(expr) -> Tuple[str, Tuple[Union[str, int], ...], str]: """ Parses a single level ClickHouse optionally 'callable' function/identifier. 
The identifier is returned as the @@ -142,7 +145,7 @@ def parse_columns(expr: str): if level == 0: if char == ' ': if label and not named: - names.append(label) + names.append(unescape_identifier(label)) label = '' named = True char = '' diff --git a/clickhouse_connect/json_impl.py b/clickhouse_connect/json_impl.py new file mode 100644 index 00000000..5728520a --- /dev/null +++ b/clickhouse_connect/json_impl.py @@ -0,0 +1,17 @@ +import logging + +logger = logging.getLogger(__name__) + +try: + import orjson as json_impl + logger.info('Using orjson as the JSON implementation') +except ImportError: + try: + import ujson as json_impl + logger.info('Using ujson as the JSON implementation') + except ImportError: + import json as json_impl + logger.info('Using default JSON implementation') + + + diff --git a/tests/integration_tests/conftest.py b/tests/integration_tests/conftest.py index 58396f68..c46feeca 100644 --- a/tests/integration_tests/conftest.py +++ b/tests/integration_tests/conftest.py @@ -91,6 +91,8 @@ def test_client_fixture(test_config: TestConfig, test_db: str) -> Iterator[Clien if tries > 15: raise Exception('Failed to connect to ClickHouse server after 30 seconds') from ex sleep(1) + if client.min_version('22.6.1'): + client.client_setting('allow_experimental_object_type', 1) if test_db != 'default': client.command(f'CREATE DATABASE IF NOT EXISTS {test_db}', use_database=False) client.database = test_db diff --git a/tests/integration_tests/test_native.py b/tests/integration_tests/test_native.py index 1c149801..2602a8dc 100644 --- a/tests/integration_tests/test_native.py +++ b/tests/integration_tests/test_native.py @@ -1,7 +1,36 @@ -def test_low_card(test_client): +import pytest + +from clickhouse_connect.driver import Client + + +def test_low_card(test_client: Client, test_table_engine: str): test_client.command('DROP TABLE IF EXISTS native_test') test_client.command('CREATE TABLE native_test (key LowCardinality(Int32), value_1 LowCardinality(String)) ' + - 
'Engine MergeTree ORDER BY key') + f'Engine {test_table_engine} ORDER BY key') test_client.insert('native_test', [[55, 'TV1'], [-578328, 'TV38882'], [57372, 'Kabc/defXX']]) result = test_client.query("SELECT * FROM native_test WHERE value_1 LIKE '%abc/def%'") assert len(result.result_set) == 1 + + +def test_json_insert(test_client: Client, test_table_engine: str): + if not test_client.min_version('22.6.1'): + pytest.skip('JSON test skipped for old version {test_client.server_version}') + test_client.command('DROP TABLE IF EXISTS native_json_test') + test_client.command('CREATE TABLE native_json_test (key Int32, value JSON, e2 Int32)' + + f'Engine {test_table_engine} ORDER BY key') + jv1 = {'key1': 337, 'value.2': 'vvvv', 'HKD@spéçiäl': 'Special K', 'blank': 'not_really_blank'} + jv3 = {'key3': 752, 'value.2': 'v2_rules', 'blank': None} + test_client.insert('native_json_test', [[5, jv1, -44], [20, None, 5200], [25, jv3, 7302]]) + + result = test_client.query('SELECT * FROM native_json_test ORDER BY key') + json1 = result.result_set[0][1] + assert json1['HKD@spéçiäl'] == 'Special K' + assert json1['key3'] == 0 + json3 = result.result_set[2][1] + assert json3['value.2'] == 'v2_rules' + assert json3['key1'] == 0 + assert json3['key3'] == 752 + + +def test_read_formats(test_client: Client, test_table_engine: str): + pass diff --git a/tests/integration_tests/test_native_fuzz.py b/tests/integration_tests/test_native_fuzz.py index e24fc7db..4f5770e4 100644 --- a/tests/integration_tests/test_native_fuzz.py +++ b/tests/integration_tests/test_native_fuzz.py @@ -12,8 +12,7 @@ # pylint: disable=duplicate-code def test_query_fuzz(test_client: Client, test_table_engine: str): - server_major = test_client.server_version.split('.')[0] - if int(server_major) < 22: + if not test_client.min_version('22.1'): unsupported_types.add('Date32') unsupported_types.add('Bool') unsupported_types.add('UInt128') From 37e4847e3ae3e0fb5b245a1a80d64c15c8d43205 Mon Sep 17 00:00:00 2001 From: Geoff 
Genz Date: Mon, 1 Aug 2022 05:05:01 -0600 Subject: [PATCH 13/25] Enhance JSON library selection and usage, use QueryContext for all queries --- CHANGELOG.md | 18 +++++- clickhouse_connect/datatypes/base.py | 15 ++--- clickhouse_connect/datatypes/container.py | 26 ++++---- clickhouse_connect/datatypes/format.py | 9 +-- clickhouse_connect/datatypes/string.py | 1 + clickhouse_connect/driver/client.py | 63 ++++++++++++++++--- clickhouse_connect/driver/common.py | 2 +- clickhouse_connect/driver/httpclient.py | 18 ++---- clickhouse_connect/driver/native.py | 4 +- clickhouse_connect/driver/query.py | 75 ++++++++++++++++++++++- clickhouse_connect/driver/transform.py | 32 +++------- clickhouse_connect/json_impl.py | 45 +++++++++++--- setup.py | 3 +- tests/integration_tests/test_native.py | 29 ++++++++- 14 files changed, 247 insertions(+), 93 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 60dc14f8..96a786b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,24 @@ ## ClickHouse Connect ChangeLog +### Release 0.1.7, 2022-07-28 + +#### Improvements + +* Support (experimental) JSON/Object datatype. ClickHouse Connect will take advantage of the fast orjson library if available. +* Standardize read format handling and allow setting a return data format per column or per query. + +#### Bug Fixes +* Named Tuples were not supported and would result in throwing an exception. This has been fixed. +* The client query_arrow function would return incomplete results if the query result exceeded the ClickHouse max_block_size. This has been fixed. As part of the fix query_arrow method returns a PyArrow Table object. While this is a breaking change in the API it should be easy to work around. + + ### Release 0.1.6, 2022-07-06 #### Improvements -* Support Nested data types +* Support Nested data types. 
#### Bug Fixes -* Fix issue with native reads of Nullable(LowCardinality) numeric and date types -* Empty inserts will now just log a debug message instead of throwing an IndexError \ No newline at end of file +* Fix issue with native reads of Nullable(LowCardinality) numeric and date types. +* Empty inserts will now just log a debug message instead of throwing an IndexError. \ No newline at end of file diff --git a/clickhouse_connect/datatypes/base.py b/clickhouse_connect/datatypes/base.py index ef925da9..3bccf150 100644 --- a/clickhouse_connect/datatypes/base.py +++ b/clickhouse_connect/datatypes/base.py @@ -53,10 +53,11 @@ def build(cls: Type['ClickHouseType'], type_def: TypeDef): @classmethod def _active_format(cls, fmt_map: Dict[Type['ClickHouseType'], str]): - overrides = getattr(threading.local, 'ch_column_overrides', None) + t_local = threading.local() + overrides = getattr(t_local, 'ch_column_overrides', None) if overrides and cls in overrides: return overrides[cls] - overrides = getattr(threading.local, 'ch_query_overrides)', None) + overrides = getattr(t_local, 'ch_query_overrides)', None) if overrides and cls in overrides: return overrides[cls] return fmt_map.get(cls, 'native') @@ -101,19 +102,19 @@ def name(self): @property def encoding(self): - override = getattr(threading.local, 'ch_column_encoding', None) + override = getattr(threading.local(), 'ch_column_encoding', None) if override: return override - override = getattr(threading.local, 'ch_query_encoding', None) + override = getattr(threading.local(), 'ch_query_encoding', None) if override: return override return self._encoding def write_native_prefix(self, dest: MutableSequence): """ - This is something of a hack, as the only "prefix" currently used is for the LowCardinality version. 
Because of the - way the ClickHouse C++ code is written, this must be done before any data is written even if the LowCardinality column - is within a container + Prefix is primarily used is for the LowCardinality version (but see the JSON data type). Because of the + way the ClickHouse C++ code is written, this must be done before any data is written even if the + LowCardinality column is within a container. The only recognized low cardinality version is 1 :param dest: The native protocol binary write buffer """ if self.low_card: diff --git a/clickhouse_connect/datatypes/container.py b/clickhouse_connect/datatypes/container.py index d3e01ce1..930310ef 100644 --- a/clickhouse_connect/datatypes/container.py +++ b/clickhouse_connect/datatypes/container.py @@ -1,11 +1,10 @@ import array from typing import Dict, Sequence, MutableSequence, Any -from clickhouse_connect.datatypes.base import UnsupportedType, ClickHouseType, TypeDef, EMPTY_TYPE_DEF -from clickhouse_connect.datatypes.string import String -from clickhouse_connect.driver.common import read_leb128, to_leb128, array_column, must_swap, write_uint64 +from clickhouse_connect import json_impl +from clickhouse_connect.datatypes.base import ClickHouseType, TypeDef +from clickhouse_connect.driver.common import read_leb128, to_leb128, array_column, must_swap from clickhouse_connect.datatypes.registry import get_from_name -from clickhouse_connect.json_impl import json_impl class Array(ClickHouseType): @@ -116,17 +115,17 @@ def read_native_prefix(self, source: Sequence, loc: int): loc = e_type.read_native_prefix(source, loc) return loc - def read_native_data(self, source: Sequence, loc: int, num_rows: int, use_none = True): + def read_native_data(self, source: Sequence, loc: int, num_rows: int, use_none=True): columns = [] e_names = self.element_names for e_type in self.element_types: column, loc = e_type.read_native_data(source, loc, num_rows, use_none) columns.append(column) - if e_names and self.read_format != 'tuple': 
+ if e_names and self.read_format() != 'tuple': dicts = [{} for _ in range(num_rows)] for ix, x in enumerate(dicts): - for n, key in enumerate(e_names): - x[key] = columns[n][ix] + for y, key in enumerate(e_names): + x[key] = columns[y][ix] return dicts, loc return tuple(zip(*columns)), loc @@ -176,7 +175,7 @@ def read_native_prefix(self, source: Sequence, loc: int): return loc # pylint: disable=too-many-locals - def read_native_data(self, source: Sequence, loc: int, num_rows: int, use_none = True): + def read_native_data(self, source: Sequence, loc: int, num_rows: int, use_none=True): offsets, loc = array_column('Q', source, loc, num_rows) total_rows = offsets[-1] keys, loc = self.key_type.read_native_data(source, loc, total_rows, use_none) @@ -252,19 +251,20 @@ class JSON(ClickHouseType): python_type = dict def _to_row_binary(self, value: Any, dest: MutableSequence): - value = bytes(json_impl.dumps(value)) + value = json_impl.any_to_json(value) dest += to_leb128(len(value)) + value def _from_row_binary(self, source: Sequence, loc: int): - length, loc = read_leb128(source, loc) - return json_impl.loads(str(source[loc:loc + length])), loc + length + # ClickHouse will never return JSON/Object types, just tuples + return None, 0 def write_native_prefix(self, dest: MutableSequence): dest.append(0x01) + # pylint: disable=duplicate-code def write_native_data(self, column: Sequence, dest: MutableSequence): app = dest.append - to_json = json_impl.dumps + to_json = json_impl.any_to_json for x in column: v = to_json(x) sz = len(v) diff --git a/clickhouse_connect/datatypes/format.py b/clickhouse_connect/datatypes/format.py index 9cbab1f9..d8708566 100644 --- a/clickhouse_connect/datatypes/format.py +++ b/clickhouse_connect/datatypes/format.py @@ -69,13 +69,8 @@ def _convert_arguments(*args, **kwargs) -> Dict[str, str]: def _matching_types(pattern: str, fmt: str = None) -> Sequence[Type[ClickHouseType]]: - if '*' in pattern: - re_pattern = re.compile(pattern.replace('*', 
'.*'), re.IGNORECASE) - matches = [ch_type for type_name, ch_type in type_map.items() if re_pattern.match(type_name)] - elif pattern in type_map: - matches = [type_map[pattern]] - else: - matches = [] + re_pattern = re.compile(pattern.replace('*', '.*'), re.IGNORECASE) + matches = [ch_type for type_name, ch_type in type_map.items() if re_pattern.match(type_name)] if not matches: ProgrammingError(f'Unrecognized ClickHouse type {pattern} when setting formats') if fmt: diff --git a/clickhouse_connect/datatypes/string.py b/clickhouse_connect/datatypes/string.py index 45c0ef78..911a3050 100644 --- a/clickhouse_connect/datatypes/string.py +++ b/clickhouse_connect/datatypes/string.py @@ -36,6 +36,7 @@ def _read_native_python(source, loc, num_rows, encoding: str): loc += length return column, loc + # pylint: disable=duplicate-code def _write_native_binary(self, column: Union[Sequence, MutableSequence], dest: MutableSequence): encoding = self.encoding app = dest.append diff --git a/clickhouse_connect/driver/client.py b/clickhouse_connect/driver/client.py index 0079201d..5e538c17 100644 --- a/clickhouse_connect/driver/client.py +++ b/clickhouse_connect/driver/client.py @@ -9,7 +9,7 @@ from clickhouse_connect.driver.exceptions import ProgrammingError, InternalError from clickhouse_connect.driver.models import ColumnDef, SettingDef from clickhouse_connect.driver.query import QueryResult, np_result, to_pandas_df, from_pandas_df, format_query_value, \ - to_arrow + to_arrow, QueryContext logger = logging.getLogger(__name__) limit_re = re.compile(r'\s+LIMIT[$|\s]', re.IGNORECASE) @@ -69,6 +69,10 @@ def _prep_query(self, query: str, parameters: Optional[Dict[str, Any]] = None): query += f' LIMIT {self.limit}' return query + @abstractmethod + def _query_with_context(self, context: QueryContext): + pass + @abstractmethod def client_setting(self, name, value): """ @@ -77,20 +81,35 @@ def client_setting(self, name, value): :param value: Setting value """ - @abstractmethod def 
query(self, - query: str, + query: str = None, parameters: Optional[Dict[str, Any]] = None, settings: Optional[Dict[str, Any]] = None, - use_none: bool = True) -> QueryResult: + query_formats: Optional[Dict[str, str]] = None, + column_formats: Optional[Dict[str, str]] = None, + use_none: bool = True, + context: QueryContext = None) -> QueryResult: """ Main query method for SELECT, DESCRIBE and other commands that result a result matrix :param query: Query statement/format string :param parameters: Optional dictionary used to format the query :param settings: Optional dictionary of ClickHouse settings (key/string values) + :param query_formats: See QueryContext __init__ docstring + :param column_formats: See QueryContext __init__ docstring :param use_none: Use None for ClickHouse nulls instead of empty values + :param context An alternative QueryContext parameter object that contains some or all of the method arguments :return: QueryResult -- data and metadata from response """ + if context: + query_context = context.updated_copy(query, + parameters, + settings, + query_formats, + column_formats, + False) + else: + query_context = QueryContext(query, parameters, settings, query_formats, column_formats, use_none) + return self._query_with_context(query_context) @abstractmethod def raw_query(self, @@ -108,30 +127,54 @@ def raw_query(self, """ def query_np(self, - query: str, + query: str = None, parameters: Optional[Dict[str, Any]] = None, - settings: Optional[Dict[str, Any]] = None): + settings: Optional[Dict[str, Any]] = None, + query_formats: Optional[Dict[str, str]] = None, + column_formats: Optional[Dict[str, str]] = None, + context: QueryContext = None): """ Query method that results the results as a numpy array :param query: Query statement/format string :param parameters: Optional dictionary used to format the query :param settings: Optional dictionary of ClickHouse settings (key/string values) + :param query_formats: See QueryContext __init__ docstring + 
:param column_formats: See QueryContext __init__ docstring. + :param context An alternative QueryContext parameter object that contains some or all of the method arguments :return: Numpy array representing the result set """ - return np_result(self.query(query, parameters, settings, use_none=False)) + return np_result(self.query(query, + parameters, + settings, + query_formats, + column_formats, + False, + context)) def query_df(self, - query: str, + query: str = None, parameters: Optional[Dict[str, Any]] = None, - settings: Optional[Dict[str, Any]] = None): + settings: Optional[Dict[str, Any]] = None, + query_formats: Optional[Dict[str, str]] = None, + column_formats: Optional[Dict[str, str]] = None, + context: QueryContext = None): """ Query method that results the results as a pandas dataframe :param query: Query statement/format string :param parameters: Optional dictionary used to format the query :param settings: Optional dictionary of ClickHouse settings (key/string values) + :param query_formats: See QueryContext __init__ docstring + :param column_formats: See QueryContext __init__ docstring + :param context An alternative QueryContext parameter object that contains some or all of the method arguments :return: Numpy array representing the result set """ - return to_pandas_df(self.query(query, parameters, settings, use_none=False)) + return to_pandas_df(self.query(query, + parameters, + settings, + query_formats, + column_formats, + False, + context)) def query_arrow(self, query: str, diff --git a/clickhouse_connect/driver/common.py b/clickhouse_connect/driver/common.py index 4b448796..a0480ed9 100644 --- a/clickhouse_connect/driver/common.py +++ b/clickhouse_connect/driver/common.py @@ -172,4 +172,4 @@ def decimal_size(prec: int): def unescape_identifier(x: str) -> str: if x.startswith('`') and x.endswith('`'): return x[1:-1] - return x \ No newline at end of file + return x diff --git a/clickhouse_connect/driver/httpclient.py 
b/clickhouse_connect/driver/httpclient.py index fb6b081d..d4a4bf7a 100644 --- a/clickhouse_connect/driver/httpclient.py +++ b/clickhouse_connect/driver/httpclient.py @@ -14,7 +14,7 @@ from clickhouse_connect.driver.exceptions import DatabaseError, OperationalError, ProgrammingError from clickhouse_connect.driver.httpadapter import KeepAliveAdapter from clickhouse_connect.driver.native import NativeTransform -from clickhouse_connect.driver.query import QueryResult, DataResult, format_query_value +from clickhouse_connect.driver.query import QueryResult, DataResult, format_query_value, QueryContext from clickhouse_connect.driver.rowbinary import RowBinaryTransform logger = logging.getLogger(__name__) @@ -148,18 +148,12 @@ def client_setting(self, name, value): value = '1' if value else '0' self.session.params[name] = str(value) - def query(self, query: str, - parameters: Optional[Dict[str, Any]] = None, - settings: Dict[str, Any] = None, - use_none: bool = True) -> QueryResult: - """ - See BaseClient doc_string for this method - """ - final_query = self._prep_query(query, parameters) + def _query_with_context(self, context: QueryContext) -> QueryResult: + final_query = self._prep_query(context.query, context.parameters) headers = {'Content-Type': 'text/plain; charset=utf-8'} params = {'database': self.database} - params.update(self._validate_settings(settings, True)) - if columns_only_re.search(query): + params.update(self._validate_settings(context.settings, True)) + if columns_only_re.search(final_query): response = self._raw_request(final_query + ' FORMAT JSON', params, headers, retries=2) json_result = json.loads(response.content) # ClickHouse will respond with a JSON object of meta, data, and some other objects @@ -172,7 +166,7 @@ def query(self, query: str, data_result = DataResult([], tuple(names), tuple(types)) else: response = self._raw_request(self._format_query(final_query), params, headers, retries=2) - data_result = 
self.transform.parse_response(response.content, use_none=use_none) + data_result = self.transform.parse_response(response.content, context) summary = {} if 'X-ClickHouse-Summary' in response.headers: try: diff --git a/clickhouse_connect/driver/native.py b/clickhouse_connect/driver/native.py index 3c738350..7356c2b9 100644 --- a/clickhouse_connect/driver/native.py +++ b/clickhouse_connect/driver/native.py @@ -18,6 +18,7 @@ def _transform_response(self, source: Sequence, context: QueryContext) -> DataRe result = [] total_size = len(source) block = 0 + use_none = context.use_none while loc < total_size: result_block = [] num_cols, loc = read_leb128(source, loc) @@ -32,7 +33,8 @@ def _transform_response(self, source: Sequence, context: QueryContext) -> DataRe col_types.append(col_type) else: col_type = col_types[col_num] - column, loc = col_type.read_native_column(source, loc, num_rows, use_none=context.use_none) + context.start_column(name, col_type) + column, loc = col_type.read_native_column(source, loc, num_rows, use_none=use_none) result_block.append(column) block += 1 result.extend(list(zip(*result_block))) diff --git a/clickhouse_connect/driver/query.py b/clickhouse_connect/driver/query.py index f37bf974..d41a6cc2 100644 --- a/clickhouse_connect/driver/query.py +++ b/clickhouse_connect/driver/query.py @@ -1,12 +1,15 @@ import ipaddress +import threading import uuid from enum import Enum -from typing import NamedTuple, Any, Tuple, Dict, Sequence +from typing import NamedTuple, Any, Tuple, Dict, Sequence, Optional, Union from datetime import date, datetime from pytz import UTC from clickhouse_connect.datatypes.base import ClickHouseType +from clickhouse_connect.datatypes.container import Array +from clickhouse_connect.datatypes.format import format_map from clickhouse_connect.driver.options import HAS_NUMPY, HAS_PANDAS, check_pandas, check_numpy, HAS_ARROW, check_arrow if HAS_PANDAS: @@ -19,10 +22,78 @@ import pyarrow +class QueryContext: + """ + 
Argument/parameter object for queries + """ + def __init__(self, + query: str = None, + parameters: Optional[Dict[str, Any]] = None, + settings: Optional[Dict[str, Any]] = None, + query_formats: Optional[Dict[str, str]] = None, + column_formats: Optional[Dict[str, Union[str, Dict[str, str]]]] = None, + use_none: bool = True): + self.query = query + self.parameters = parameters or {} + self.settings = settings or {} + self.query_formats = format_map(query_formats) + self.column_formats = column_formats or {} + self.use_none = use_none + + def updated_copy(self, + query: Optional[str] = None, + parameters: Optional[Dict[str, Any]] = None, + settings: Optional[Dict[str, Any]] = None, + query_formats: Optional[Dict[str, str]] = None, + column_formats: Optional[Dict[str, str]] = None, + use_none: Optional[bool] = None) -> 'QueryContext': + copy = QueryContext() + copy.query = query or self.query + copy.parameters = self.parameters.update(parameters or {}) + copy.settings = self.settings.update(settings or {}) + copy.query_formats = self.query_formats.update(query_formats or {}) + copy.column_formats = self.column_formats.update(column_formats or {}) + copy.use_none = use_none if use_none is not None else self.use_none + return copy + + def __enter__(self): + if self.query_formats: + threading.local().ch_query_overrides = self.query_formats + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + t_local = threading.local() + if self.query_formats: + del t_local.ch_query_overrides + try: + del t_local.ch_column_overrides + except AttributeError: + pass + + def start_column(self, name: str, ch_type: ClickHouseType): + t_local = threading.local() + if name in self.column_formats: + fmts = self.column_formats[name] + if isinstance(fmts, str): + if isinstance(ch_type, Array): + fmt_map = {ch_type.element_type: fmts} + else: + fmt_map = {ch_type: fmts} + else: + fmt_map = format_map(fmts) + t_local.ch_column_overrides = fmt_map + else: + try: + del 
t_local.ch_column_overrides + except AttributeError: + pass + + class QueryResult(): """ Wrapper class for query return values and metadata """ + def __init__(self, result_set: Sequence[Sequence[Any]], column_names: Tuple[str, ...], column_types: Tuple[ClickHouseType, ...], query_id: str = None, summary: Dict[str, Any] = None): self.result_set = result_set @@ -115,5 +186,5 @@ def from_pandas_df(df: 'pa.DataFrame'): def to_arrow(content: bytes): check_arrow() - reader = pyarrow.RecordBatchFileReader(content) + reader = pyarrow.ipc.RecordBatchFileReader(content) return reader.read_all() diff --git a/clickhouse_connect/driver/transform.py b/clickhouse_connect/driver/transform.py index 16f2250a..9409329c 100644 --- a/clickhouse_connect/driver/transform.py +++ b/clickhouse_connect/driver/transform.py @@ -1,42 +1,24 @@ -import threading from abc import ABC, abstractmethod -from typing import Sequence, Dict, Union, Any, Optional +from typing import Sequence, Dict, Union, Any from clickhouse_connect.datatypes.base import ClickHouseType -from clickhouse_connect.datatypes.format import format_map -from clickhouse_connect.driver.query import DataResult +from clickhouse_connect.driver.query import DataResult, QueryContext -class QueryContext: - def __init__(self, use_none: bool, type_formats: Optional[Dict[str, str]], - _column_formats: Optional[Dict[str, Union[str, Dict[str, str]]]]): - self.query_overrides = format_map(type_formats) - self.use_none = use_none - - def __enter__(self): - if self.query_overrides: - threading.local.ch_query_overrides = self.query_overrides - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - if self.query_overrides: - del threading.local.ch_query_overrides +_EMPTY_CONTEXT = QueryContext() class DataTransform(ABC): - def parse_response(self, source: Sequence, type_formats: Dict[str, str] = None, use_none: bool = True, - column_formats: Dict[str, Union[str, Dict[str, str]]] = None) -> DataResult: + def parse_response(self, source: 
Sequence, context: QueryContext = _EMPTY_CONTEXT) -> DataResult: """ Decodes the ClickHouse byte buffer response into rows of native Python data :param source: A byte buffer or similar source - :param use_none: Use None python value for ClickHouse nulls (otherwise use type "zero value") - :param type_formats: Dictionary of ClickHouse type names/patterns and response formats - :param column_formats: Use None values for ClickHouse NULLs (otherwise use zero/empty values) + :param context: The QueryContext to use in processing the response :return: DataResult -- data matrix, column names, column types """ - with QueryContext(use_none, type_formats, column_formats) as query_context: - return self._transform_response(source, query_context) + with context: + return self._transform_response(source, context) @abstractmethod def build_insert(self, data: Sequence[Sequence[Any]], *, column_names: Sequence[str], diff --git a/clickhouse_connect/json_impl.py b/clickhouse_connect/json_impl.py index 5728520a..037d21e9 100644 --- a/clickhouse_connect/json_impl.py +++ b/clickhouse_connect/json_impl.py @@ -1,17 +1,44 @@ import logging +import json as py_json +from collections import OrderedDict +from typing import Any -logger = logging.getLogger(__name__) +try: + import orjson + any_to_json = orjson.dumps +except ImportError: + orjson = None try: - import orjson as json_impl - logger.info('Using orjson as the JSON implementation') + import ujson except ImportError: - try: - import ujson as json_impl - logger.info('Using ujson as the JSON implementation') - except ImportError: - import json as json_impl - logger.info('Using default JSON implementation') + ujson = None + + +def _pyjson_to_json(obj: Any) -> bytes: + return py_json.dumps(obj).encode() + + +logger = logging.getLogger(__name__) +_to_json = OrderedDict() +_to_json['orjson'] = orjson.dumps if orjson else None +_to_json['ujson'] = ujson.dumps if ujson else None +_to_json['python'] = _pyjson_to_json + +any_to_json = 
_pyjson_to_json + +def set_json_library(impl: str = None): + global any_to_json + if impl: + func = _to_json.get(impl) + if not func: + raise NotImplementedError(f'JSON library {impl} is not supported') + for library, func in _to_json.items(): + if func: + logger.info('Using %s library for writing JSON byte strings', library) + any_to_json = func + break +set_json_library() \ No newline at end of file diff --git a/setup.py b/setup.py index 12084cf8..46b7e956 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,8 @@ def run_setup(try_c: bool = True): 'superset': ['apache_superset>=1.4.1', 'sqlalchemy>1.3.21, <1.4'], 'numpy': ['numpy'], 'pandas': ['pandas'], - 'arrow': ['pyarrow'] + 'arrow': ['pyarrow'], + 'orjson': ['orjson'] }, entry_points={ 'sqlalchemy.dialects': ['clickhousedb.connect=clickhouse_connect.cc_sqlalchemy.dialect:ClickHouseDialect', diff --git a/tests/integration_tests/test_native.py b/tests/integration_tests/test_native.py index 2602a8dc..fc89a8d8 100644 --- a/tests/integration_tests/test_native.py +++ b/tests/integration_tests/test_native.py @@ -1,5 +1,9 @@ +import uuid +from ipaddress import IPv4Address + import pytest +from clickhouse_connect.datatypes.format import set_default_formats, clear_default_format from clickhouse_connect.driver import Client @@ -12,7 +16,7 @@ def test_low_card(test_client: Client, test_table_engine: str): assert len(result.result_set) == 1 -def test_json_insert(test_client: Client, test_table_engine: str): +def test_json(test_client: Client, test_table_engine: str): if not test_client.min_version('22.6.1'): pytest.skip('JSON test skipped for old version {test_client.server_version}') test_client.command('DROP TABLE IF EXISTS native_json_test') @@ -33,4 +37,25 @@ def test_json_insert(test_client: Client, test_table_engine: str): def test_read_formats(test_client: Client, test_table_engine: str): - pass + test_client.command('DROP TABLE IF EXISTS read_format_test') + test_client.command('CREATE TABLE read_format_test (key 
Int32, uuid UUID, fs FixedString(10), ipv4 IPv4)' + + f'Engine {test_table_engine} ORDER BY key') + uuid1 = uuid.UUID('23E45688e89B-12D3-3273-426614174000') + uuid2 = uuid.UUID('77AA3278-3728-12d3-5372-000377723832') + row1 = (1, uuid1, '530055777k', '10.251.30.50') + row2 = (2, uuid2, 'short str', '10.44.75.20') + test_client.insert('read_format_test', [row1, row2]) + result = test_client.query('SELECT * FROM read_format_test').result_set + assert result[0][1] == uuid1 + assert result[1][3] == IPv4Address('10.44.75.20') + assert result[0][2] == b'\x35\x33\x30\x30\x35\x35\x37\x37\x37\x6b' + set_default_formats('uuid', 'string', 'ip*', 'string', 'FixedString', 'string') + result = test_client.query('SELECT * FROM read_format_test').result_set + assert result[0][1] == '23e45688-e89b-12d3-3273-426614174000' + assert result[1][3] == '10.44.75.20' + assert result[0][2] == '530055777k' + clear_default_format('ipv4') + result = test_client.query('SELECT * FROM read_format_test').result_set + assert result[0][1] == '23e45688-e89b-12d3-3273-426614174000' + assert result[1][3] == IPv4Address('10.44.75.20') + assert result[0][2] == '530055777k' From cf7b2643fb91c909372de503ee24f556da46d254 Mon Sep 17 00:00:00 2001 From: Geoff Genz Date: Mon, 1 Aug 2022 05:10:23 -0600 Subject: [PATCH 14/25] Fix lint --- clickhouse_connect/driver/transform.py | 2 +- clickhouse_connect/json_impl.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/clickhouse_connect/driver/transform.py b/clickhouse_connect/driver/transform.py index 9409329c..c52686ff 100644 --- a/clickhouse_connect/driver/transform.py +++ b/clickhouse_connect/driver/transform.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Sequence, Dict, Union, Any +from typing import Sequence, Any from clickhouse_connect.datatypes.base import ClickHouseType from clickhouse_connect.driver.query import DataResult, QueryContext diff --git a/clickhouse_connect/json_impl.py 
b/clickhouse_connect/json_impl.py index 037d21e9..c0c2e0df 100644 --- a/clickhouse_connect/json_impl.py +++ b/clickhouse_connect/json_impl.py @@ -29,7 +29,7 @@ def _pyjson_to_json(obj: Any) -> bytes: def set_json_library(impl: str = None): - global any_to_json + global any_to_json # pylint: disable=global-statement if impl: func = _to_json.get(impl) if not func: @@ -41,4 +41,4 @@ def set_json_library(impl: str = None): break -set_json_library() \ No newline at end of file +set_json_library() From 6081c15effd1d6a027abffffc82a7ab28dbbc4de Mon Sep 17 00:00:00 2001 From: Geoff Genz Date: Mon, 1 Aug 2022 10:19:58 -0600 Subject: [PATCH 15/25] Formatting fixes plus more format tests --- clickhouse_connect/datatypes/base.py | 11 ++++---- clickhouse_connect/datatypes/container.py | 3 +++ clickhouse_connect/driver/query.py | 21 +++++++-------- clickhouse_connect/driver/threads.py | 5 ++++ tests/integration_tests/test_native.py | 33 ++++++++++++++++++----- 5 files changed, 50 insertions(+), 23 deletions(-) create mode 100644 clickhouse_connect/driver/threads.py diff --git a/clickhouse_connect/datatypes/base.py b/clickhouse_connect/datatypes/base.py index 3bccf150..ca0427e9 100644 --- a/clickhouse_connect/datatypes/base.py +++ b/clickhouse_connect/datatypes/base.py @@ -1,5 +1,4 @@ import array -import threading import logging from abc import abstractmethod, ABC @@ -9,6 +8,7 @@ from clickhouse_connect.driver.common import array_column, array_type, int_size, read_uint64, write_array, \ write_uint64, low_card_version from clickhouse_connect.driver.exceptions import NotSupportedError +from clickhouse_connect.driver.threads import query_settings logger = logging.getLogger(__name__) ch_read_formats = {} @@ -53,11 +53,10 @@ def build(cls: Type['ClickHouseType'], type_def: TypeDef): @classmethod def _active_format(cls, fmt_map: Dict[Type['ClickHouseType'], str]): - t_local = threading.local() - overrides = getattr(t_local, 'ch_column_overrides', None) + overrides = 
getattr(query_settings, 'column_overrides', None) if overrides and cls in overrides: return overrides[cls] - overrides = getattr(t_local, 'ch_query_overrides)', None) + overrides = getattr(query_settings, 'query_overrides', None) if overrides and cls in overrides: return overrides[cls] return fmt_map.get(cls, 'native') @@ -102,10 +101,10 @@ def name(self): @property def encoding(self): - override = getattr(threading.local(), 'ch_column_encoding', None) + override = getattr(query_settings, 'column_encoding', None) if override: return override - override = getattr(threading.local(), 'ch_query_encoding', None) + override = getattr(query_settings, 'query_encoding', None) if override: return override return self._encoding diff --git a/clickhouse_connect/datatypes/container.py b/clickhouse_connect/datatypes/container.py index 930310ef..0148543f 100644 --- a/clickhouse_connect/datatypes/container.py +++ b/clickhouse_connect/datatypes/container.py @@ -126,6 +126,9 @@ def read_native_data(self, source: Sequence, loc: int, num_rows: int, use_none=T for ix, x in enumerate(dicts): for y, key in enumerate(e_names): x[key] = columns[y][ix] + if self.read_format() == 'json': + to_json = json_impl.any_to_json + return [to_json(x) for x in dicts], loc return dicts, loc return tuple(zip(*columns)), loc diff --git a/clickhouse_connect/driver/query.py b/clickhouse_connect/driver/query.py index d41a6cc2..9287d774 100644 --- a/clickhouse_connect/driver/query.py +++ b/clickhouse_connect/driver/query.py @@ -1,5 +1,4 @@ import ipaddress -import threading import uuid from enum import Enum @@ -11,6 +10,7 @@ from clickhouse_connect.datatypes.container import Array from clickhouse_connect.datatypes.format import format_map from clickhouse_connect.driver.options import HAS_NUMPY, HAS_PANDAS, check_pandas, check_numpy, HAS_ARROW, check_arrow +from clickhouse_connect.driver.threads import query_settings if HAS_PANDAS: import pandas as pa @@ -39,6 +39,7 @@ def __init__(self, self.query_formats = 
format_map(query_formats) self.column_formats = column_formats or {} self.use_none = use_none + self.thread_local = None def updated_copy(self, query: Optional[str] = None, @@ -58,38 +59,36 @@ def updated_copy(self, def __enter__(self): if self.query_formats: - threading.local().ch_query_overrides = self.query_formats + query_settings.query_overrides = self.query_formats return self def __exit__(self, exc_type, exc_val, exc_tb): - t_local = threading.local() if self.query_formats: - del t_local.ch_query_overrides + del query_settings.query_overrides try: - del t_local.ch_column_overrides + del query_settings.column_overrides except AttributeError: pass def start_column(self, name: str, ch_type: ClickHouseType): - t_local = threading.local() if name in self.column_formats: fmts = self.column_formats[name] if isinstance(fmts, str): if isinstance(ch_type, Array): - fmt_map = {ch_type.element_type: fmts} + fmt_map = {ch_type.element_type.__class__: fmts} else: - fmt_map = {ch_type: fmts} + fmt_map = {ch_type.__class__: fmts} else: fmt_map = format_map(fmts) - t_local.ch_column_overrides = fmt_map + query_settings.column_overrides = fmt_map else: try: - del t_local.ch_column_overrides + del query_settings.column_overrides except AttributeError: pass -class QueryResult(): +class QueryResult: """ Wrapper class for query return values and metadata """ diff --git a/clickhouse_connect/driver/threads.py b/clickhouse_connect/driver/threads.py new file mode 100644 index 00000000..9166ac1d --- /dev/null +++ b/clickhouse_connect/driver/threads.py @@ -0,0 +1,5 @@ +import threading + +query_settings = threading.local() + + diff --git a/tests/integration_tests/test_native.py b/tests/integration_tests/test_native.py index fc89a8d8..47c42333 100644 --- a/tests/integration_tests/test_native.py +++ b/tests/integration_tests/test_native.py @@ -1,5 +1,5 @@ import uuid -from ipaddress import IPv4Address +from ipaddress import IPv4Address, IPv6Address import pytest @@ -38,24 +38,45 @@ def 
test_json(test_client: Client, test_table_engine: str): def test_read_formats(test_client: Client, test_table_engine: str): test_client.command('DROP TABLE IF EXISTS read_format_test') - test_client.command('CREATE TABLE read_format_test (key Int32, uuid UUID, fs FixedString(10), ipv4 IPv4)' + - f'Engine {test_table_engine} ORDER BY key') + test_client.command('CREATE TABLE read_format_test (key Int32, uuid UUID, fs FixedString(10), ipv4 IPv4,' + + f'str_array Array(IPv6)) Engine {test_table_engine} ORDER BY key') uuid1 = uuid.UUID('23E45688e89B-12D3-3273-426614174000') uuid2 = uuid.UUID('77AA3278-3728-12d3-5372-000377723832') - row1 = (1, uuid1, '530055777k', '10.251.30.50') - row2 = (2, uuid2, 'short str', '10.44.75.20') + row1 = (1, uuid1, '530055777k', '10.251.30.50', ['2600::', '2001:4860:4860::8844']) + row2 = (2, uuid2, 'short str', '10.44.75.20', ['74:382::3332', '8700:5200::5782:3992']) test_client.insert('read_format_test', [row1, row2]) + result = test_client.query('SELECT * FROM read_format_test').result_set assert result[0][1] == uuid1 assert result[1][3] == IPv4Address('10.44.75.20') assert result[0][2] == b'\x35\x33\x30\x30\x35\x35\x37\x37\x37\x6b' + set_default_formats('uuid', 'string', 'ip*', 'string', 'FixedString', 'string') result = test_client.query('SELECT * FROM read_format_test').result_set assert result[0][1] == '23e45688-e89b-12d3-3273-426614174000' assert result[1][3] == '10.44.75.20' assert result[0][2] == '530055777k' - clear_default_format('ipv4') + assert result[0][4][1] == '2001:4860:4860::8844' + + clear_default_format('ip*') result = test_client.query('SELECT * FROM read_format_test').result_set assert result[0][1] == '23e45688-e89b-12d3-3273-426614174000' assert result[1][3] == IPv4Address('10.44.75.20') + assert result[0][4][1] == IPv6Address('2001:4860:4860::8844') assert result[0][2] == '530055777k' + + result = test_client.query('SELECT * FROM read_format_test', query_formats={'IP*': 'string'}).result_set + assert result[1][3] 
== '10.44.75.20' + + # Ensure that the query format clears + result = test_client.query('SELECT * FROM read_format_test').result_set + assert result[1][3] == IPv4Address('10.44.75.20') + + result = test_client.query('SELECT * FROM read_format_test', column_formats={'ipv4': 'string'}).result_set + assert result[1][3] == '10.44.75.20' + + # Ensure that the column format clears + result = test_client.query('SELECT * FROM read_format_test').result_set + assert result[1][3] == IPv4Address('10.44.75.20') + + From 0ffb37661c5d0f40f4c515a703cba8641dc1ddd6 Mon Sep 17 00:00:00 2001 From: Geoff Genz Date: Mon, 1 Aug 2022 10:24:13 -0600 Subject: [PATCH 16/25] Fix lint --- clickhouse_connect/driver/threads.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/clickhouse_connect/driver/threads.py b/clickhouse_connect/driver/threads.py index 9166ac1d..f856468e 100644 --- a/clickhouse_connect/driver/threads.py +++ b/clickhouse_connect/driver/threads.py @@ -1,5 +1,3 @@ import threading query_settings = threading.local() - - From b866f5eaa4bde6265fc620005e35288afe3a8ed4 Mon Sep 17 00:00:00 2001 From: Geoff Genz Date: Mon, 1 Aug 2022 10:30:04 -0600 Subject: [PATCH 17/25] Fix lint --- tests/integration_tests/test_native.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/integration_tests/test_native.py b/tests/integration_tests/test_native.py index 47c42333..6d1ab27c 100644 --- a/tests/integration_tests/test_native.py +++ b/tests/integration_tests/test_native.py @@ -78,5 +78,3 @@ def test_read_formats(test_client: Client, test_table_engine: str): # Ensure that the column format clears result = test_client.query('SELECT * FROM read_format_test').result_set assert result[1][3] == IPv4Address('10.44.75.20') - - From 5b215f03eb5d4d72d41badce2c91c6e9a5c9aa76 Mon Sep 17 00:00:00 2001 From: Geoff Genz Date: Mon, 1 Aug 2022 12:29:45 -0600 Subject: [PATCH 18/25] Expand read format tests --- clickhouse_connect/datatypes/container.py | 10 ++++++- clickhouse_connect/driver/client.py | 2 
+- clickhouse_connect/driver/query.py | 2 +- tests/integration_tests/test_native.py | 32 ++++++++++++++++++----- 4 files changed, 37 insertions(+), 9 deletions(-) diff --git a/clickhouse_connect/datatypes/container.py b/clickhouse_connect/datatypes/container.py index 0148543f..7863227f 100644 --- a/clickhouse_connect/datatypes/container.py +++ b/clickhouse_connect/datatypes/container.py @@ -86,7 +86,7 @@ def write_native_data(self, column: Sequence, dest: MutableSequence): class Tuple(ClickHouseType): _slots = 'element_names', 'element_types', 'from_rb_funcs', 'to_rb_funcs' - python_type = tuple + valid_formats = 'tuple', 'json', 'native' # native is 'tuple' for unnamed tuples, and dict for named tuples def __init__(self, type_def: TypeDef): super().__init__(type_def) @@ -99,6 +99,14 @@ def __init__(self, type_def: TypeDef): else: self._name_suffix = type_def.arg_str + @property + def python_type(self): + if self.read_format() == 'tuple': + return tuple + if self.read_format() == 'json': + return str + return dict + def _from_row_binary(self, source: bytes, loc: int): values = [] for conv in self.from_rb_funcs: diff --git a/clickhouse_connect/driver/client.py b/clickhouse_connect/driver/client.py index 5e538c17..d93a6f1c 100644 --- a/clickhouse_connect/driver/client.py +++ b/clickhouse_connect/driver/client.py @@ -86,7 +86,7 @@ def query(self, parameters: Optional[Dict[str, Any]] = None, settings: Optional[Dict[str, Any]] = None, query_formats: Optional[Dict[str, str]] = None, - column_formats: Optional[Dict[str, str]] = None, + column_formats: Optional[Dict[str, Union[str, Dict[str, str]]]] = None, use_none: bool = True, context: QueryContext = None) -> QueryResult: """ diff --git a/clickhouse_connect/driver/query.py b/clickhouse_connect/driver/query.py index 9287d774..972052b0 100644 --- a/clickhouse_connect/driver/query.py +++ b/clickhouse_connect/driver/query.py @@ -46,7 +46,7 @@ def updated_copy(self, parameters: Optional[Dict[str, Any]] = None, settings: 
Optional[Dict[str, Any]] = None, query_formats: Optional[Dict[str, str]] = None, - column_formats: Optional[Dict[str, str]] = None, + column_formats: Optional[Dict[str, Union[str, Dict[str, str]]]] = None, use_none: Optional[bool] = None) -> 'QueryContext': copy = QueryContext() copy.query = query or self.query diff --git a/tests/integration_tests/test_native.py b/tests/integration_tests/test_native.py index 6d1ab27c..f5caed39 100644 --- a/tests/integration_tests/test_native.py +++ b/tests/integration_tests/test_native.py @@ -3,7 +3,7 @@ import pytest -from clickhouse_connect.datatypes.format import set_default_formats, clear_default_format +from clickhouse_connect.datatypes.format import set_default_formats, clear_default_format, set_read_format from clickhouse_connect.driver import Client @@ -39,17 +39,20 @@ def test_json(test_client: Client, test_table_engine: str): def test_read_formats(test_client: Client, test_table_engine: str): test_client.command('DROP TABLE IF EXISTS read_format_test') test_client.command('CREATE TABLE read_format_test (key Int32, uuid UUID, fs FixedString(10), ipv4 IPv4,' + - f'str_array Array(IPv6)) Engine {test_table_engine} ORDER BY key') + 'ip_array Array(IPv6), tup Tuple(u1 UInt64, ip2 IPv4))' + + f'Engine {test_table_engine} ORDER BY key') uuid1 = uuid.UUID('23E45688e89B-12D3-3273-426614174000') uuid2 = uuid.UUID('77AA3278-3728-12d3-5372-000377723832') - row1 = (1, uuid1, '530055777k', '10.251.30.50', ['2600::', '2001:4860:4860::8844']) - row2 = (2, uuid2, 'short str', '10.44.75.20', ['74:382::3332', '8700:5200::5782:3992']) + row1 = (1, uuid1, '530055777k', '10.251.30.50', ['2600::', '2001:4860:4860::8844'], (7372, '10.20.30.203')) + row2 = (2, uuid2, 'short str', '10.44.75.20', ['74:382::3332', '8700:5200::5782:3992'], (7320, '252.18.4.50')) test_client.insert('read_format_test', [row1, row2]) result = test_client.query('SELECT * FROM read_format_test').result_set assert result[0][1] == uuid1 assert result[1][3] == 
IPv4Address('10.44.75.20') assert result[0][2] == b'\x35\x33\x30\x30\x35\x35\x37\x37\x37\x6b' + assert result[0][5]['u1'] == 7372 + assert result[0][5]['ip2'] == IPv4Address('10.20.30.203') set_default_formats('uuid', 'string', 'ip*', 'string', 'FixedString', 'string') result = test_client.query('SELECT * FROM read_format_test').result_set @@ -65,16 +68,33 @@ def test_read_formats(test_client: Client, test_table_engine: str): assert result[0][4][1] == IPv6Address('2001:4860:4860::8844') assert result[0][2] == '530055777k' - result = test_client.query('SELECT * FROM read_format_test', query_formats={'IP*': 'string'}).result_set + # Test query formats + result = test_client.query('SELECT * FROM read_format_test', query_formats={'IP*': 'string', + 'tup': 'json'}).result_set assert result[1][3] == '10.44.75.20' + assert result[0][5] == b'{"u1":7372,"ip2":"10.20.30.203"}' # Ensure that the query format clears result = test_client.query('SELECT * FROM read_format_test').result_set assert result[1][3] == IPv4Address('10.44.75.20') + assert result[0][5]['ip2'] == IPv4Address('10.20.30.203') - result = test_client.query('SELECT * FROM read_format_test', column_formats={'ipv4': 'string'}).result_set + # Test column formats + result = test_client.query('SELECT * FROM read_format_test', column_formats={'ipv4': 'string', + 'tup': 'tuple'}).result_set assert result[1][3] == '10.44.75.20' + assert result[0][5][1] == IPv4Address('10.20.30.203') # Ensure that the column format clears result = test_client.query('SELECT * FROM read_format_test').result_set assert result[1][3] == IPv4Address('10.44.75.20') + assert result[0][5]['ip2'] == IPv4Address('10.20.30.203') + + # Test sub column formats + set_read_format('tuple', 'tuple') + result = test_client.query('SELECT * FROM read_format_test', column_formats={'tup' : {'ip*': 'string'}}).result_set + assert result[0][5][1] == '10.20.30.203' + + set_read_format('tuple', 'native') + result = test_client.query('SELECT * FROM 
read_format_test', column_formats={'tup': {'ip*': 'string'}}).result_set + assert result[0][5]['ip2'] == '10.20.30.203' From 19308151bb6808615dc3e546c5ea658a2fe40fc0 Mon Sep 17 00:00:00 2001 From: Geoff Genz Date: Mon, 1 Aug 2022 13:37:01 -0600 Subject: [PATCH 19/25] Fix lint, remove spaces from python JSON serialization --- clickhouse_connect/driver/client.py | 5 ++++- clickhouse_connect/driver/query.py | 2 ++ clickhouse_connect/json_impl.py | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/clickhouse_connect/driver/client.py b/clickhouse_connect/driver/client.py index d93a6f1c..379911bc 100644 --- a/clickhouse_connect/driver/client.py +++ b/clickhouse_connect/driver/client.py @@ -81,6 +81,7 @@ def client_setting(self, name, value): :param value: Setting value """ + # pylint: disable=duplicate-code def query(self, query: str = None, parameters: Optional[Dict[str, Any]] = None, @@ -126,6 +127,7 @@ def raw_query(self, :return: bytes representing raw ClickHouse return value based on format """ + # pylint: disable=duplicate-code def query_np(self, query: str = None, parameters: Optional[Dict[str, Any]] = None, @@ -134,7 +136,7 @@ def query_np(self, column_formats: Optional[Dict[str, str]] = None, context: QueryContext = None): """ - Query method that results the results as a numpy array + Query method that returns the results as a numpy array :param query: Query statement/format string :param parameters: Optional dictionary used to format the query :param settings: Optional dictionary of ClickHouse settings (key/string values) @@ -151,6 +153,7 @@ def query_np(self, False, context)) + # pylint: disable=duplicate-code def query_df(self, query: str = None, parameters: Optional[Dict[str, Any]] = None, diff --git a/clickhouse_connect/driver/query.py b/clickhouse_connect/driver/query.py index 972052b0..ae56b620 100644 --- a/clickhouse_connect/driver/query.py +++ b/clickhouse_connect/driver/query.py @@ -26,6 +26,8 @@ class QueryContext: """ 
Argument/parameter object for queries """ + + # pylint: disable=duplicate-code def __init__(self, query: str = None, parameters: Optional[Dict[str, Any]] = None, diff --git a/clickhouse_connect/json_impl.py b/clickhouse_connect/json_impl.py index c0c2e0df..5576bb2e 100644 --- a/clickhouse_connect/json_impl.py +++ b/clickhouse_connect/json_impl.py @@ -16,7 +16,7 @@ def _pyjson_to_json(obj: Any) -> bytes: - return py_json.dumps(obj).encode() + return py_json.dumps(obj, separators=(',', ':')).encode() logger = logging.getLogger(__name__) From 353c6cdceee220c8c8f29127c735ca5ad2cb6910 Mon Sep 17 00:00:00 2001 From: Geoff Genz Date: Tue, 2 Aug 2022 13:59:30 -0600 Subject: [PATCH 20/25] format doc checkpoint --- clickhouse_connect/datatypes/base.py | 9 ++--- clickhouse_connect/driver/query.py | 53 +++++++++++++++++++++------- 2 files changed, 42 insertions(+), 20 deletions(-) diff --git a/clickhouse_connect/datatypes/base.py b/clickhouse_connect/datatypes/base.py index ca0427e9..1de56c8b 100644 --- a/clickhouse_connect/datatypes/base.py +++ b/clickhouse_connect/datatypes/base.py @@ -101,13 +101,8 @@ def name(self): @property def encoding(self): - override = getattr(query_settings, 'column_encoding', None) - if override: - return override - override = getattr(query_settings, 'query_encoding', None) - if override: - return override - return self._encoding + query_encoding = getattr(query_settings, 'query_encoding', None) + return query_encoding or self._encoding def write_native_prefix(self, dest: MutableSequence): """ diff --git a/clickhouse_connect/driver/query.py b/clickhouse_connect/driver/query.py index ae56b620..cd9bccf7 100644 --- a/clickhouse_connect/driver/query.py +++ b/clickhouse_connect/driver/query.py @@ -24,7 +24,7 @@ class QueryContext: """ - Argument/parameter object for queries + Argument/parameter object for queries. 
This context is used to set thread/query specific formats """ # pylint: disable=duplicate-code @@ -35,9 +35,30 @@ def __init__(self, query_formats: Optional[Dict[str, str]] = None, column_formats: Optional[Dict[str, Union[str, Dict[str, str]]]] = None, use_none: bool = True): + """ + Initializes various configuration settings for the query context + + :param query: Query string with Python style format value replacements + :param parameters: Optional dictionary of substitution values + :param settings: Optional ClickHouse settings for the query + :param query_formats: Optional dictionary of query formats with the key of a ClickHouse type name + (with * wildcards) and a value of valid query formats for those types. + The value 'encoding' can be sent to change the expected encoding for this query, with a value of + the desired encoding such as `latin-1` + :param column_formats: Optional dictionary of column specific formats. The key is the column name, + The value is either the format for the data column (such as 'string' for a UUID column) or a + second level "format" dictionary of a ClickHouse type name and a value of query formats. 
This + secondary dictionary can be used for nested column types such as Tuples or Maps + :param column_formats: Optional dictionary + :param use_none: + """ self.query = query self.parameters = parameters or {} self.settings = settings or {} + if query_formats: + self.encoding = query_formats.pop('encoding', None) + else: + self.encoding = None self.query_formats = format_map(query_formats) self.column_formats = column_formats or {} self.use_none = use_none @@ -50,27 +71,36 @@ def updated_copy(self, query_formats: Optional[Dict[str, str]] = None, column_formats: Optional[Dict[str, Union[str, Dict[str, str]]]] = None, use_none: Optional[bool] = None) -> 'QueryContext': + """ + Creates + :param query: + :param parameters: + :param settings: + :param query_formats: + :param column_formats: + :param use_none: + :return: + """ copy = QueryContext() copy.query = query or self.query copy.parameters = self.parameters.update(parameters or {}) copy.settings = self.settings.update(settings or {}) + if query_formats: + copy.encoding = self.encoding or query_formats.pop('encoding', None) copy.query_formats = self.query_formats.update(query_formats or {}) copy.column_formats = self.column_formats.update(column_formats or {}) copy.use_none = use_none if use_none is not None else self.use_none return copy def __enter__(self): - if self.query_formats: - query_settings.query_overrides = self.query_formats + query_settings.query_overrides = self.query_formats + query_settings.query_encoding = self.encoding return self def __exit__(self, exc_type, exc_val, exc_tb): - if self.query_formats: - del query_settings.query_overrides - try: - del query_settings.column_overrides - except AttributeError: - pass + query_settings.query_overrides = None + query_settings.column_overrides = None + query_settings.query_encoding = None def start_column(self, name: str, ch_type: ClickHouseType): if name in self.column_formats: @@ -84,10 +114,7 @@ def start_column(self, name: str, ch_type: 
ClickHouseType): fmt_map = format_map(fmts) query_settings.column_overrides = fmt_map else: - try: - del query_settings.column_overrides - except AttributeError: - pass + query_settings.column_overrides = None class QueryResult: From e2a1f4f4b9bb6dd241ce1e213fd3d36cdba72ac1 Mon Sep 17 00:00:00 2001 From: Geoff Genz Date: Tue, 2 Aug 2022 14:31:11 -0600 Subject: [PATCH 21/25] Lint fix --- clickhouse_connect/driver/query.py | 1 + 1 file changed, 1 insertion(+) diff --git a/clickhouse_connect/driver/query.py b/clickhouse_connect/driver/query.py index cd9bccf7..5db46cd4 100644 --- a/clickhouse_connect/driver/query.py +++ b/clickhouse_connect/driver/query.py @@ -22,6 +22,7 @@ import pyarrow +# pylint: disable=too-many-instance-attributes class QueryContext: """ Argument/parameter object for queries. This context is used to set thread/query specific formats From cfec41e10f85e4c3e15d2b983aef12da351307d1 Mon Sep 17 00:00:00 2001 From: Geoff Genz Date: Wed, 3 Aug 2022 03:42:00 -0600 Subject: [PATCH 22/25] Fix SQL comment related issues --- clickhouse_connect/driver/httpclient.py | 13 ++++++------ clickhouse_connect/driver/query.py | 23 +++++++++++++++++++++ tests/integration_tests/test_client.py | 3 ++- tests/unit_tests/test_driver/test_parser.py | 15 ++++++++++++++ 4 files changed, 47 insertions(+), 7 deletions(-) diff --git a/clickhouse_connect/driver/httpclient.py b/clickhouse_connect/driver/httpclient.py index d4a4bf7a..97ed2fa0 100644 --- a/clickhouse_connect/driver/httpclient.py +++ b/clickhouse_connect/driver/httpclient.py @@ -14,7 +14,8 @@ from clickhouse_connect.driver.exceptions import DatabaseError, OperationalError, ProgrammingError from clickhouse_connect.driver.httpadapter import KeepAliveAdapter from clickhouse_connect.driver.native import NativeTransform -from clickhouse_connect.driver.query import QueryResult, DataResult, format_query_value, QueryContext +from clickhouse_connect.driver.query import QueryResult, DataResult, format_query_value, QueryContext, \ 
+ remove_sql_comments from clickhouse_connect.driver.rowbinary import RowBinaryTransform logger = logging.getLogger(__name__) @@ -136,11 +137,11 @@ def __init__(self, self.session.params = self._validate_settings(settings, True) def _format_query(self, query: str) -> str: - query = query.strip() - if query.upper().startswith('INSERT ') and 'VALUES' in query.upper(): - return query - if not query.endswith(self.read_format): - query += f' FORMAT {self.read_format}' + uncommented_query = remove_sql_comments(query) + if uncommented_query.upper().startswith('INSERT ') and 'VALUES' in query.upper(): + return query # Don't format the output of INSERT statements + if not uncommented_query.endswith(self.read_format): + query += f'\nFORMAT {self.read_format}' return query def client_setting(self, name, value): diff --git a/clickhouse_connect/driver/query.py b/clickhouse_connect/driver/query.py index 5db46cd4..5278bedf 100644 --- a/clickhouse_connect/driver/query.py +++ b/clickhouse_connect/driver/query.py @@ -1,4 +1,5 @@ import ipaddress +import re import uuid from enum import Enum @@ -182,6 +183,28 @@ def format_query_value(value, server_tz=UTC): return str(value) +comment_re = re.compile(r"(\".*?\"|\'.*?\')|(/\*.*?\*/|(--\s)[^\n]*$)", re.MULTILINE | re.DOTALL) + + +def remove_sql_comments(sql: str) -> str: + """ + Remove SQL comments. 
This is useful to determine the type of SQL query, such as SELECT or INSERT, but we + don't fully trust it to correctly ignore weird quoted strings, and other edge cases, so we always pass the + original SQL to ClickHouse (which uses a full-fledged AST/ token parser) + :param sql: SQL query + :return: SQL Query without SQL comments + """ + def replacer(match): + # if the 2nd group (capturing comments) is not None, it means we have captured a + # non-quoted, actual comment string, so return nothing to remove the comment + if match.group(2): + return '' + # Otherwise we've actually captured a quoted string, so return it + return match.group(1) + + return comment_re.sub(replacer, sql) + + def np_result(result: QueryResult) -> 'np.array': """ Convert QueryResult to a numpy array diff --git a/tests/integration_tests/test_client.py b/tests/integration_tests/test_client.py index ac6525af..cfcfa46f 100644 --- a/tests/integration_tests/test_client.py +++ b/tests/integration_tests/test_client.py @@ -84,7 +84,8 @@ def test_query_with_inline_comment(test_client: Client): result = test_client.query(""" SELECT * -- This is just a comment - FROM system.tables + FROM system.tables LIMIT 77 + -- A second comment """) assert len(result.result_set) > 0 diff --git a/tests/unit_tests/test_driver/test_parser.py b/tests/unit_tests/test_driver/test_parser.py index a4065c0b..cc5bab1f 100644 --- a/tests/unit_tests/test_driver/test_parser.py +++ b/tests/unit_tests/test_driver/test_parser.py @@ -1,4 +1,5 @@ from clickhouse_connect.driver.parser import parse_callable, parse_enum +from clickhouse_connect.driver.query import remove_sql_comments def test_parse_callable(): @@ -13,3 +14,17 @@ def test_parse_callable(): def test_parse_enum(): assert parse_enum("Enum8('one' = 1)") == (('one',), (1,)) assert parse_enum("Enum16('**\\'5' = 5, '578' = 7)") == (("**'5", '578'), (5, 7)) + + +def test_remove_comments(): + sql = """SELECT -- 6dcd92a04feb50f14bbcf07c661680ba +* FROM benchmark_results /*With 
an inline comment */ WHERE result = 'True' +/* A single line */ +LIMIT +/* A multiline comment + +*/ +2 +-- 6dcd92a04feb50f14bbcf07c661680ba +""" + assert remove_sql_comments(sql) == "SELECT \n* FROM benchmark_results WHERE result = 'True'\n\nLIMIT\n\n2\n\n" From 70d2b2d981da31404acfd1cee73f6fbff6aab0ec Mon Sep 17 00:00:00 2001 From: Geoff Genz Date: Thu, 4 Aug 2022 05:49:32 -0600 Subject: [PATCH 23/25] Fix comment and sqlalchemy issues --- CHANGELOG.md | 2 +- README.md | 7 +- .../cc_sqlalchemy/datatypes/base.py | 38 ++++++--- .../cc_sqlalchemy/sql/__init__.py | 9 +-- .../cc_sqlalchemy/sql/preparer.py | 15 +++- clickhouse_connect/datatypes/network.py | 4 + clickhouse_connect/datatypes/special.py | 6 +- clickhouse_connect/driver/client.py | 41 +++++++--- clickhouse_connect/driver/common.py | 2 - clickhouse_connect/driver/httpclient.py | 34 ++++---- clickhouse_connect/driver/parser.py | 3 +- clickhouse_connect/driver/query.py | 81 ++++++++++++------- tests/unit_tests/test_sqlalchemy/test_ddl.py | 4 +- 13 files changed, 155 insertions(+), 91 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 96a786b1..f8933401 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ ## ClickHouse Connect ChangeLog -### Release 0.1.7, 2022-07-28 +### Release 0.2.0, 2022-08-04 #### Improvements diff --git a/README.md b/README.md index dc27a96b..f664cb43 100644 --- a/README.md +++ b/README.md @@ -6,10 +6,15 @@ ClickHouse HTTP interface. ### Installation + +``` +pip install clickhouse-connect +``` + ClickHouse Connect requires Python 3.7 or higher. The `cython` package must be installed prior to installing `clickhouse_connect` to build and install the optional Cython/C extensions used for improving read and write performance using the ClickHouse Native format. After installing cython if desired, clone this repository and -run `python setup.py install`from the project directory. +run `python setup.py install`from the project directory. 
### Getting Started diff --git a/clickhouse_connect/cc_sqlalchemy/datatypes/base.py b/clickhouse_connect/cc_sqlalchemy/datatypes/base.py index eaac7600..bd7a8710 100644 --- a/clickhouse_connect/cc_sqlalchemy/datatypes/base.py +++ b/clickhouse_connect/cc_sqlalchemy/datatypes/base.py @@ -5,14 +5,15 @@ from clickhouse_connect.datatypes.base import ClickHouseType, TypeDef, EMPTY_TYPE_DEF from clickhouse_connect.datatypes.registry import parse_name, type_map +from clickhouse_connect.driver.query import format_query_value logger = logging.getLogger(__name__) class ChSqlaType: """ - A SQLAlchemy TypeEngine that wraps a ClickHouseType. We don't extend TypeEngine directly, instead all concrete subclasses - will inherit from TypeEngine + A SQLAlchemy TypeEngine that wraps a ClickHouseType. We don't extend TypeEngine directly, instead all concrete + subclasses will inherit from TypeEngine. """ ch_type: ClickHouseType = None generic_type: None @@ -22,7 +23,8 @@ class ChSqlaType: def __init_subclass__(cls): """ - Registers ChSqla type in the type map and sets the underlying ClickHouseType class to use to initialize ChSqlaType instances + Registers ChSqla type in the type map and sets the underlying ClickHouseType class to use to initialize + ChSqlaType instances """ base = cls.__name__ if not cls._ch_type_cls: @@ -47,10 +49,10 @@ def build(cls, type_def: TypeDef): def __init__(self, type_def: TypeDef = EMPTY_TYPE_DEF): """ Basic constructor that does nothing but set the wrapped ClickHouseType. It is overridden in some cases - to add specific SqlAlchemy behavior when constructing subclasses "by hand", in which case the type_def parameter is - normally set to None and other keyword parameters used for construction - :param type_def: TypeDef tuple used to build the underlying ClickHouseType. 
This is normally populated by the parse_name - function + to add specific SqlAlchemy behavior when constructing subclasses "by hand", in which case the type_def + parameter is normally set to None and other keyword parameters used for construction + :param type_def: TypeDef tuple used to build the underlying ClickHouseType. This is normally populated by the + parse_name function """ self.type_def = type_def self.ch_type = self._ch_type_cls.build(type_def) @@ -74,23 +76,33 @@ def low_card(self): @staticmethod def result_processor(): """ - Override for the SqlAlchemy TypeEngine result_processor method, which is used to convert row values to the correct Python type - The core driver handles this automatically, so we always return None + Override for the SqlAlchemy TypeEngine result_processor method, which is used to convert row values to the + correct Python type. The core driver handles this automatically, so we always return None. """ return None @staticmethod def _cached_result_processor(*_): """ - Override for the SqlAlchemy TypeEngine _cached_result_processor method to prevent weird behavior when SQLAlchemy tries to cache + Override for the SqlAlchemy TypeEngine _cached_result_processor method to prevent weird behavior + when SQLAlchemy tries to cache. """ return None + @staticmethod + def _cached_literal_processor(*_): + """ + Override for the SqlAlchemy TypeEngine _cached_literal_processor. We delegate to the driver format_query_value + method and should be able to ignore literal_processor definitions in the dialect, which are verbose and + confusing. + """ + return format_query_value + def _compiler_dispatch(self, _visitor, **_): """ - Override for the SqlAlchemy TypeEngine _compiler_dispatch method to sidestep unnecessary layers and complexity when generating - the type name. 
The underlying ClickHouseType generates the correct name - :return: Name generated by the underlying driver + Override for the SqlAlchemy TypeEngine _compiler_dispatch method to sidestep unnecessary layers and complexity + when generating the type name. The underlying ClickHouseType generates the correct name + :return: Name generated by the underlying driver. """ return self.name diff --git a/clickhouse_connect/cc_sqlalchemy/sql/__init__.py b/clickhouse_connect/cc_sqlalchemy/sql/__init__.py index e4040382..1358d747 100644 --- a/clickhouse_connect/cc_sqlalchemy/sql/__init__.py +++ b/clickhouse_connect/cc_sqlalchemy/sql/__init__.py @@ -1,17 +1,10 @@ from typing import Optional from sqlalchemy import Table -from sqlalchemy.sql.compiler import RESERVED_WORDS - -from clickhouse_connect.driver.common import identifier_re - -reserved_words = RESERVED_WORDS | set('index') def quote_id(v: str) -> str: - if v in reserved_words or not identifier_re.match(v): - return f'`{v}`' - return v + return f'`{v}`' def full_table(table_name: str, schema: Optional[str] = None) -> str: diff --git a/clickhouse_connect/cc_sqlalchemy/sql/preparer.py b/clickhouse_connect/cc_sqlalchemy/sql/preparer.py index 520e4b08..5337f657 100644 --- a/clickhouse_connect/cc_sqlalchemy/sql/preparer.py +++ b/clickhouse_connect/cc_sqlalchemy/sql/preparer.py @@ -1,5 +1,18 @@ from sqlalchemy.sql.compiler import IdentifierPreparer +from clickhouse_connect.cc_sqlalchemy.sql import quote_id + class ChIdentifierPreparer(IdentifierPreparer): - pass + + quote_identifier = staticmethod(quote_id) + + def normalize_name(self, name): + return name + + def denormalize_name(self, name): + return name + + def _requires_quotes(self, _value): + return True + diff --git a/clickhouse_connect/datatypes/network.py b/clickhouse_connect/datatypes/network.py index 503e2211..352835bb 100644 --- a/clickhouse_connect/datatypes/network.py +++ b/clickhouse_connect/datatypes/network.py @@ -82,6 +82,10 @@ class IPv6(ClickHouseType): def 
python_type(self): return str if self.read_format() == 'string' else IPv6Address + @property + def np_type(self): + return 'U' if self.read_format() == 'string' else 'O' + @property def python_null(self): return '' if self.read_format() == 'string' else V6_NULL diff --git a/clickhouse_connect/datatypes/special.py b/clickhouse_connect/datatypes/special.py index a9bfb924..78425e01 100644 --- a/clickhouse_connect/datatypes/special.py +++ b/clickhouse_connect/datatypes/special.py @@ -13,7 +13,11 @@ class UUID(ClickHouseType): @property def python_null(self): - return PYUUID(int=0) if self.read_format() == 'uuid' else '' + return '' if self.read_format() == 'string' else PYUUID(0) + + @property + def np_type(self): + return 'U' if self.read_format() == 'string' else 'O' def _from_row_binary(self, source: bytearray, loc: int): int_high, loc = read_uint64(source, loc) diff --git a/clickhouse_connect/driver/client.py b/clickhouse_connect/driver/client.py index 379911bc..ee1ea201 100644 --- a/clickhouse_connect/driver/client.py +++ b/clickhouse_connect/driver/client.py @@ -1,8 +1,9 @@ import logging -import re +import pytz from abc import ABCMeta, abstractmethod from typing import Iterable, Tuple, Optional, Any, Union, Sequence, Dict +from pytz.exceptions import UnknownTimeZoneError from clickhouse_connect.datatypes.registry import get_from_name from clickhouse_connect.datatypes.base import ClickHouseType @@ -12,7 +13,6 @@ to_arrow, QueryContext logger = logging.getLogger(__name__) -limit_re = re.compile(r'\s+LIMIT[$|\s]', re.IGNORECASE) class Client(metaclass=ABCMeta): @@ -30,8 +30,13 @@ def __init__(self, database: str, query_limit: int, uri: str): :param uri: uri for error messages """ self.limit = query_limit - self.server_version, self.server_tz, self.database = \ + self.server_tz = pytz.UTC + self.server_version, server_tz, self.database = \ tuple(self.command('SELECT version(), timezone(), database()', use_database=False)) + try: + self.server_tz = 
pytz.timezone(server_tz) + except UnknownTimeZoneError: + logger.warning('Warning, server is using an unrecognized timezone %s, will use UTC default', server_tz) server_settings = self.query('SELECT name, value, changed, description, type, readonly FROM system.settings') self.server_settings = {row['name']: SettingDef(**row) for row in server_settings.named_results()} if database and not database == '__default__': @@ -61,13 +66,10 @@ def _validate_settings(self, settings: Optional[Dict[str, Any]], stringify: bool validated[key] = value return validated - def _prep_query(self, query: str, parameters: Optional[Dict[str, Any]] = None): - if parameters: - escaped = {k: format_query_value(v, self.server_tz) for k, v in parameters.items()} - query %= escaped - if self.limit and not limit_re.search(query) and 'SELECT ' in query.upper(): - query += f' LIMIT {self.limit}' - return query + def _prep_query(self, context: QueryContext): + if context.is_select and not context.has_limit: + return f'{context.final_query}\n LIMIT {self.limit}' + return context.final_query @abstractmethod def _query_with_context(self, context: QueryContext): @@ -88,6 +90,7 @@ def query(self, settings: Optional[Dict[str, Any]] = None, query_formats: Optional[Dict[str, str]] = None, column_formats: Optional[Dict[str, Union[str, Dict[str, str]]]] = None, + encoding: Optional[str] = None, use_none: bool = True, context: QueryContext = None) -> QueryResult: """ @@ -97,6 +100,7 @@ def query(self, :param settings: Optional dictionary of ClickHouse settings (key/string values) :param query_formats: See QueryContext __init__ docstring :param column_formats: See QueryContext __init__ docstring + :param encoding: See QueryContext __init__ docstring :param use_none: Use None for ClickHouse nulls instead of empty values :param context An alternative QueryContext parameter object that contains some or all of the method arguments :return: QueryResult -- data and metadata from response @@ -107,9 +111,18 @@ def 
query(self, settings, query_formats, column_formats, + encoding, + self.server_tz, False) else: - query_context = QueryContext(query, parameters, settings, query_formats, column_formats, use_none) + query_context = QueryContext(query, + parameters, + settings, + query_formats, + column_formats, + encoding, + self.server_tz, + use_none) return self._query_with_context(query_context) @abstractmethod @@ -134,6 +147,7 @@ def query_np(self, settings: Optional[Dict[str, Any]] = None, query_formats: Optional[Dict[str, str]] = None, column_formats: Optional[Dict[str, str]] = None, + encoding: Optional[str] = None, context: QueryContext = None): """ Query method that returns the results as a numpy array @@ -142,6 +156,7 @@ def query_np(self, :param settings: Optional dictionary of ClickHouse settings (key/string values) :param query_formats: See QueryContext __init__ docstring :param column_formats: See QueryContext __init__ docstring. + :param encoding: See QueryContext __init__ docstring :param context An alternative QueryContext parameter object that contains some or all of the method arguments :return: Numpy array representing the result set """ @@ -150,6 +165,7 @@ def query_np(self, settings, query_formats, column_formats, + encoding, False, context)) @@ -160,6 +176,7 @@ def query_df(self, settings: Optional[Dict[str, Any]] = None, query_formats: Optional[Dict[str, str]] = None, column_formats: Optional[Dict[str, str]] = None, + encoding: Optional[str] = None, context: QueryContext = None): """ Query method that results the results as a pandas dataframe @@ -168,6 +185,7 @@ def query_df(self, :param settings: Optional dictionary of ClickHouse settings (key/string values) :param query_formats: See QueryContext __init__ docstring :param column_formats: See QueryContext __init__ docstring + :param encoding: See QueryContext __init__ docstring :param context An alternative QueryContext parameter object that contains some or all of the method arguments :return: Numpy array 
representing the result set """ @@ -176,6 +194,7 @@ def query_df(self, settings, query_formats, column_formats, + encoding, False, context)) diff --git a/clickhouse_connect/driver/common.py b/clickhouse_connect/driver/common.py index a0480ed9..dd0344af 100644 --- a/clickhouse_connect/driver/common.py +++ b/clickhouse_connect/driver/common.py @@ -1,6 +1,5 @@ import array import sys -import re from typing import Tuple, Sequence, MutableSequence @@ -11,7 +10,6 @@ array_map = {1: 'b', 2: 'h', 4: 'i', 8: 'q'} decimal_prec = {32: 9, 64: 18, 128: 38, 256: 79} -identifier_re = re.compile('^[a-zA-Z_][0-9a-zA-Z_]*$') if int_size == 2: array_map[4] = 'l' diff --git a/clickhouse_connect/driver/httpclient.py b/clickhouse_connect/driver/httpclient.py index 97ed2fa0..ed81da23 100644 --- a/clickhouse_connect/driver/httpclient.py +++ b/clickhouse_connect/driver/httpclient.py @@ -14,8 +14,7 @@ from clickhouse_connect.driver.exceptions import DatabaseError, OperationalError, ProgrammingError from clickhouse_connect.driver.httpadapter import KeepAliveAdapter from clickhouse_connect.driver.native import NativeTransform -from clickhouse_connect.driver.query import QueryResult, DataResult, format_query_value, QueryContext, \ - remove_sql_comments +from clickhouse_connect.driver.query import QueryResult, DataResult, QueryContext, finalize_query from clickhouse_connect.driver.rowbinary import RowBinaryTransform logger = logging.getLogger(__name__) @@ -136,26 +135,23 @@ def __init__(self, super().__init__(database=database, query_limit=query_limit, uri=self.url) self.session.params = self._validate_settings(settings, True) - def _format_query(self, query: str) -> str: - uncommented_query = remove_sql_comments(query) - if uncommented_query.upper().startswith('INSERT ') and 'VALUES' in query.upper(): - return query # Don't format the output of INSERT statements - if not uncommented_query.endswith(self.read_format): - query += f'\nFORMAT {self.read_format}' - return query - def 
client_setting(self, name, value): if isinstance(value, bool): value = '1' if value else '0' self.session.params[name] = str(value) + def _prep_query(self, context: QueryContext): + final_query = super()._prep_query(context) + if context.is_insert: + return final_query + return f'{final_query}\n FORMAT {self.write_format}' + def _query_with_context(self, context: QueryContext) -> QueryResult: - final_query = self._prep_query(context.query, context.parameters) headers = {'Content-Type': 'text/plain; charset=utf-8'} params = {'database': self.database} params.update(self._validate_settings(context.settings, True)) - if columns_only_re.search(final_query): - response = self._raw_request(final_query + ' FORMAT JSON', params, headers, retries=2) + if columns_only_re.search(context.uncommented_query): + response = self._raw_request(f'{context.final_query}\n FORMAT JSON', params, headers, retries=2) json_result = json.loads(response.content) # ClickHouse will respond with a JSON object of meta, data, and some other objects # We just grab the column names and column types from the metadata sub object @@ -166,7 +162,7 @@ def _query_with_context(self, context: QueryContext) -> QueryResult: types.append(registry.get_from_name(col['type'])) data_result = DataResult([], tuple(names), tuple(types)) else: - response = self._raw_request(self._format_query(final_query), params, headers, retries=2) + response = self._raw_request(self._prep_query(context), params, headers, retries=2) data_result = self.transform.parse_response(response.content, context) summary = {} if 'X-ClickHouse-Summary' in response.headers: @@ -205,9 +201,7 @@ def command(self, """ See BaseClient doc_string for this method """ - if parameters: - escaped = {k: format_query_value(v, self.server_tz) for k, v in parameters.items()} - cmd %= escaped + cmd = finalize_query(cmd, parameters, self.server_tz) headers = {} params = {} payload = None @@ -286,9 +280,9 @@ def raw_query(self, """ See BaseClient doc_string for 
this method """ - final_query = self._prep_query(query, parameters) - if fmt and ' FORMAT ' not in query.upper(): - final_query += f' FORMAT {fmt}' + final_query = finalize_query(query, parameters, self.server_tz) + if fmt: + final_query += f'\n FORMAT {fmt}' return self._raw_request(final_query, self._validate_settings(settings, True)).content diff --git a/clickhouse_connect/driver/parser.py b/clickhouse_connect/driver/parser.py index 1815734c..a158e7f9 100644 --- a/clickhouse_connect/driver/parser.py +++ b/clickhouse_connect/driver/parser.py @@ -1,10 +1,9 @@ from typing import Union, Tuple - -# pylint: disable=too-many-branches from clickhouse_connect.driver.common import unescape_identifier +# pylint: disable=too-many-branches def parse_callable(expr) -> Tuple[str, Tuple[Union[str, int], ...], str]: """ Parses a single level ClickHouse optionally 'callable' function/identifier. The identifier is returned as the diff --git a/clickhouse_connect/driver/query.py b/clickhouse_connect/driver/query.py index 5278bedf..6713db6c 100644 --- a/clickhouse_connect/driver/query.py +++ b/clickhouse_connect/driver/query.py @@ -1,11 +1,11 @@ import ipaddress import re import uuid +import pytz from enum import Enum from typing import NamedTuple, Any, Tuple, Dict, Sequence, Optional, Union -from datetime import date, datetime -from pytz import UTC +from datetime import date, datetime, tzinfo from clickhouse_connect.datatypes.base import ClickHouseType from clickhouse_connect.datatypes.container import Array @@ -22,6 +22,10 @@ if HAS_ARROW: import pyarrow +limit_re = re.compile(r'\s+LIMIT($|\s)', re.IGNORECASE) +select_re = re.compile(r'(^|\s)SELECT\s', re.IGNORECASE) +insert_re = re.compile(r'(^|\s)INSERT\s*INTO', re.IGNORECASE) + # pylint: disable=too-many-instance-attributes class QueryContext: @@ -31,11 +35,13 @@ class QueryContext: # pylint: disable=duplicate-code def __init__(self, - query: str = None, + query: str = '', parameters: Optional[Dict[str, Any]] = None, settings: 
Optional[Dict[str, Any]] = None, query_formats: Optional[Dict[str, str]] = None, column_formats: Optional[Dict[str, Union[str, Dict[str, str]]]] = None, + encoding: Optional[str] = None, + server_tz: tzinfo = pytz.UTC, use_none: bool = True): """ Initializes various configuration settings for the query context @@ -51,20 +57,38 @@ def __init__(self, The value is either the format for the data column (such as 'string' for a UUID column) or a second level "format" dictionary of a ClickHouse type name and a value of query formats. This secondary dictionary can be used for nested column types such as Tuples or Maps + :param encoding: Optional string encoding for this query, such as 'latin-1' :param column_formats: Optional dictionary :param use_none: """ self.query = query self.parameters = parameters or {} self.settings = settings or {} - if query_formats: - self.encoding = query_formats.pop('encoding', None) - else: - self.encoding = None - self.query_formats = format_map(query_formats) + self.query_formats = query_formats or {} self.column_formats = column_formats or {} + self.encoding = encoding + self.server_tz = server_tz self.use_none = use_none - self.thread_local = None + self.final_query = finalize_query(query, parameters, server_tz) + self._uncommented_query = None + + @property + def uncommented_query(self) -> str: + if not self._uncommented_query: + self._uncommented_query = remove_sql_comments(self.final_query) + return self._uncommented_query + + @property + def is_select(self) -> bool: + return select_re.search(self.uncommented_query) is not None + + @property + def has_limit(self) -> bool: + return limit_re.search(self.uncommented_query) is not None + + @property + def is_insert(self) -> bool: + return insert_re.search(self.uncommented_query) is not None def updated_copy(self, query: Optional[str] = None, @@ -72,30 +96,23 @@ def updated_copy(self, settings: Optional[Dict[str, Any]] = None, query_formats: Optional[Dict[str, str]] = None, column_formats: 
Optional[Dict[str, Union[str, Dict[str, str]]]] = None, + encoding: Optional[str] = None, + server_tz: Optional[tzinfo] = None, use_none: Optional[bool] = None) -> 'QueryContext': """ - Creates - :param query: - :param parameters: - :param settings: - :param query_formats: - :param column_formats: - :param use_none: - :return: + Creates Query context copy with parameters overridden/updated as appropriate """ - copy = QueryContext() - copy.query = query or self.query - copy.parameters = self.parameters.update(parameters or {}) - copy.settings = self.settings.update(settings or {}) - if query_formats: - copy.encoding = self.encoding or query_formats.pop('encoding', None) - copy.query_formats = self.query_formats.update(query_formats or {}) - copy.column_formats = self.column_formats.update(column_formats or {}) - copy.use_none = use_none if use_none is not None else self.use_none - return copy + return QueryContext(query or self.query, + self.parameters.update(parameters or {}), + self.settings.update(settings or {}), + self.query_formats.update(query_formats or {}), + self.column_formats.update(column_formats or {}), + encoding if encoding else self.encoding, + server_tz if server_tz else self.server_tz, + use_none if use_none is not None else self.use_none) def __enter__(self): - query_settings.query_overrides = self.query_formats + query_settings.query_overrides = format_map(self.query_formats) query_settings.query_encoding = self.encoding return self @@ -151,8 +168,14 @@ class DataResult(NamedTuple): must_escape = (BS, '\'') +def finalize_query(query: str, parameters: Optional[Dict[str, Any]], tz: Optional[tzinfo] = None) -> str: + if not parameters: + return query + return query % {k: format_query_value(v, tz) for k, v in parameters.items()} + + # pylint: disable=too-many-return-statements -def format_query_value(value, server_tz=UTC): +def format_query_value(value: Any, server_tz: tzinfo = pytz.UTC): """ Format Python values in a ClickHouse query :param value: 
Python object diff --git a/tests/unit_tests/test_sqlalchemy/test_ddl.py b/tests/unit_tests/test_sqlalchemy/test_ddl.py index aebe18ab..f81becd8 100644 --- a/tests/unit_tests/test_sqlalchemy/test_ddl.py +++ b/tests/unit_tests/test_sqlalchemy/test_ddl.py @@ -8,12 +8,12 @@ dialect = ClickHouseDialect() replicated_mt_ddl = """\ -CREATE TABLE replicated_mt_test (key UInt64) Engine ReplicatedMergeTree('/clickhouse/tables/repl_mt_test',\ +CREATE TABLE `replicated_mt_test` (`key` UInt64) Engine ReplicatedMergeTree('/clickhouse/tables/repl_mt_test',\ '{replica}') ORDER BY key\ """ replacing_mt_ddl = """\ -CREATE TABLE replacing_mt_test (key UInt32, date DateTime) Engine ReplacingMergeTree(date) ORDER BY key\ +CREATE TABLE `replacing_mt_test` (`key` UInt32, `date` DateTime) Engine ReplacingMergeTree(date) ORDER BY key\ """ From 4383bb8430396c3fb6f1533b518afa4bafcd6709 Mon Sep 17 00:00:00 2001 From: Geoff Genz Date: Thu, 4 Aug 2022 07:28:20 -0600 Subject: [PATCH 24/25] Lint fixes --- clickhouse_connect/cc_sqlalchemy/sql/preparer.py | 7 ------- clickhouse_connect/driver/client.py | 5 ++--- clickhouse_connect/driver/httpadapter.py | 2 ++ clickhouse_connect/driver/query.py | 6 +++--- clickhouse_connect/json_impl.py | 6 +++--- 5 files changed, 10 insertions(+), 16 deletions(-) diff --git a/clickhouse_connect/cc_sqlalchemy/sql/preparer.py b/clickhouse_connect/cc_sqlalchemy/sql/preparer.py index 5337f657..4450b51c 100644 --- a/clickhouse_connect/cc_sqlalchemy/sql/preparer.py +++ b/clickhouse_connect/cc_sqlalchemy/sql/preparer.py @@ -7,12 +7,5 @@ class ChIdentifierPreparer(IdentifierPreparer): quote_identifier = staticmethod(quote_id) - def normalize_name(self, name): - return name - - def denormalize_name(self, name): - return name - def _requires_quotes(self, _value): return True - diff --git a/clickhouse_connect/driver/client.py b/clickhouse_connect/driver/client.py index ee1ea201..9cbf79ce 100644 --- a/clickhouse_connect/driver/client.py +++ b/clickhouse_connect/driver/client.py 
@@ -9,8 +9,7 @@ from clickhouse_connect.datatypes.base import ClickHouseType from clickhouse_connect.driver.exceptions import ProgrammingError, InternalError from clickhouse_connect.driver.models import ColumnDef, SettingDef -from clickhouse_connect.driver.query import QueryResult, np_result, to_pandas_df, from_pandas_df, format_query_value, \ - to_arrow, QueryContext +from clickhouse_connect.driver.query import QueryResult, np_result, to_pandas_df, from_pandas_df, to_arrow, QueryContext logger = logging.getLogger(__name__) @@ -83,7 +82,7 @@ def client_setting(self, name, value): :param value: Setting value """ - # pylint: disable=duplicate-code + # pylint: disable=duplicate-code,too-many-arguments def query(self, query: str = None, parameters: Optional[Dict[str, Any]] = None, diff --git a/clickhouse_connect/driver/httpadapter.py b/clickhouse_connect/driver/httpadapter.py index d1640a94..b3cb24c0 100644 --- a/clickhouse_connect/driver/httpadapter.py +++ b/clickhouse_connect/driver/httpadapter.py @@ -19,6 +19,8 @@ class KeepAliveAdapter(HTTPAdapter): """ Extended HTTP adapter that sets preferred keep alive options """ + + # pylint: disable=no-member def __init__(self, **kwargs): self.socket_options = core_socket_options.copy() interval = kwargs.pop('keep_interval', KEEP_INTERVAL) diff --git a/clickhouse_connect/driver/query.py b/clickhouse_connect/driver/query.py index 6713db6c..30b3705d 100644 --- a/clickhouse_connect/driver/query.py +++ b/clickhouse_connect/driver/query.py @@ -33,7 +33,7 @@ class QueryContext: Argument/parameter object for queries. 
This context is used to set thread/query specific formats """ - # pylint: disable=duplicate-code + # pylint: disable=duplicate-code,too-many-arguments def __init__(self, query: str = '', parameters: Optional[Dict[str, Any]] = None, @@ -168,10 +168,10 @@ class DataResult(NamedTuple): must_escape = (BS, '\'') -def finalize_query(query: str, parameters: Optional[Dict[str, Any]], tz: Optional[tzinfo] = None) -> str: +def finalize_query(query: str, parameters: Optional[Dict[str, Any]], server_tz: Optional[tzinfo] = None) -> str: if not parameters: return query - return query % {k: format_query_value(v, tz) for k, v in parameters.items()} + return query % {k: format_query_value(v, server_tz) for k, v in parameters.items()} # pylint: disable=too-many-return-statements diff --git a/clickhouse_connect/json_impl.py b/clickhouse_connect/json_impl.py index 5576bb2e..d51804ac 100644 --- a/clickhouse_connect/json_impl.py +++ b/clickhouse_connect/json_impl.py @@ -5,7 +5,7 @@ try: import orjson - any_to_json = orjson.dumps + any_to_json = orjson.dumps # pylint: disable=no-member except ImportError: orjson = None @@ -21,8 +21,8 @@ def _pyjson_to_json(obj: Any) -> bytes: logger = logging.getLogger(__name__) _to_json = OrderedDict() -_to_json['orjson'] = orjson.dumps if orjson else None -_to_json['ujson'] = ujson.dumps if ujson else None +_to_json['orjson'] = orjson.dumps if orjson else None # pylint: disable=no-member +_to_json['ujson'] = ujson.dumps if ujson else None # pylint: disable=c-extension-no-member _to_json['python'] = _pyjson_to_json any_to_json = _pyjson_to_json From 9b1dd8af11fc9824c5b1ce559bb40122ea8206b0 Mon Sep 17 00:00:00 2001 From: Geoff Genz Date: Thu, 4 Aug 2022 08:03:33 -0600 Subject: [PATCH 25/25] Update changelog --- .gitignore | 1 + CHANGELOG.md | 13 +++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index fa8b8db7..9a72348b 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ # Python cruft *.pyc 
+.python-version # C extensions *.so diff --git a/CHANGELOG.md b/CHANGELOG.md index f8933401..a510d9ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,12 +2,21 @@ ### Release 0.2.0, 2022-08-04 +#### Deprecation warning + +* In the next release the row_binary option for ClickHouse serialization will be removed. The performance is significantly lower than Native format, and maintaining the option adds complexity with no corresponding benefit + #### Improvements -* Support (experimental) JSON/Object datatype. ClickHouse Connect will take advantage of the fast orjson library if available. -* Standardize read format handling and allow setting a return data format per column or per query. +* Support (experimental) JSON/Object datatype. ClickHouse Connect will take advantage of the fast orjson library if available. Note that inserts for JSON columns require ClickHouse server version 22.6.1 or later +* Standardize read format handling and allow specifying a return data format per column or per query. +* Added convenience min_version method to client to see if the server is at least the requested level +* Increase default HTTP timeout to 300 seconds to match ClickHouse server default #### Bug Fixes +* Fixed multiple issues with SQL comments that would cause some queries to fail +* Fixed problem with SQLAlchemy literal binds that would cause an error in Superset filters +* Fixed issue with query parameter handling * Named Tuples were not supported and would result in throwing an exception. This has been fixed. * The client query_arrow function would return incomplete results if the query result exceeded the ClickHouse max_block_size. This has been fixed. As part of the fix query_arrow method returns a PyArrow Table object. While this is a breaking change in the API it should be easy to work around.