Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use deque for ~4x speedup when reading array columns #164

Merged: 1 commit merged on Sep 14, 2020
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 12 additions & 16 deletions clickhouse_driver/columns/arraycolumn.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
from collections import deque
from itertools import chain
from struct import Struct

from ..util import compat
from .base import Column
from .intcolumn import UInt64Column

if compat.PY3:
from queue import Queue
else:
from Queue import Queue


class ArrayColumn(Column):
"""
@@ -61,15 +56,15 @@ def read_data(self, rows, buf):
return self._read(rows, buf)

def _write_sizes(self, value, buf):
q = Queue()
q.put((self, value, 0))
q = deque()
q.appendleft((self, value, 0))

cur_depth = 0
offset = 0
nulls_map = []

while not q.empty():
column, value, depth = q.get_nowait()
while q:
column, value, depth = q.pop()

if cur_depth != depth:
cur_depth = depth
@@ -89,7 +84,7 @@ def _write_sizes(self, value, buf):
nested_column = column.nested_column
if isinstance(nested_column, ArrayColumn):
for x in value:
q.put((nested_column, x, cur_depth + 1))
q.appendleft((nested_column, x, cur_depth + 1))
nulls_map.append(None if x is None else False)

def _write_data(self, value, buf):
@@ -124,8 +119,8 @@ def write_state_prefix(self, buf):
self.nested_column.write_state_prefix(buf)

def _read(self, size, buf):
q = Queue()
q.put((self, size, 0))
q = deque()
q.appendleft((self, size, 0))

slices_series = []

@@ -142,8 +137,8 @@ def _read(self, size, buf):
nested_column = self.nested_column

# Read and store info about slices.
while not q.empty():
column, size, depth = q.get_nowait()
while q:
column, size, depth = q.pop()

nested_column = column.nested_column

@@ -164,7 +159,8 @@ def _read(self, size, buf):
for _i in range(size):
offset = self.size_unpack(buf)
nested_column_size = offset
q.put((nested_column, offset - prev_offset, cur_depth + 1))
q.appendleft(
(nested_column, offset - prev_offset, cur_depth + 1))
slices.append((prev_offset, offset))
prev_offset = offset

Expand Down