Skip to content

Commit

Permalink
Add support for tuple exchange format for codec overrides
Browse files Browse the repository at this point in the history
Connection.set_type_codec() now accepts a new `format` keyword
argument.  When set to 'tuple', it declares that the custom codec
exchanges data with the driver in a type-specific tuple format.

This allows using custom codecs for types without the need to parse
the raw binary or text data format.

This commit adds tuple exchange support for all date-time types.

The `binary` keyword argument to set_type_codec() is now deprecated
in favor of `format='text'` and `format='binary'`.
  • Loading branch information
elprans authored and 1st1 committed Jul 5, 2017
1 parent c54ce43 commit 0453243
Show file tree
Hide file tree
Showing 10 changed files with 583 additions and 92 deletions.
160 changes: 148 additions & 12 deletions asyncpg/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import collections.abc
import struct
import time
import warnings

from . import compat
from . import connect_utils
Expand Down Expand Up @@ -762,22 +763,121 @@ async def _copy_in_records(self, copy_stmt, records, intro_stmt, timeout):
copy_stmt, None, None, records, intro_stmt, timeout)

async def set_type_codec(self, typename, *,
schema='public', encoder, decoder, binary=False):
schema='public', encoder, decoder,
binary=None, format='text'):
"""Set an encoder/decoder pair for the specified data type.
:param typename: Name of the data type the codec is for.
:param schema: Schema name of the data type the codec is for
(defaults to 'public')
:param encoder: Callable accepting a single argument and returning
a string or a bytes object (if `binary` is True).
:param decoder: Callable accepting a single string or bytes argument
and returning a decoded object.
:param binary: Specifies whether the codec is able to handle binary
data. If ``False`` (the default), the data is
expected to be encoded/decoded in text.
:param typename:
Name of the data type the codec is for.
:param schema:
Schema name of the data type the codec is for
(defaults to ``'public'``)
:param format:
The type of the argument received by the *decoder* callback,
and the type of the *encoder* callback return value.
If *format* is ``'text'`` (the default), the exchange datum is a
``str`` instance containing valid text representation of the
data type.
If *format* is ``'binary'``, the exchange datum is a ``bytes``
instance containing valid _binary_ representation of the
data type.
If *format* is ``'tuple'``, the exchange datum is a type-specific
``tuple`` of values. The table below lists supported data
types and their format for this mode.
+-----------------+---------------------------------------------+
| Type | Tuple layout |
+=================+=============================================+
| ``interval`` | (``months``, ``days``, ``seconds``, |
| | ``microseconds``) |
+-----------------+---------------------------------------------+
| ``date`` | (``date ordinal relative to Jan 1 2000``,) |
| | ``-2^31`` for negative infinity timestamp |
| | ``2^31-1`` for positive infinity timestamp. |
+-----------------+---------------------------------------------+
| ``timestamp`` | (``microseconds relative to Jan 1 2000``,) |
| | ``-2^63`` for negative infinity timestamp |
| | ``2^63-1`` for positive infinity timestamp. |
+-----------------+---------------------------------------------+
| ``timestamp | (``microseconds relative to Jan 1 2000 |
| with time zone``| UTC``,) |
| | ``-2^63`` for negative infinity timestamp |
| | ``2^63-1`` for positive infinity timestamp. |
+-----------------+---------------------------------------------+
| ``time`` | (``microseconds``,) |
+-----------------+---------------------------------------------+
| ``time with | (``microseconds``, |
| time zone`` | ``time zone offset in seconds``) |
+-----------------+---------------------------------------------+
:param encoder:
Callable accepting a Python object as a single argument and
returning a value encoded according to *format*.
:param decoder:
Callable accepting a single argument encoded according to *format*
and returning a decoded Python object.
:param binary:
**Deprecated**. Use *format* instead.
Example:
.. code-block:: pycon
>>> import asyncpg
>>> import asyncio
>>> import datetime
>>> from dateutil.relativedelta import relativedelta
>>> async def run():
... con = await asyncpg.connect(user='postgres')
... def encoder(delta):
... ndelta = delta.normalized()
... return (ndelta.years * 12 + ndelta.months,
... ndelta.days,
... (ndelta.hours * 3600 +
... ndelta.minutes * 60 +
... ndelta.seconds),
... ndelta.microseconds)
... def decoder(tup):
... return relativedelta(months=tup[0], days=tup[1],
... seconds=tup[2],
... microseconds=tup[3])
... await con.set_type_codec(
... 'interval', schema='pg_catalog', encoder=encoder,
... decoder=decoder, format='tuple')
... result = await con.fetchval(
... "SELECT '2 years 3 mons 1 day'::interval")
... print(result)
... print(datetime.datetime(2002, 1, 1) + result)
>>> asyncio.get_event_loop().run_until_complete(run())
relativedelta(years=+2, months=+3, days=+1)
2004-04-02 00:00:00
.. versionadded:: 0.12.0
Added the ``format`` keyword argument and support for 'tuple'
format.
.. versionchanged:: 0.12.0
The ``binary`` keyword argument is deprecated in favor of
``format``.
"""
self._check_open()

if binary is not None:
format = 'binary' if binary else 'text'
warnings.warn(
"The `binary` keyword argument to "
"set_type_codec() is deprecated and will be removed in "
"asyncpg 0.13.0. Use the `format` keyword argument instead.",
DeprecationWarning, stacklevel=2)

if self._type_by_name_stmt is None:
self._type_by_name_stmt = await self.prepare(
introspection.TYPE_BY_NAME)
Expand All @@ -795,7 +895,40 @@ async def set_type_codec(self, typename, *,

self._protocol.get_settings().add_python_codec(
oid, typename, schema, 'scalar',
encoder, decoder, binary)
encoder, decoder, format)

# Statement cache is no longer valid due to codec changes.
self._drop_local_statement_cache()

async def reset_type_codec(self, typename, *, schema='public'):
    """Reset *typename* codec to the default implementation.

    :param typename:
        Name of the data type the codec is for.

    :param schema:
        Schema name of the data type the codec is for
        (defaults to ``'public'``)

    :raises ValueError:
        If the type lookup returns no row for *schema*.*typename*.

    .. versionadded:: 0.12.0
    """
    # Mirror set_type_codec(), which calls _check_open() before doing
    # any other work on the connection.
    self._check_open()

    # The type-lookup statement is prepared on first use and then kept
    # on the connection for subsequent calls.
    if self._type_by_name_stmt is None:
        self._type_by_name_stmt = await self.prepare(
            introspection.TYPE_BY_NAME)

    typeinfo = await self._type_by_name_stmt.fetchrow(typename, schema)
    if not typeinfo:
        raise ValueError('unknown type: {}.{}'.format(schema, typename))

    self._protocol.get_settings().remove_python_codec(
        typeinfo['oid'], typename, schema)

    # Statement cache is no longer valid due to codec changes.
    self._drop_local_statement_cache()

async def set_builtin_type_codec(self, typename, *,
schema='public', codec_name):
Expand Down Expand Up @@ -826,6 +959,9 @@ async def set_builtin_type_codec(self, typename, *,
self._protocol.get_settings().set_builtin_type_codec(
oid, typename, schema, 'scalar', codec_name)

# Statement cache is no longer valid due to codec changes.
self._drop_local_statement_cache()

def is_closed(self):
"""Return ``True`` if the connection is closed, ``False`` otherwise.
Expand Down
25 changes: 17 additions & 8 deletions asyncpg/protocol/codecs/base.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,17 @@ cdef enum CodecType:
CODEC_RANGE = 5


cdef enum CodecFormat:
cdef enum ServerDataFormat:
PG_FORMAT_ANY = -1
PG_FORMAT_TEXT = 0
PG_FORMAT_BINARY = 1


# Exchange format between a codec and client code, kept separate from the
# server data format enum (PG_FORMAT_*).
# NOTE(review): presumably PG_XFORMAT_TUPLE corresponds to the new
# format='tuple' mode of Connection.set_type_codec() and PG_XFORMAT_OBJECT
# to the regular Python-object exchange -- confirm against base.pyx.
cdef enum ClientExchangeFormat:
PG_XFORMAT_OBJECT = 1
PG_XFORMAT_TUPLE = 2


cdef class Codec:
cdef:
uint32_t oid
Expand All @@ -46,7 +51,8 @@ cdef class Codec:
str kind

CodecType type
CodecFormat format
ServerDataFormat format
ClientExchangeFormat xformat

encode_func c_encoder
decode_func c_decoder
Expand All @@ -68,7 +74,8 @@ cdef class Codec:
codec_decode_func decoder

cdef init(self, str name, str schema, str kind,
CodecType type, CodecFormat format,
CodecType type, ServerDataFormat format,
ClientExchangeFormat xformat,
encode_func c_encoder, decode_func c_decoder,
object py_encoder, object py_decoder,
Codec element_codec, tuple element_type_oids,
Expand Down Expand Up @@ -140,7 +147,7 @@ cdef class Codec:
cdef Codec new_composite_codec(uint32_t oid,
str name,
str schema,
CodecFormat format,
ServerDataFormat format,
list element_codecs,
tuple element_type_oids,
object element_names)
Expand All @@ -152,14 +159,16 @@ cdef class Codec:
str kind,
object encoder,
object decoder,
CodecFormat format)
encode_func c_encoder,
decode_func c_decoder,
ServerDataFormat format,
ClientExchangeFormat xformat)


cdef class DataCodecConfig:
cdef:
dict _type_codecs_cache
dict _local_type_codecs

cdef inline Codec get_codec(self, uint32_t oid, CodecFormat format)
cdef inline Codec get_local_codec(
self, uint32_t oid, CodecFormat preferred_format=*)
cdef inline Codec get_codec(self, uint32_t oid, ServerDataFormat format)
cdef inline Codec get_local_codec(self, uint32_t oid)
Loading

1 comment on commit 0453243

@lelit
Copy link
Contributor

@lelit lelit commented on 0453243 Jul 6, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

😍

Please sign in to comment.