Skip to content

Commit

Permalink
types: support working with binary for Python 3
Browse files Browse the repository at this point in the history
Before this patch, both bytes and str were encoded as mp_str. It was
possible to work with utf and non-utf strings, but not with
varbinary [1] (mp_bin). This patch adds varbinary support for Python 3
by default. Python 2 connector behavior remains the same.

For encoding="utf-8" (default), the following behavior is expected now:
(Python 3 -> Tarantool          -> Python 3)
 bytes    -> mp_bin (varbinary) -> bytes
 str      -> mp_str (string)    -> str

For encoding=None, the following behavior is expected now:
(Python 3 -> Tarantool          -> Python 3)
 bytes    -> mp_str (string)    -> bytes

This patch changes the current behavior for Python 3: bytes objects are
now encoded as varbinary by default. bytes objects are also supported
as keys.

This patch does not add new restrictions (like "do not permit to use
str in encoding=None mode because result may be confusing") to preserve
current behavior (for example, using space name as str in schema
get_space).

1. tarantool/tarantool#4201

Closes #105
  • Loading branch information
DifferentialOrange committed Mar 31, 2022
1 parent 2d13e1c commit 638c124
Show file tree
Hide file tree
Showing 4 changed files with 189 additions and 26 deletions.
15 changes: 12 additions & 3 deletions tarantool/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
Request types definitions
'''

import sys
import collections
import msgpack
import hashlib
Expand Down Expand Up @@ -84,8 +85,13 @@ def __init__(self, conn):
# The option controls whether to pack binary (non-unicode)
# string values as mp_bin or as mp_str.
#
# The default behaviour of the connector is to pack both
# bytes and Unicode strings as mp_str.
# The default behaviour of the Python 2 connector is to pack
# both bytes and Unicode strings as mp_str.
#
# The default behaviour of the Python 3 connector (since
# default encoding is "utf-8") is to pack bytes as mp_bin
# and Unicode strings as mp_str. encoding=None mode must
# be used to work with non-utf strings.
#
# msgpack-0.5.0 (and only this version) warns when the
# option is unset:
Expand All @@ -98,7 +104,10 @@ def __init__(self, conn):
# just always set it for all msgpack versions to get rid
# of the warning on msgpack-0.5.0 and to keep our
# behaviour on msgpack-1.0.0.
packer_kwargs['use_bin_type'] = False
if conn.encoding is None or sys.version_info.major == 2:
packer_kwargs['use_bin_type'] = False
else:
packer_kwargs['use_bin_type'] = True

self.packer = msgpack.Packer(**packer_kwargs)

Expand Down
14 changes: 10 additions & 4 deletions tarantool/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
if sys.version_info.major == 2:
string_types = (basestring, )
integer_types = (int, long)
supported_types = integer_types + string_types + (float,)

ENCODING_DEFAULT = None

if sys.version_info.minor < 6:
binary_types = (str, )
else:
Expand All @@ -17,10 +20,13 @@ def strxor(rhs, lhs):
return "".join(chr(ord(x) ^ ord(y)) for x, y in zip(rhs, lhs))

elif sys.version_info.major == 3:
binary_types = (bytes, )
string_types = (str, )
integer_types = (int, )
binary_types = (bytes, )
string_types = (str, )
integer_types = (int, )
supported_types = integer_types + string_types + binary_types + (float,)

ENCODING_DEFAULT = "utf-8"

from base64 import decodebytes as base64_decode

def strxor(rhs, lhs):
Expand All @@ -43,7 +49,7 @@ def check_key(*args, **kwargs):
elif args[0] is None and kwargs['select']:
return []
for key in args:
assert isinstance(key, integer_types + string_types + (float,))
assert isinstance(key, supported_types)
return list(args)


Expand Down
82 changes: 65 additions & 17 deletions test/suites/lib/skip.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,15 @@
import functools
import pkg_resources
import re
import sys

SQL_SUPPORT_TNT_VERSION = '2.0.0'


def skip_or_run_sql_test(func):
"""Decorator to skip or run SQL-related tests depending on the tarantool
def skip_or_run_test_tarantool(func, REQUIRED_TNT_VERSION, msg):
"""Decorator to skip or run tests depending on the tarantool
version.
Tarantool supports SQL-related stuff only since 2.0.0 version. So this
decorator should wrap every SQL-related test to skip it if the tarantool
version < 2.0.0 is used for testing.
Also, it can be used with the 'setUp' method for skipping the whole test
suite.
Also, it can be used with the 'setUp' method for skipping
the whole test suite.
"""

@functools.wraps(func)
Expand All @@ -28,16 +23,69 @@ def wrapper(self, *args, **kwargs):
).group()

tnt_version = pkg_resources.parse_version(self.tnt_version)
sql_support_tnt_version = pkg_resources.parse_version(
SQL_SUPPORT_TNT_VERSION
)
support_version = pkg_resources.parse_version(REQUIRED_TNT_VERSION)

if tnt_version < sql_support_tnt_version:
self.skipTest(
'Tarantool %s does not support SQL' % self.tnt_version
)
if tnt_version < support_version:
self.skipTest('Tarantool %s %s' % (self.tnt_version, msg))

if func.__name__ != 'setUp':
func(self, *args, **kwargs)

return wrapper


def skip_or_run_test_python_major(func, REQUIRED_PYTHON_MAJOR, msg):
    """Wrap a test method so it is skipped on other Python major versions.

    The returned wrapper compares ``sys.version_info.major`` with
    ``REQUIRED_PYTHON_MAJOR`` and, on mismatch, calls ``self.skipTest``
    with a message built from the running major version and ``msg``.

    It may also wrap a 'setUp' method to skip a whole test suite; in that
    case the wrapped 'setUp' is executed before the version check.
    """

    @functools.wraps(func)
    def guarded(self, *call_args, **call_kwargs):
        wraps_setup = func.__name__ == 'setUp'

        # A wrapped 'setUp' runs first, ahead of the version check.
        if wraps_setup:
            func(self, *call_args, **call_kwargs)

        running_major = sys.version_info.major
        if running_major != REQUIRED_PYTHON_MAJOR:
            self.skipTest('Python %s connector %s' % (running_major, msg))

        # Ordinary tests run only after the version check has passed.
        if not wraps_setup:
            func(self, *call_args, **call_kwargs)

    return guarded


def skip_or_run_sql_test(func):
    """Skip the wrapped SQL-related test on Tarantool older than 2.0.0.

    Tarantool supports SQL only since version 2.0.0, so every SQL-related
    test should be wrapped with this decorator. The version check itself
    is delegated to ``skip_or_run_test_tarantool``.
    """

    required_version = '2.0.0'
    skip_reason = 'does not support SQL'
    return skip_or_run_test_tarantool(func, required_version, skip_reason)


def skip_or_run_varbinary_test(func):
    """Skip the wrapped VARBINARY-related test on Tarantool older than 2.2.1.

    Tarantool supports the VARBINARY type only since version 2.2.1.
    See https://github.com/tarantool/tarantool/issues/4201
    The version check itself is delegated to ``skip_or_run_test_tarantool``.
    """

    required_version = '2.2.1'
    skip_reason = 'does not support VARBINARY type'
    return skip_or_run_test_tarantool(func, required_version, skip_reason)


def skip_or_run_mp_bin_test(func):
    """Skip the wrapped mp_bin-related test unless running on Python 3.

    The Python 2 connector does not support mp_bin. The version check
    itself is delegated to ``skip_or_run_test_python_major``.
    """

    required_major = 3
    skip_reason = 'does not support mp_bin'
    return skip_or_run_test_python_major(func, required_major, skip_reason)
104 changes: 102 additions & 2 deletions test/suites/test_dml.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import unittest
import tarantool

from .lib.skip import skip_or_run_mp_bin_test, skip_or_run_varbinary_test
from .lib.tarantool_server import TarantoolServer

class TestSuite_Request(unittest.TestCase):
Expand All @@ -16,7 +17,13 @@ def setUpClass(self):
self.srv = TarantoolServer()
self.srv.script = 'test/suites/box.lua'
self.srv.start()
self.con = tarantool.Connection(self.srv.host, self.srv.args['primary'])

args = [self.srv.host, self.srv.args['primary']]
self.con = tarantool.Connection(*args)
self.con_encoding_utf8 = tarantool.Connection(*args, encoding='utf-8')
self.con_encoding_none = tarantool.Connection(*args, encoding=None)
self.conns = [self.con, self.con_encoding_utf8, self.con_encoding_none]

self.adm = self.srv.admin
self.space_created = self.adm("box.schema.create_space('space_1')")
self.adm("""
Expand All @@ -31,17 +38,64 @@ def setUpClass(self):
parts = {2, 'num', 3, 'str'},
unique = false})
""".replace('\n', ' '))

self.space_created = self.adm("box.schema.create_space('space_2')")
self.adm("""
box.space['space_2']:create_index('primary', {
type = 'hash',
parts = {1, 'num'},
unique = true})
""".replace('\n', ' '))

self.adm("box.schema.create_space('space_str')")
self.adm("""
box.space['space_str']:create_index('primary', {
type = 'tree',
parts = {1, 'str'},
unique = true})
""".replace('\n', ' '))

self.adm("box.schema.create_space('space_varbin')")
self.adm("""
box.space['space_varbin']:create_index('primary', {
type = 'tree',
parts = {1, 'varbinary'},
unique = true})
""".replace('\n', ' '))
self.adm("""
buffer = require('buffer')
ffi = require('ffi')
function encode_bin(bytes)
local tmpbuf = buffer.ibuf()
local p = tmpbuf:alloc(3 + #bytes)
p[0] = 0x91
p[1] = 0xC4
p[2] = #bytes
for i, c in pairs(bytes) do
p[i + 3 - 1] = c
end
return tmpbuf
end
function bintuple_insert(space, bytes)
local tmpbuf = encode_bin(bytes)
ffi.cdef[[
int box_insert(uint32_t space_id, const char *tuple, const char *tuple_end, box_tuple_t **result);
]]
ffi.C.box_insert(space.id, tmpbuf.rpos, tmpbuf.wpos, nil)
end
""")
self.adm("json = require('json')")
self.adm("fiber = require('fiber')")
self.adm("uuid = require('uuid')")

def assertNotRaises(self, func, *args, **kwargs):
    """Fail the current test if ``func(*args, **kwargs)`` raises an Exception.

    The return value of ``func`` is discarded. Any ``Exception`` is
    reported through ``self.fail`` with the exception's repr in the message.
    """
    try:
        func(*args, **kwargs)
    except Exception as exc:
        failure_message = 'Function raised Exception: %s' % repr(exc)
        self.fail(failure_message)

def setUp(self):
# prevent a remote tarantool from clean our session
if self.srv.is_started():
Expand All @@ -54,7 +108,8 @@ def test_00_00_authenticate(self):
self.assertIsNone(self.srv.admin("""
box.schema.user.grant('test', 'execute,read,write', 'universe')
"""))
self.assertEqual(self.con.authenticate('test', 'test')._data, None)
for con in self.conns:
self.assertEqual(con.authenticate('test', 'test')._data, None)

def test_00_01_space_created(self):
# Check that space is created in setUpClass
Expand Down Expand Up @@ -302,6 +357,51 @@ def test_12_update_fields(self):
[[2, 'help', 7]]
)

def test_13_00_string_insert_encoding_utf8_behavior(self):
    """Inserting a str key into 'space_str' via the utf-8 connection must not raise."""
    insert = self.con_encoding_utf8.insert
    row = ['test_13_00']
    self.assertNotRaises(insert, 'space_str', row)

def test_13_01_string_select_encoding_utf8_behavior(self):
    """A string inserted on the server side is selected back as an equal str.

    The tuple is inserted through the admin console and then selected by
    key through the encoding='utf-8' connection.
    """
    self.adm(r"box.space['space_str']:insert{'test_13_01'}")

    strdata = 'test_13_01'
    resp = self.con_encoding_utf8.select('space_str', [strdata])
    # assertEqual, not the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(resp[0][0], strdata)

@skip_or_run_mp_bin_test
@skip_or_run_varbinary_test
def test_13_02_varbinary_insert_encoding_utf8_behavior(self):
    """Inserting a bytes key into 'space_varbin' via the utf-8 connection must not raise."""
    insert = self.con_encoding_utf8.insert
    row = [b'test_13_02']
    self.assertNotRaises(insert, 'space_varbin', row)

@skip_or_run_mp_bin_test
@skip_or_run_varbinary_test
def test_13_03_varbinary_select_encoding_utf8_behavior(self):
    """A varbinary key inserted on the server side is selected back as equal bytes.

    The tuple is inserted through the server-side ``bintuple_insert``
    helper and then selected by a bytes key through the encoding='utf-8'
    connection.
    """
    self.adm(r"""
bintuple_insert(
box.space['space_varbin'],
{0xDE, 0xAD, 0xBE, 0xAF, 0x13, 0x03})
""")

    bindata = bytes(bytearray.fromhex('DEADBEAF1303'))
    resp = self.con_encoding_utf8.select('space_varbin', [bindata])
    # assertEqual, not the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(resp[0][0], bindata)

def test_14_00_string_insert_encoding_none_behavior(self):
    """Inserting a non-UTF-8 bytes key into 'space_str' via the encoding=None connection must not raise."""
    insert = self.con_encoding_none.insert
    raw_key = bytes(bytearray.fromhex('DEADBEAF1400'))
    self.assertNotRaises(insert, 'space_str', [raw_key])

def test_14_01_string_select_encoding_none_behavior(self):
    """A non-UTF-8 string inserted on the server side is selected back as equal bytes.

    The tuple is inserted through the admin console and then selected by
    a bytes key through the encoding=None connection.
    """
    self.adm(r"box.space['space_str']:insert{'\xDE\xAD\xBE\xAF\x14\x01'}")

    bindata = bytes(bytearray.fromhex('DEADBEAF1401'))
    resp = self.con_encoding_none.select('space_str', [bindata])
    # assertEqual, not the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(resp[0][0], bindata)

@classmethod
def tearDownClass(self):
self.con.close()
Expand Down

0 comments on commit 638c124

Please sign in to comment.