Skip to content

Commit

Permalink
[mypyc] Foundational support for tuple literals (+ None and bool) (#1…
Browse files Browse the repository at this point in the history
…0041)

Add support for loading tuple literals using `LoadLiteral`. The literal tuple objects will
be constructed during module import time, similar to other literals. Only tuples containing
items of supported literal types can be represented (this includes other tuples).

Add `None`, `True` and `False` to the literals array so that they can be used as tuple
literal items.

Currently tuple literals aren't used for anything. I added some unit tests to check parts
of the implementation.

The primary use case I have in mind is supporting vectorcall APIs which expect a tuple
of keyword argument names. I will implement this in a separate PR. This will also add 
some end-to-end testing for tuple literals.

These could be used to avoid repeatedly constructing tuples with literal values in other
contexts as well.

Use array-based encoding for tuple literal values. We use the literal object array introduced 
in #10040 to allow a simple integer-based encoding of heterogeneous tuples. For example, 
tuple `('x', 5)` could be encoded like this as three integers:

* 2 (length of tuple)
* 123 (index of literal `'x'`)
* 345 (index of literal `5`)
  • Loading branch information
JukkaL authored Feb 6, 2021
1 parent 7ec1455 commit 047e427
Show file tree
Hide file tree
Showing 7 changed files with 148 additions and 25 deletions.
5 changes: 4 additions & 1 deletion mypyc/codegen/emitmodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,9 @@ def generate_literal_tables(self) -> None:
# Descriptions of complex literals
init_complex = c_array_initializer(literals.encoded_complex_values())
self.declare_global('const double []', 'CPyLit_Complex', initializer=init_complex)
# Descriptions of tuple literals
init_tuple = c_array_initializer(literals.encoded_tuple_values())
self.declare_global('const int []', 'CPyLit_Tuple', initializer=init_tuple)

def generate_export_table(self, decl_emitter: Emitter, code_emitter: Emitter) -> None:
"""Generate the declaration and definition of the group's export struct.
Expand Down Expand Up @@ -816,7 +819,7 @@ def generate_globals_init(self, emitter: Emitter) -> None:
for symbol, fixup in self.simple_inits:
emitter.emit_line('{} = {};'.format(symbol, fixup))

values = 'CPyLit_Str, CPyLit_Bytes, CPyLit_Int, CPyLit_Float, CPyLit_Complex'
values = 'CPyLit_Str, CPyLit_Bytes, CPyLit_Int, CPyLit_Float, CPyLit_Complex, CPyLit_Tuple'
emitter.emit_lines('if (CPyStatics_Initialize(CPyStatics, {}) < 0) {{'.format(values),
'return -1;',
'}')
Expand Down
78 changes: 70 additions & 8 deletions mypyc/codegen/literals.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@
from typing import Dict, List, Union
from typing import Dict, List, Union, Tuple, Any, cast

from typing_extensions import Final


# Supported Python literal types. All tuple items must have supported
# literal types as well, but we can't represent the type precisely.
LiteralValue = Union[str, bytes, int, bool, float, complex, Tuple[object, ...], None]


# Some literals are singletons and handled specially (None, False and True)
NUM_SINGLETONS = 3 # type: Final


class Literals:
Expand All @@ -11,9 +22,13 @@ def __init__(self) -> None:
self.int_literals = {} # type: Dict[int, int]
self.float_literals = {} # type: Dict[float, int]
self.complex_literals = {} # type: Dict[complex, int]
self.tuple_literals = {} # type: Dict[Tuple[object, ...], int]

def record_literal(self, value: Union[str, bytes, int, float, complex]) -> None:
def record_literal(self, value: LiteralValue) -> None:
"""Ensure that the literal value is available in generated code."""
if value is None or value is True or value is False:
# These are special cased and always present
return
if isinstance(value, str):
str_literals = self.str_literals
if value not in str_literals:
Expand All @@ -34,15 +49,29 @@ def record_literal(self, value: Union[str, bytes, int, float, complex]) -> None:
complex_literals = self.complex_literals
if value not in complex_literals:
complex_literals[value] = len(complex_literals)
elif isinstance(value, tuple):
tuple_literals = self.tuple_literals
if value not in tuple_literals:
for item in value:
self.record_literal(cast(Any, item))
tuple_literals[value] = len(tuple_literals)
else:
assert False, 'invalid literal: %r' % value

def literal_index(self, value: Union[str, bytes, int, float, complex]) -> int:
def literal_index(self, value: LiteralValue) -> int:
"""Return the index to the literals array for given value."""
# The array contains first all str values, followed by bytes values, etc.
# The array contains first None and booleans, followed by all str values,
# followed by bytes values, etc.
if value is None:
return 0
elif value is False:
return 1
elif value is True:
return 2
n = NUM_SINGLETONS
if isinstance(value, str):
return self.str_literals[value]
n = len(self.str_literals)
return n + self.str_literals[value]
n += len(self.str_literals)
if isinstance(value, bytes):
return n + self.bytes_literals[value]
n += len(self.bytes_literals)
Expand All @@ -54,11 +83,16 @@ def literal_index(self, value: Union[str, bytes, int, float, complex]) -> int:
n += len(self.float_literals)
if isinstance(value, complex):
return n + self.complex_literals[value]
n += len(self.complex_literals)
if isinstance(value, tuple):
return n + self.tuple_literals[value]
assert False, 'invalid literal: %r' % value

def num_literals(self) -> int:
return (len(self.str_literals) + len(self.bytes_literals) + len(self.int_literals) +
len(self.float_literals) + len(self.complex_literals))
# The first three are for None, False and True (in this order)
return (NUM_SINGLETONS + len(self.str_literals) + len(self.bytes_literals) +
len(self.int_literals) + len(self.float_literals) + len(self.complex_literals) +
len(self.tuple_literals))

# The following methods return the C encodings of literal values
# of different types
Expand All @@ -78,6 +112,34 @@ def encoded_float_values(self) -> List[str]:
def encoded_complex_values(self) -> List[str]:
return encode_complex_values(self.complex_literals)

def encoded_tuple_values(self) -> List[str]:
    """Encode tuple values into a C array.

    The format of the result is like this:

       <number of tuples>
       <length of the first tuple>
       <literal index of first item>
       ...
       <literal index of last item>
       <length of the second tuple>
       ...

    Every entry is returned as a decimal string. Tuples are emitted in
    the order of their index in self.tuple_literals, and each item is
    encoded as the value returned by self.literal_index() for it.
    """
    values = self.tuple_literals
    # Invert the value -> index mapping so that tuples can be emitted in
    # index order (indexes are assigned consecutively starting from 0).
    value_by_index = {index: value for value, index in values.items()}
    num = len(values)
    result = [str(num)]
    for i in range(num):
        value = value_by_index[i]
        result.append(str(len(value)))
        # Items are typed as 'object', so a cast is needed for literal_index
        result.extend(str(self.literal_index(cast(Any, item))) for item in value)
    return result


def encode_str_values(values: Dict[str, int]) -> List[bytes]:
value_by_index = {}
Expand Down
14 changes: 9 additions & 5 deletions mypyc/ir/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,20 +500,24 @@ class LoadLiteral(RegisterOp):
This is used to load a static PyObject * value corresponding to
a literal of one of the supported types.
NOTE: For int literals, both int_rprimitive (CPyTagged) and
object_primitive (PyObject *) are supported as types. However,
when using int_rprimitive, the value must *not* be small enough
to fit in an unboxed integer.
Tuple literals must contain only valid literal values as items.
NOTE: You can use this to load boxed (Python) int objects. Use
Integer to load unboxed, tagged integers or fixed-width,
low-level integers.
For int literals, both int_rprimitive (CPyTagged) and
object_rprimitive (PyObject *) are supported as rtype. However,
when using int_rprimitive, the value must *not* be small enough
to fit in an unboxed integer.
"""

error_kind = ERR_NEVER
is_borrowed = True

def __init__(self, value: Union[str, bytes, int, float, complex], rtype: RType) -> None:
def __init__(self,
value: Union[None, str, bytes, bool, int, float, complex, Tuple[object, ...]],
rtype: RType) -> None:
self.value = value
self.type = rtype

Expand Down
4 changes: 1 addition & 3 deletions mypyc/irbuild/expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@
from mypy.types import TupleType, get_proper_type, Instance

from mypyc.common import MAX_SHORT_INT
from mypyc.ir.ops import (
Value, Register, TupleGet, TupleSet, BasicBlock, Assign, LoadAddress
)
from mypyc.ir.ops import Value, Register, TupleGet, TupleSet, BasicBlock, Assign, LoadAddress
from mypyc.ir.rtypes import (
RTuple, object_rprimitive, is_none_rprimitive, int_rprimitive, is_int_rprimitive
)
Expand Down
3 changes: 2 additions & 1 deletion mypyc/lib-rt/CPy.h
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,8 @@ int CPyArg_ParseStackAndKeywordsSimple(PyObject *const *args, Py_ssize_t nargs,

int CPySequence_CheckUnpackCount(PyObject *sequence, Py_ssize_t expected);
int CPyStatics_Initialize(PyObject **statics, const char *strings, const char *bytestrings,
const char *ints, const double *floats, const double *complex_numbers);
const char *ints, const double *floats, const double *complex_numbers,
const int *tuples);


#ifdef __cplusplus
Expand Down
38 changes: 32 additions & 6 deletions mypyc/lib-rt/misc_ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,16 @@ int CPyStatics_Initialize(PyObject **statics,
const char *bytestrings,
const char *ints,
const double *floats,
const double *complex_numbers) {
const double *complex_numbers,
const int *tuples) {
PyObject **result = statics;
// Start with some hard-coded values
*result++ = Py_None;
Py_INCREF(Py_None);
*result++ = Py_False;
Py_INCREF(Py_False);
*result++ = Py_True;
Py_INCREF(Py_True);
if (strings) {
size_t num;
strings = parse_int(strings, &num);
Expand All @@ -540,7 +549,7 @@ int CPyStatics_Initialize(PyObject **statics,
return -1;
}
PyUnicode_InternInPlace(&obj);
*statics++ = obj;
*result++ = obj;
strings += len;
}
}
Expand All @@ -554,7 +563,7 @@ int CPyStatics_Initialize(PyObject **statics,
if (obj == NULL) {
return -1;
}
*statics++ = obj;
*result++ = obj;
bytestrings += len;
}
}
Expand All @@ -569,7 +578,7 @@ int CPyStatics_Initialize(PyObject **statics,
}
ints = end;
ints++;
*statics++ = obj;
*result++ = obj;
}
}
if (floats) {
Expand All @@ -579,7 +588,7 @@ int CPyStatics_Initialize(PyObject **statics,
if (obj == NULL) {
return -1;
}
*statics++ = obj;
*result++ = obj;
}
}
if (complex_numbers) {
Expand All @@ -591,7 +600,24 @@ int CPyStatics_Initialize(PyObject **statics,
if (obj == NULL) {
return -1;
}
*statics++ = obj;
*result++ = obj;
}
}
if (tuples) {
int num = *tuples++;
while (num-- > 0) {
int num_items = *tuples++;
PyObject *obj = PyTuple_New(num_items);
if (obj == NULL) {
return -1;
}
int i;
for (i = 0; i < num_items; i++) {
PyObject *item = statics[*tuples++];
Py_INCREF(item);
PyTuple_SET_ITEM(obj, i, item);
}
*result++ = obj;
}
}
return 0;
Expand Down
31 changes: 30 additions & 1 deletion mypyc/test/test_literals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import unittest

from mypyc.codegen.literals import format_str_literal
from mypyc.codegen.literals import format_str_literal, Literals


class TestLiterals(unittest.TestCase):
Expand All @@ -12,3 +12,32 @@ def test_format_str_literal(self) -> None:
assert format_str_literal('x' * 127) == b'\x7f' + b'x' * 127
assert format_str_literal('x' * 128) == b'\x81\x00' + b'x' * 128
assert format_str_literal('x' * 131) == b'\x81\x03' + b'x' * 131

def test_simple_literal_index(self) -> None:
    """Check literal indexes of the singletons and of simple str/int values."""
    literals = Literals()
    literals.record_literal(1)
    literals.record_literal('y')
    # Recording the singletons must not affect the indexes of other literals
    literals.record_literal(True)
    literals.record_literal(None)
    literals.record_literal(False)
    # None, False and True always occupy the first three slots
    assert literals.literal_index(None) == 0
    assert literals.literal_index(False) == 1
    assert literals.literal_index(True) == 2
    # The str literal comes before the int literal, even though the int
    # was recorded first
    assert literals.literal_index('y') == 3
    assert literals.literal_index(1) == 4

def test_tuple_literal(self) -> None:
    """Check the literal indexes and the encoding of (nested) tuple literals."""
    lit = Literals()
    lit.record_literal((1, 'y', None, (b'a', 'b')))
    lit.record_literal((b'a', 'b'))
    lit.record_literal(())
    # The nested tuple is registered while the outer tuple is being recorded,
    # so it gets a lower index than the outer tuple.
    assert lit.literal_index((b'a', 'b')) == 7
    assert lit.literal_index((1, 'y', None, (b'a', 'b'))) == 8
    assert lit.literal_index(()) == 9
    assert lit.encoded_tuple_values() == [
        '3',  # Number of tuples
        '2', '5', '4',  # First tuple (length=2)
        '4', '6', '3', '0', '7',  # Second tuple (length=4)
        '0',  # Third tuple (length=0)
    ]

0 comments on commit 047e427

Please sign in to comment.