From 6bb22661096e97244e6319169194b97e7eea50ff Mon Sep 17 00:00:00 2001 From: 97littleleaf11 <97littleleaf11@users.noreply.github.com> Date: Fri, 23 Jul 2021 03:28:00 +0800 Subject: [PATCH] [mypyc] Build lists using a primitive op (#10807) Closes https://github.com/mypyc/mypyc/issues/264 This PR adds a primitive op and a C helper function for building lists. ## Test Plan This change helps reduce the generated code size, but adds some call overhead. Microbenchmark results on the master branch: ``` interpreted: 0.000256s (avg of 3582 iterations; stdev 2.2%) compiled: 0.000063s (avg of 3582 iterations; stdev 2.1%) compiled is 4.073x faster ``` On this PR: ``` interpreted: 0.000275s (avg of 3331 iterations; stdev 4.4%) compiled: 0.000083s (avg of 3331 iterations; stdev 4.5%) compiled is 3.321x faster ``` ``` @benchmark def list_build() -> None: n = 0 for i in range(1000): x = ["x", "y", "1", "2", str(i)] n += len(x) assert n == 5000, n ``` --- mypyc/irbuild/ll_builder.py | 40 +++++++++++++++++++----------- mypyc/lib-rt/CPy.h | 1 + mypyc/lib-rt/list_ops.c | 20 +++++++++++++++ mypyc/primitives/list_ops.py | 11 ++++++-- mypyc/test-data/irbuild-lists.test | 24 ++++++++++++++++++ mypyc/test-data/run-lists.test | 19 ++++++++++++++ 6 files changed, 99 insertions(+), 16 deletions(-) diff --git a/mypyc/irbuild/ll_builder.py b/mypyc/irbuild/ll_builder.py index acd91b181a80..45a1ca647f76 100644 --- a/mypyc/irbuild/ll_builder.py +++ b/mypyc/irbuild/ll_builder.py @@ -24,7 +24,7 @@ GetAttr, LoadStatic, MethodCall, CallC, Truncate, LoadLiteral, AssignMulti, RaiseStandardError, Unreachable, LoadErrorValue, NAMESPACE_TYPE, NAMESPACE_MODULE, NAMESPACE_STATIC, IntOp, GetElementPtr, - LoadMem, ComparisonOp, LoadAddress, TupleGet, SetMem, KeepAlive, ERR_NEVER, ERR_FALSE + LoadMem, ComparisonOp, LoadAddress, TupleGet, KeepAlive, ERR_NEVER, ERR_FALSE, SetMem ) from mypyc.ir.rtypes import ( RType, RUnion, RInstance, RArray, optional_value_type, int_rprimitive, float_rprimitive, @@ -32,8 
+32,8 @@ c_pyssize_t_rprimitive, is_short_int_rprimitive, is_tagged, PyVarObject, short_int_rprimitive, is_list_rprimitive, is_tuple_rprimitive, is_dict_rprimitive, is_set_rprimitive, PySetObject, none_rprimitive, RTuple, is_bool_rprimitive, is_str_rprimitive, c_int_rprimitive, - pointer_rprimitive, PyObject, PyListObject, bit_rprimitive, is_bit_rprimitive, - object_pointer_rprimitive, c_size_t_rprimitive, dict_rprimitive + pointer_rprimitive, PyObject, bit_rprimitive, is_bit_rprimitive, + object_pointer_rprimitive, c_size_t_rprimitive, dict_rprimitive, PyListObject ) from mypyc.ir.func_ir import FuncDecl, FuncSignature from mypyc.ir.class_ir import ClassIR, all_concrete_classes @@ -46,7 +46,7 @@ binary_ops, unary_ops, ERR_NEG_INT ) from mypyc.primitives.list_ops import ( - list_extend_op, new_list_op + list_extend_op, new_list_op, list_build_op ) from mypyc.primitives.tuple_ops import ( list_tuple_op, new_tuple_op, new_tuple_with_length_op @@ -78,6 +78,12 @@ DictEntry = Tuple[Optional[Value], Value] +# If the number of items is less than the threshold when initializing +# a list, we inline the generated IR using SetMem and an expanded +# for-loop. Otherwise, we call `list_build_op` to build larger lists. +# TODO: The threshold is an arbitrarily chosen number which needs further +# study on real-world projects for a better balance. +LIST_BUILDING_EXPANSION_THRESHOLD = 10 # From CPython PY_VECTORCALL_ARGUMENTS_OFFSET: Final = 1 << (PLATFORM_SIZE * 8 - 1) @@ -669,7 +675,6 @@ def native_args_to_positional(self, # coercing everything to the expected type. output_args = [] for lst, arg in zip(formal_to_actual, sig.args): - output_arg = None if arg.kind == ARG_STAR: assert star_arg output_arg = star_arg @@ -700,7 +705,7 @@ def gen_method_call(self, arg_names: Optional[List[Optional[str]]] = None) -> Value: """Generate either a native or Python method call.""" # If we have *args, then fallback to Python method call. 
- if (arg_kinds is not None and any(kind.is_star() for kind in arg_kinds)): + if arg_kinds is not None and any(kind.is_star() for kind in arg_kinds): return self.py_method_call(base, name, arg_values, base.line, arg_kinds, arg_names) # If the base type is one of ours, do a MethodCall @@ -766,7 +771,7 @@ def none(self) -> Value: def true(self) -> Value: """Load unboxed True value (type: bool_rprimitive).""" - return Integer(1, bool_rprimitive) + return Integer(1, bool_rprimitive) def false(self) -> Value: """Load unboxed False value (type: bool_rprimitive).""" @@ -1008,7 +1013,7 @@ def compare_tuples(self, return result length = len(lhs.type.types) false_assign, true_assign, out = BasicBlock(), BasicBlock(), BasicBlock() - check_blocks = [BasicBlock() for i in range(length)] + check_blocks = [BasicBlock() for _ in range(length)] lhs_items = [self.add(TupleGet(lhs, i, line)) for i in range(length)] rhs_items = [self.add(TupleGet(rhs, i, line)) for i in range(length)] @@ -1137,8 +1142,15 @@ def new_list_op_with_length(self, length: Value, line: int) -> Value: return self.call_c(new_list_op, [length], line) def new_list_op(self, values: List[Value], line: int) -> Value: - length = Integer(len(values), c_pyssize_t_rprimitive, line) - result_list = self.call_c(new_list_op, [length], line) + length: List[Value] = [Integer(len(values), c_pyssize_t_rprimitive, line)] + if len(values) >= LIST_BUILDING_EXPANSION_THRESHOLD: + return self.call_c(list_build_op, length + values, line) + + # If the length of the list is less than the threshold, + # LIST_BUILDING_EXPANSION_THRESHOLD, we directly expand the + # for-loop and inline the SetMem operation, which is faster + # than list_build_op, however generates more code. 
+ result_list = self.call_c(new_list_op, length, line) if len(values) == 0: return result_list args = [self.coerce(item, object_rprimitive, line) for item in values] @@ -1174,7 +1186,7 @@ def shortcircuit_helper(self, op: str, # Having actual Phi nodes would be really nice here! target = Register(expr_type) # left_body takes the value of the left side, right_body the right - left_body, right_body, next = BasicBlock(), BasicBlock(), BasicBlock() + left_body, right_body, next_block = BasicBlock(), BasicBlock(), BasicBlock() # true_body is taken if the left is true, false_body if it is false. # For 'and' the value is the right side if the left is true, and for 'or' # it is the right side if the left is false. @@ -1187,15 +1199,15 @@ def shortcircuit_helper(self, op: str, self.activate_block(left_body) left_coerced = self.coerce(left_value, expr_type, line) self.add(Assign(target, left_coerced)) - self.goto(next) + self.goto(next_block) self.activate_block(right_body) right_value = right() right_coerced = self.coerce(right_value, expr_type, line) self.add(Assign(target, right_coerced)) - self.goto(next) + self.goto(next_block) - self.activate_block(next) + self.activate_block(next_block) return target def add_bool_branch(self, value: Value, true: BasicBlock, false: BasicBlock) -> None: diff --git a/mypyc/lib-rt/CPy.h b/mypyc/lib-rt/CPy.h index 1ec510f57c2a..bc1cced301a4 100644 --- a/mypyc/lib-rt/CPy.h +++ b/mypyc/lib-rt/CPy.h @@ -318,6 +318,7 @@ PyObject *CPyObject_GetSlice(PyObject *obj, CPyTagged start, CPyTagged end); // List operations +PyObject *CPyList_Build(Py_ssize_t len, ...); PyObject *CPyList_GetItem(PyObject *list, CPyTagged index); PyObject *CPyList_GetItemUnsafe(PyObject *list, CPyTagged index); PyObject *CPyList_GetItemShort(PyObject *list, CPyTagged index); diff --git a/mypyc/lib-rt/list_ops.c b/mypyc/lib-rt/list_ops.c index ccc8390966a0..28547cfd7b60 100644 --- a/mypyc/lib-rt/list_ops.c +++ b/mypyc/lib-rt/list_ops.c @@ -5,6 +5,26 @@ #include #include 
"CPy.h" +PyObject *CPyList_Build(Py_ssize_t len, ...) { + Py_ssize_t i; + + PyObject *res = PyList_New(len); + if (res == NULL) { + return NULL; + } + + va_list args; + va_start(args, len); + for (i = 0; i < len; i++) { + // Steals the reference + PyObject *value = va_arg(args, PyObject *); + PyList_SET_ITEM(res, i, value); + } + va_end(args); + + return res; +} + PyObject *CPyList_GetItemUnsafe(PyObject *list, CPyTagged index) { Py_ssize_t n = CPyTagged_ShortAsSsize_t(index); PyObject *result = PyList_GET_ITEM(list, n); diff --git a/mypyc/primitives/list_ops.py b/mypyc/primitives/list_ops.py index 9abb0a46f4a8..ec0abed42915 100644 --- a/mypyc/primitives/list_ops.py +++ b/mypyc/primitives/list_ops.py @@ -22,8 +22,7 @@ arg_types=[object_rprimitive], return_type=list_rprimitive, c_function_name='PySequence_List', - error_kind=ERR_MAGIC, -) + error_kind=ERR_MAGIC) new_list_op = custom_op( arg_types=[c_pyssize_t_rprimitive], @@ -31,6 +30,14 @@ c_function_name='PyList_New', error_kind=ERR_MAGIC) +list_build_op = custom_op( + arg_types=[c_pyssize_t_rprimitive], + return_type=list_rprimitive, + c_function_name='CPyList_Build', + error_kind=ERR_MAGIC, + var_arg_type=object_rprimitive, + steals=True) + # list[index] (for an integer index) list_get_item_op = method_op( name='__getitem__', diff --git a/mypyc/test-data/irbuild-lists.test b/mypyc/test-data/irbuild-lists.test index e08b8d500c14..d310efd4be30 100644 --- a/mypyc/test-data/irbuild-lists.test +++ b/mypyc/test-data/irbuild-lists.test @@ -93,6 +93,30 @@ L0: x = r0 return 1 +[case testNewListTenItems] +from typing import List +def f() -> None: + x: List[str] = ['a', 'b', 'c', 'd', 'e', + 'f', 'g', 'h', 'i', 'j'] +[out] +def f(): + r0, r1, r2, r3, r4, r5, r6, r7, r8, r9 :: str + r10, x :: list +L0: + r0 = 'a' + r1 = 'b' + r2 = 'c' + r3 = 'd' + r4 = 'e' + r5 = 'f' + r6 = 'g' + r7 = 'h' + r8 = 'i' + r9 = 'j' + r10 = CPyList_Build(10, r0, r1, r2, r3, r4, r5, r6, r7, r8, r9) + x = r10 + return 1 + [case testListMultiply] 
from typing import List def f(a: List[int]) -> None: diff --git a/mypyc/test-data/run-lists.test b/mypyc/test-data/run-lists.test index 9ef337c4e075..1366b1fd857e 100644 --- a/mypyc/test-data/run-lists.test +++ b/mypyc/test-data/run-lists.test @@ -75,6 +75,25 @@ print(primes(13)) \[0, 0, 1, 1] \[0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1] +[case testListBuild] +def test_list_build() -> None: + # Currently LIST_BUILDING_EXPANSION_THRESHOLD equals to 10 + # long list built by list_build_op + l1 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + l1.pop() + l1.append(100) + assert l1 == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 100] + # short list built by Setmem + l2 = [1, 2] + l2.append(3) + l2.pop() + l2.pop() + assert l2 == [1] + # empty list + l3 = [] + l3.append('a') + assert l3 == ['a'] + [case testListPrims] from typing import List