From 426b2b72269bfa284e52b44370cfe3b31003bd1e Mon Sep 17 00:00:00 2001 From: jrmccluskey Date: Fri, 3 May 2024 15:06:59 +0000 Subject: [PATCH 1/7] Fix trivial inference for Python 3.12 support --- .../typehints/intrinsic_one_ops.py | 98 +++++++++++++++++++ sdks/python/apache_beam/typehints/opcodes.py | 59 +++++++++++ .../typehints/trivial_inference.py | 29 +++++- .../typehints/trivial_inference_test.py | 1 + 4 files changed, 186 insertions(+), 1 deletion(-) create mode 100644 sdks/python/apache_beam/typehints/intrinsic_one_ops.py diff --git a/sdks/python/apache_beam/typehints/intrinsic_one_ops.py b/sdks/python/apache_beam/typehints/intrinsic_one_ops.py new file mode 100644 index 000000000000..4216e2cc13cd --- /dev/null +++ b/sdks/python/apache_beam/typehints/intrinsic_one_ops.py @@ -0,0 +1,98 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Defines the actions intrinsic bytecodes have on the frame. + +Each function here corresponds to a bytecode documented in +https://docs.python.org/2/library/dis.html or +https://docs.python.org/3/library/dis.html. The first argument is a (mutable) +FrameState object, the second the integer opcode argument. + +Bytecodes with more complicated behavior (e.g. modifying the program counter) +are handled inline rather than here. + +For internal use only; no backwards-compatibility guarantees. +""" +# pytype: skip-file + +from . import opcodes + + +def intrinsic_1_invalid(state, arg): + pass + + +def intrinsic_print(state, arg): + pass + + +def intrinsic_import_star(state, arg): + pass + + +def intrinsic_stopiteration_error(state, arg): + pass + + +def intrinsic_async_gen_wrap(state, arg): + pass + + +def intrinsic_unary_positive(state, arg): + opcodes.unary_positive(state, arg) + pass + + +def intrinsic_list_to_tuple(state, arg): + opcodes.list_to_tuple(state, arg) + pass + + +def intrinsic_typevar(state, arg): + pass + + +def intrinsic_paramspec(state, arg): + pass + + +def intrinsic_typevartuple(state, arg): + pass + + +def intrinsic_subscript_generic(state, arg): + pass + + +def intrinsic_typealias(state, arg): + pass + + +int_one_ops = [ + intrinsic_1_invalid, + intrinsic_print, + intrinsic_import_star, + intrinsic_stopiteration_error, + intrinsic_async_gen_wrap, + intrinsic_unary_positive, + intrinsic_list_to_tuple, + intrinsic_typevar, + intrinsic_paramspec, + intrinsic_typevartuple, + intrinsic_subscript_generic, + intrinsic_typealias +] diff --git a/sdks/python/apache_beam/typehints/opcodes.py b/sdks/python/apache_beam/typehints/opcodes.py index 5a35b56b9321..3d38f5a18d31 100644 --- a/sdks/python/apache_beam/typehints/opcodes.py +++ b/sdks/python/apache_beam/typehints/opcodes.py @@ -78,10 +78,26 @@ def nop(unused_state, unused_arg): pass +resume = nop + + def pop_top(state, unused_arg): state.stack.pop() +def end_for(state, unused_arg): + state.stack.pop() + state.stack.pop() + + +def end_send(state, unused_arg): + del state.stack[-2] + + +def copy(state, arg): + state.stack.append(state.stack[-arg]) + + def rot_n(state, n): state.stack[-n:] = [state.stack[-1]] + state.stack[-n:-1] @@ -188,6 +204,31 @@ def store_subscr(unused_state, unused_args): pass +def binary_slice(state, args): + end = state.stack.pop() + start = state.stack.pop() + base = Const.unwrap(state.stack.pop()) + if base is str: + out = base + elif isinstance(base, typehints.IndexableTypeConstraint): + out = base + else: + out = element_type(base) + state.stack.append(out) + + +def store_slice(state, args): + """Clears elements off the stack like it was constructing a + container, but leaves the container type back at stack[-1] + since that's all that is relevant for type checking. + """ + _ = state.stack.pop() # End + _ = state.stack.pop() # Start + container = state.stack.pop() # Container type + _ = state.stack.pop() # Values that would go in container + state.stack.append(container) + + print_item = pop_top print_newline = nop @@ -347,6 +388,8 @@ def load_attr(state, arg): Will replace with Any for builtin methods, but these don't have bytecode in CPython so that's okay. """ + if (sys.version_info.major, sys.version_info.minor) >= (3, 12): + arg = arg >> 1 o = state.stack.pop() name = state.get_name(arg) state.stack.append(_getattr(o, name)) @@ -417,6 +460,14 @@ def load_fast(state, arg): state.stack.append(state.vars[arg]) +load_fast_check = load_fast + + +def load_fast_and_clear(state, arg): + state.stack.append(state.vars[arg]) + state.vars[arg] = None + + def store_fast(state, arg): state.vars[arg] = state.stack.pop() @@ -425,6 +476,14 @@ def delete_fast(state, arg): state.vars[arg] = Any # really an error +def swap(state, arg): + state.stack[-arg], state.stack[-1] = state.stack[-1], state.stack[-arg] + + +def reraise(state, arg): + pass + + # bpo-43683 Adds GEN_START in Python 3.10, but removed in Python 3.11 # https://github.com/python/cpython/pull/25138 def gen_start(state, arg): diff --git a/sdks/python/apache_beam/typehints/trivial_inference.py b/sdks/python/apache_beam/typehints/trivial_inference.py index a880b5c70ea1..2e0ad806adf6 100644 --- a/sdks/python/apache_beam/typehints/trivial_inference.py +++ b/sdks/python/apache_beam/typehints/trivial_inference.py @@ -143,6 +143,8 @@ def copy(self): return FrameState(self.f, self.vars, self.stack, self.kw_names) def const_type(self, i): + print(self.co.co_consts) + print(self.co.co_consts[i]) return Const(self.co.co_consts[i]) def get_closure(self, i): @@ -359,6 +361,7 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): dis.dis(f) from . import opcodes simple_ops = dict((k.upper(), v) for k, v in opcodes.__dict__.items()) + from . import intrinsic_one_ops co = f.__code__ code = co.co_code @@ -432,6 +435,8 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): elif op in dis.haslocal: print('(' + co.co_varnames[arg] + ')', end=' ') elif op in dis.hascompare: + if (sys.version_info.major, sys.version_info.minor) >= (3, 12): + arg = arg >> 4 print('(' + dis.cmp_op[arg] + ')', end=' ') elif op in dis.hasfree: if free is None: @@ -578,7 +583,13 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): state = None elif opname in ('POP_JUMP_IF_TRUE', 'POP_JUMP_IF_FALSE'): state.stack.pop() - jmp = arg * jump_multiplier + # The arg was changed to be a relative delta instead of an absolute + # in 3.11, and became a full instruction instead of a + # pseudo-instruction in 3.12 + if (sys.version_info.major, sys.version_info.minor) >= (3, 12): + jmp = pc + arg * jump_multiplier + else: + jmp = arg * jump_multiplier jmp_state = state.copy() elif opname in ('POP_JUMP_FORWARD_IF_TRUE', 'POP_JUMP_FORWARD_IF_FALSE'): state.stack.pop() @@ -608,6 +619,10 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): state.stack.pop() elif opname == 'FOR_ITER': jmp = pc + arg * jump_multiplier + if sys.version_info >= (3, 12): + # The jump is relative to the next instruction after a cache call, + # so jump 4 more bytes. + jmp += 4 jmp_state = state.copy() jmp_state.stack.pop() state.stack.append(element_type(state.stack[-1])) @@ -641,6 +656,18 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): # No-op introduced in 3.11. Without handling this some # instructions have functionally > 2 byte size. pass + elif opname == 'RETURN_CONST': + # Introduced in 3.12. Handles returning constants directly + # instead of having a LOAD_CONST before a RETURN_VALUE. + returns.add(state.const_type(arg)) + state = None + elif opname == 'CALL_INTRINSIC_1': + int_op = intrinsic_one_ops.int_one_ops[arg] + if debug: + print("Executing intrinsic one op", int_op.__name__.upper()) + int_op(state, arg) + elif opname == 'CALL_INTRINSIC_2': + pass else: raise TypeInferenceError('unable to handle %s' % opname) diff --git a/sdks/python/apache_beam/typehints/trivial_inference_test.py b/sdks/python/apache_beam/typehints/trivial_inference_test.py index 4341d11d3604..48ccc8a6a2ed 100644 --- a/sdks/python/apache_beam/typehints/trivial_inference_test.py +++ b/sdks/python/apache_beam/typehints/trivial_inference_test.py @@ -206,6 +206,7 @@ def testTupleListComprehension(self): self.assertReturnType( typehints.List[int], lambda xs: [x for x in xs], [typehints.Tuple[int, int, int]]) + self.assertReturnType( typehints.List[typehints.Union[int, float]], lambda xs: [x for x in xs], [typehints.Tuple[int, float]]) From 67073dde31860d74be37affa413825394fd8b657 Mon Sep 17 00:00:00 2001 From: jrmccluskey Date: Fri, 3 May 2024 15:50:57 +0000 Subject: [PATCH 2/7] remove debugging prints --- sdks/python/apache_beam/typehints/opcodes.py | 6 +++--- sdks/python/apache_beam/typehints/trivial_inference.py | 2 -- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/sdks/python/apache_beam/typehints/opcodes.py b/sdks/python/apache_beam/typehints/opcodes.py index 3d38f5a18d31..75ef1314e874 100644 --- a/sdks/python/apache_beam/typehints/opcodes.py +++ b/sdks/python/apache_beam/typehints/opcodes.py @@ -205,8 +205,8 @@ def store_subscr(unused_state, unused_args): def binary_slice(state, args): - end = state.stack.pop() - start = state.stack.pop() + _ = state.stack.pop() + _ = state.stack.pop() base = Const.unwrap(state.stack.pop()) if base is str: out = base @@ -465,7 +465,7 @@ def load_fast(state, arg): def load_fast_and_clear(state, arg): state.stack.append(state.vars[arg]) - state.vars[arg] = None + del state.vars[arg] def store_fast(state, arg): diff --git a/sdks/python/apache_beam/typehints/trivial_inference.py b/sdks/python/apache_beam/typehints/trivial_inference.py index 2e0ad806adf6..cbe8c45d8a70 100644 --- a/sdks/python/apache_beam/typehints/trivial_inference.py +++ b/sdks/python/apache_beam/typehints/trivial_inference.py @@ -143,8 +143,6 @@ def copy(self): return FrameState(self.f, self.vars, self.stack, self.kw_names) def const_type(self, i): - print(self.co.co_consts) - print(self.co.co_consts[i]) return Const(self.co.co_consts[i]) def get_closure(self, i): From 24c3b4009d843298daf921fc2469018768b905c9 Mon Sep 17 00:00:00 2001 From: jrmccluskey Date: Fri, 3 May 2024 21:37:12 +0000 Subject: [PATCH 3/7] Address comments --- sdks/python/apache_beam/typehints/intrinsic_one_ops.py | 10 ++++++---- sdks/python/apache_beam/typehints/opcodes.py | 6 ++++++ sdks/python/apache_beam/typehints/trivial_inference.py | 9 ++++++--- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/sdks/python/apache_beam/typehints/intrinsic_one_ops.py b/sdks/python/apache_beam/typehints/intrinsic_one_ops.py index 4216e2cc13cd..c82141cb404f 100644 --- a/sdks/python/apache_beam/typehints/intrinsic_one_ops.py +++ b/sdks/python/apache_beam/typehints/intrinsic_one_ops.py @@ -18,8 +18,7 @@ """Defines the actions intrinsic bytecodes have on the frame. Each function here corresponds to a bytecode documented in -https://docs.python.org/2/library/dis.html or -https://docs.python.org/3/library/dis.html. The first argument is a (mutable) +https://docs.python.org/3/library/dis.html . The first argument is a (mutable) FrameState object, the second the integer opcode argument. Bytecodes with more complicated behavior (e.g. modifying the program counter) @@ -82,7 +81,10 @@ def intrinsic_typealias(state, arg): pass -int_one_ops = [ +# The order of operations in the table of the intrinsic one operations is +# defined in https://docs.python.org/3/library/dis.html#opcode-CALL_INTRINSIC_1 +# and may change between minor versions. +INT_ONE_OPS = tuple([ intrinsic_1_invalid, intrinsic_print, intrinsic_import_star, @@ -95,4 +97,4 @@ def intrinsic_typealias(state, arg): intrinsic_typevartuple, intrinsic_subscript_generic, intrinsic_typealias -] +]) diff --git a/sdks/python/apache_beam/typehints/opcodes.py b/sdks/python/apache_beam/typehints/opcodes.py index 75ef1314e874..11e607543e93 100644 --- a/sdks/python/apache_beam/typehints/opcodes.py +++ b/sdks/python/apache_beam/typehints/opcodes.py @@ -389,6 +389,12 @@ def load_attr(state, arg): CPython so that's okay. """ if (sys.version_info.major, sys.version_info.minor) >= (3, 12): + # Load attribute's arg was bit-shifted in 3.12 to also allow for + # adding extra information to the stack based on the lower byte, + # so we have to adjust it back. + # + # See https://docs.python.org/3/library/dis.html#opcode-LOAD_ATTR + # for more information. arg = arg >> 1 o = state.stack.pop() name = state.get_name(arg) diff --git a/sdks/python/apache_beam/typehints/trivial_inference.py b/sdks/python/apache_beam/typehints/trivial_inference.py index cbe8c45d8a70..8b6f43abaa83 100644 --- a/sdks/python/apache_beam/typehints/trivial_inference.py +++ b/sdks/python/apache_beam/typehints/trivial_inference.py @@ -434,6 +434,8 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): print('(' + co.co_varnames[arg] + ')', end=' ') elif op in dis.hascompare: if (sys.version_info.major, sys.version_info.minor) >= (3, 12): + # In 3.12 this arg was bit-shifted. Shifting it back avoids an + # out-of-index. arg = arg >> 4 print('(' + dis.cmp_op[arg] + ')', end=' ') elif op in dis.hasfree: @@ -660,12 +662,13 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): returns.add(state.const_type(arg)) state = None elif opname == 'CALL_INTRINSIC_1': - int_op = intrinsic_one_ops.int_one_ops[arg] + # Introduced in 3.12. The arg is an index into a table of + # operations reproduced in INT_ONE_OPS. Not all ops are + # relevant for our type checking infrastructure. + int_op = intrinsic_one_ops.INT_ONE_OPS[arg] if debug: print("Executing intrinsic one op", int_op.__name__.upper()) int_op(state, arg) - elif opname == 'CALL_INTRINSIC_2': - pass else: raise TypeInferenceError('unable to handle %s' % opname) From c260ef15b5b4accabff49b01e553f04c2b4b7e6d Mon Sep 17 00:00:00 2001 From: jrmccluskey Date: Fri, 3 May 2024 22:02:28 +0000 Subject: [PATCH 4/7] add unit test for intrinsic op order --- .../typehints/intrinsic_one_ops_test.py | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 sdks/python/apache_beam/typehints/intrinsic_one_ops_test.py diff --git a/sdks/python/apache_beam/typehints/intrinsic_one_ops_test.py b/sdks/python/apache_beam/typehints/intrinsic_one_ops_test.py new file mode 100644 index 000000000000..423bbb9400e6 --- /dev/null +++ b/sdks/python/apache_beam/typehints/intrinsic_one_ops_test.py @@ -0,0 +1,36 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Tests for apache_beam.typehints.intrinsic_one_ops.""" + +# pytype: skip-file + +import dis +import sys +import unittest + +from apache_beam.typehints import intrinsic_one_ops + + +class IntrinsicOneOpsTest(unittest.TestCase): + def testOperationsOrder(self): + if sys.version_info >= (3, 12): + dis_order = dis.__dict__['_intrinsic_1_descs'] + beam_order = list() + for fn in intrinsic_one_ops.INT_ONE_OPS: + beam_order.append(fn.__name__.upper()) + self.assertListEqual(dis_order, beam_order) From 45af07c3e711e8613fb8e05f6fbafbf18217dae9 Mon Sep 17 00:00:00 2001 From: jrmccluskey Date: Mon, 6 May 2024 13:41:44 +0000 Subject: [PATCH 5/7] linting --- sdks/python/apache_beam/typehints/intrinsic_one_ops_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/typehints/intrinsic_one_ops_test.py b/sdks/python/apache_beam/typehints/intrinsic_one_ops_test.py index 423bbb9400e6..148555ca08f1 100644 --- a/sdks/python/apache_beam/typehints/intrinsic_one_ops_test.py +++ b/sdks/python/apache_beam/typehints/intrinsic_one_ops_test.py @@ -30,7 +30,7 @@ class IntrinsicOneOpsTest(unittest.TestCase): def testOperationsOrder(self): if sys.version_info >= (3, 12): dis_order = dis.__dict__['_intrinsic_1_descs'] - beam_order = list() + beam_order = [] for fn in intrinsic_one_ops.INT_ONE_OPS: beam_order.append(fn.__name__.upper()) self.assertListEqual(dis_order, beam_order) From cc26125abb8f2ed71904b0d8476ac3db55a250f8 Mon Sep 17 00:00:00 2001 From: jrmccluskey Date: Mon, 6 May 2024 14:07:56 +0000 Subject: [PATCH 6/7] add unittest.main() --- sdks/python/apache_beam/typehints/intrinsic_one_ops_test.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sdks/python/apache_beam/typehints/intrinsic_one_ops_test.py b/sdks/python/apache_beam/typehints/intrinsic_one_ops_test.py index 148555ca08f1..c94c5f41b8e1 100644 --- a/sdks/python/apache_beam/typehints/intrinsic_one_ops_test.py +++ b/sdks/python/apache_beam/typehints/intrinsic_one_ops_test.py @@ -34,3 +34,7 @@ def testOperationsOrder(self): for fn in intrinsic_one_ops.INT_ONE_OPS: beam_order.append(fn.__name__.upper()) self.assertListEqual(dis_order, beam_order) + + +if __name__ == '__main__': + unittest.main() From 3f23c5b0926e786fa949aec1f5e06943bcf52ca7 Mon Sep 17 00:00:00 2001 From: jrmccluskey Date: Tue, 7 May 2024 14:32:38 +0000 Subject: [PATCH 7/7] suggestions --- .../apache_beam/typehints/intrinsic_one_ops_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sdks/python/apache_beam/typehints/intrinsic_one_ops_test.py b/sdks/python/apache_beam/typehints/intrinsic_one_ops_test.py index c94c5f41b8e1..8291ef2df5bb 100644 --- a/sdks/python/apache_beam/typehints/intrinsic_one_ops_test.py +++ b/sdks/python/apache_beam/typehints/intrinsic_one_ops_test.py @@ -27,13 +27,13 @@ class IntrinsicOneOpsTest(unittest.TestCase): - def testOperationsOrder(self): + def test_unary_intrinsic_ops_are_in_the_same_order_as_in_cpython(self): if sys.version_info >= (3, 12): dis_order = dis.__dict__['_intrinsic_1_descs'] - beam_order = [] + beam_ops = [fn.__name_upper() for fn in intrinsic_one_ops.INT_ONE_OPS] for fn in intrinsic_one_ops.INT_ONE_OPS: - beam_order.append(fn.__name__.upper()) - self.assertListEqual(dis_order, beam_order) + beam_ops.append(fn.__name__.upper()) + self.assertListEqual(dis_order, beam_ops) if __name__ == '__main__':