From dba0a4c9901e4c258be00489a16eccb88a0eb8ac Mon Sep 17 00:00:00 2001 From: Tomoki Imai Date: Wed, 27 Nov 2024 01:00:54 +0900 Subject: [PATCH] Fix the wrong Content-Length in python-server.py for non-ascii characters. (#24480) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves: https://github.com/microsoft/vscode-python/issues/24479 `python_server.py` currently uses `sys.stdin.read` for reading the input, and `sys.stdin.read` interprets its size argument as a number of `str` characters (Unicode code points), not bytes. ref: https://docs.python.org/3/library/sys.html On the other hand, "Content-Length" is the size in **bytes**, therefore we should not pass `content_length` to `sys.stdin.read`. For example, the length of `print("こんにちは世界")` is 16 as a `str`, but 30 in bytes once UTF-8 encoded. ``` >>> len('print("こんにちは世界")') 16 >>> len('print("こんにちは世界")'.encode()) 30 ``` This PR makes two changes. 1. Replace `sys.stdin.read(content_length)` with `sys.stdin.buffer.read(content_length).decode()`. 2. Make `_send_message` calculate "Content-Length" from bytes, not str. With these changes, the original issue https://github.com/microsoft/vscode-python/issues/24479 is resolved. ![image](https://github.com/user-attachments/assets/20e72a26-d4ad-4e16-9c5b-ed41055c95d9) --- python_files/python_server.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/python_files/python_server.py b/python_files/python_server.py index 40133917a3ec..1689d9b8f7f9 100644 --- a/python_files/python_server.py +++ b/python_files/python_server.py @@ -14,7 +14,8 @@ def _send_message(msg: str): - length_msg = len(msg) + # Content-Length is the data size in bytes. + length_msg = len(msg.encode()) STDOUT.buffer.write(f"Content-Length: {length_msg}\r\n\r\n{msg}".encode()) STDOUT.buffer.flush() @@ -55,10 +56,11 @@ def custom_input(prompt=""): try: send_request({"prompt": prompt}) headers = get_headers() + # Content-Length is the data size in bytes. 
content_length = int(headers.get("Content-Length", 0)) if content_length: - message_text = STDIN.read(content_length) + message_text = STDIN.buffer.read(content_length).decode() message_json = json.loads(message_text) return message_json["result"]["userInput"] except Exception: @@ -74,10 +76,11 @@ def handle_response(request_id): while not STDIN.closed: try: headers = get_headers() + # Content-Length is the data size in bytes. content_length = int(headers.get("Content-Length", 0)) if content_length: - message_text = STDIN.read(content_length) + message_text = STDIN.buffer.read(content_length).decode() message_json = json.loads(message_text) our_user_input = message_json["result"]["userInput"] if message_json["id"] == request_id: @@ -160,7 +163,7 @@ def get_value(self) -> str: def get_headers(): headers = {} while True: - line = STDIN.readline().strip() + line = STDIN.buffer.readline().decode().strip() if not line: break name, value = line.split(":", 1) @@ -172,10 +175,11 @@ def get_headers(): while not STDIN.closed: try: headers = get_headers() + # Content-Length is the data size in bytes. content_length = int(headers.get("Content-Length", 0)) if content_length: - request_text = STDIN.read(content_length) + request_text = STDIN.buffer.read(content_length).decode() request_json = json.loads(request_text) if request_json["method"] == "execute": execute(request_json, USER_GLOBALS)