diff --git a/.gitignore b/.gitignore index b59c2b93c7..2a0d3de2e4 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ *.py[cod] build +!/scripts/build build_dist venv venv_dev diff --git a/.pylintrc b/.pylintrc index 69ed006aef..3a146e6c38 100644 --- a/.pylintrc +++ b/.pylintrc @@ -52,7 +52,7 @@ ignore=CVS,Makefile,README.md # ignore-list. The regex matches against paths and can be in Posix or Windows # format. Because '\\' represents the directory delimiter on Windows systems, # it can't be used as an escape character. -ignore-paths= +ignore-paths=scripts/build/thrift_patch_for_3.12.diff # Files or directories matching the regular expression patterns are skipped. # The regex matches against base names, not paths. The default value ignores diff --git a/Makefile b/Makefile index 3edbfd76ec..9910872b30 100644 --- a/Makefile +++ b/Makefile @@ -116,6 +116,16 @@ venv: cd $(CC_ANALYZER) && pip3 install -r requirements.txt && \ cd $(CC_WEB) && pip3 install -r $(CC_WEB)/requirements.txt + # TODO: Remove this hack once Thrift is updated to a version that + # doesn't use deprecated Python code. + # https://issues.apache.org/jira/projects/THRIFT/issues/THRIFT-5847 + # Bare minimum work is to delete the 2 keyword arguments like this: + # sed -i "113,114d" $(CURRENT_DIR)/venv/lib/python3.*/site-packages/thrift/transport/THttpClient.py + # See the comments in the diff that is applied why we would want to do more + # than the bare minimum: + $(ACTIVATE_RUNTIME_VENV) && \ + python $(ROOT)/scripts/build/patch_thrift_in_current_venv.py $(ROOT) + venv_osx: # Create a virtual environment which can be used to run the build package. python3 -m venv venv --prompt="CodeChecker venv" && \ @@ -123,6 +133,16 @@ venv_osx: cd $(CC_ANALYZER) && pip3 install -r requirements_py/osx/requirements.txt && \ cd $(CC_WEB) && pip3 install -r requirements_py/osx/requirements.txt + # TODO: Remove this hack once Thrift is updated to a version that + # doesn't use deprecated Python code. + # https://issues.apache.org/jira/projects/THRIFT/issues/THRIFT-5847 + # Bare minimum work is to delete the 2 keyword arguments like this: + # sed -i "113,114d" $(CURRENT_DIR)/venv/lib/python3.*/site-packages/thrift/transport/THttpClient.py + # See the comments in the diff that is applied why we would want to do more + # than the bare minimum: + $(ACTIVATE_RUNTIME_VENV) && \ + python $(ROOT)/scripts/build/patch_thrift_in_current_venv.py $(ROOT) + clean_venv: rm -rf venv @@ -139,6 +159,16 @@ venv_dev: python3 -m venv venv_dev --prompt="CodeChecker venv-dev" && \ $(ACTIVATE_DEV_VENV) && $(PIP_DEV_DEPS_CMD) + # TODO: Remove this hack once Thrift is updated to a version that + # doesn't use deprecated Python code. + # https://issues.apache.org/jira/projects/THRIFT/issues/THRIFT-5847 + # Bare minimum work is to delete the 2 keyword arguments like this: + # sed -i "113,114d" $(CURRENT_DIR)/venv_dev/lib/python3.*/site-packages/thrift/transport/THttpClient.py + # See the comments in the diff that is applied why we would want to do more + # than the bare minimum: + $(ACTIVATE_DEV_VENV) && \ + python $(ROOT)/scripts/build/patch_thrift_in_current_venv.py $(ROOT) + clean_venv_dev: rm -rf venv_dev $(MAKE) -C $(CC_ANALYZER) clean_venv_dev diff --git a/scripts/build/patch_thrift_in_current_venv.py b/scripts/build/patch_thrift_in_current_venv.py new file mode 100755 index 0000000000..42daa29012 --- /dev/null +++ b/scripts/build/patch_thrift_in_current_venv.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 +""" +This script applies a patch to the Thrift library used in CodeChecker. + +### Purpose: +The Thrift library (used by CodeChecker) includes deprecated Python files in +its implementation. +This script patches those files to ensure compatibility with Python 3.12. + +### How It Works: +- It takes the CodeChecker root directory as an argument. +- It determines the Thrift installation path in the virtualenv. +- It applies a predefined Git patch to the relevant files using a relative path. + +### When to Remove: +Remove this script when upgrading to a Thrift version that no longer relies on +deprecated Python files, eliminating the need for patching. +""" + +import subprocess +import sys +import thrift +from pathlib import Path + +THRIFT_PATCH_FILENAME = "thrift_patch_for_3.12.patch" + +if len(sys.argv) < 2: + print("Usage: python script.py ") + sys.exit(1) + +codechecker_root_dir = Path(sys.argv[1]).resolve() + +if not codechecker_root_dir.is_dir(): + print(f"Error: {codechecker_root_dir} is not a valid directory") + sys.exit(1) + +script_dir = Path(__file__).resolve().parent +patch_file = script_dir / THRIFT_PATCH_FILENAME +thrift_lib_dir = Path(thrift.__path__[0]).resolve() + +try: + relative_thrift_lib_dir = thrift_lib_dir.relative_to(codechecker_root_dir) + + subprocess.run( + ["git", "apply", "--directory", str(relative_thrift_lib_dir), str(patch_file)], + cwd=codechecker_root_dir, + check=True, + ) + print(f"Patch applied successfully to {thrift_lib_dir}") +except ValueError: + print(f"Error: {thrift_lib_dir} is not inside {codechecker_root_dir}") + sys.exit(1) +except subprocess.CalledProcessError as e: + print(f"Error applying patch: {e}") + sys.exit(1) diff --git a/scripts/build/thrift_patch_for_3.12.diff b/scripts/build/thrift_patch_for_3.12.diff new file mode 100644 index 0000000000..8c042f87fe --- /dev/null +++ b/scripts/build/thrift_patch_for_3.12.diff @@ -0,0 +1,58 @@ +--- a/transport/THttpClient.py ++++ b/transport/THttpClient.py +@@ -65,7 +65,45 @@ class THttpClient(TTransportBase): + self.port = parsed.port or http_client.HTTPS_PORT + self.certfile = cert_file + self.keyfile = key_file +- self.context = ssl.create_default_context(cafile=cafile) if (cafile and not ssl_context) else ssl_context ++ # TODO: this is a hack that should be purged once Thrift's ++ # Python library correctly supports Python >=3.12 versions. ++ # Since from 3.12 the http.client.HTTPSConnection class' ++ # constructor does not support keyword argument 'key_file' ++ # and 'cert_file', we backport the logic here. ++ # The latest versions almost do the right thing, but the ++ # 'load_cert_chain' method is not called on the context object ++ # by default in the newer implementations. It is unclear ++ # whether CodeChecker's use of this THttpClient class would ++ # actually need this, but I think it is safer to have this ++ # called nevertheless to provide better parity with the original behaviour. ++ # ++ # old implementation of http.client.HTTPSConnection: ++ # https://github.com/python/cpython/blob/3.11/Lib/http/client.py#L1419 ++ # ++ # new implementation of http.client.HTTPSConnection: ++ # https://github.com/python/cpython/blob/3.13/Lib/http/client.py#L1454 ++ if cafile and not ssl_context: ++ if not Path(cafile).is_file(): ++ raise ValueError("Invalid cafile path: '%s'" % (cafile)) ++ ++ new_context = ssl.create_default_context(cafile=cafile) ++ ++ if ssl.HAS_ALPN: ++ new_context.set_alpn_protocols(['http/1.1', 'h2']) ++ ++ if key_file: ++ if not Path(cert_file).is_file(): ++ raise ValueError("Invalid cert_file path: '%s'" % (cert_file)) ++ if not Path(key_file).is_file(): ++ raise ValueError("Invalid key_file path: '%s'" % (key_file)) ++ try: ++ new_context.load_cert_chain(cert_file, key_file) ++ except ssl.SSLError as e: ++ raise ValueError("Failed to load cert/key files: %s" % (e)) ++ ++ self.context = new_context ++ else: ++ self.context = ssl_context + self.host = parsed.hostname + self.path = parsed.path + if parsed.query: +@@ -110,8 +148,6 @@ class THttpClient(TTransportBase): + timeout=self.__timeout) + elif self.scheme == 'https': + self.__http = http_client.HTTPSConnection(self.host, self.port, +- key_file=self.keyfile, +- cert_file=self.certfile, + timeout=self.__timeout, + context=self.context) + if self.using_proxy():