diff --git a/CHANGES/8507.bugfix.rst b/CHANGES/8507.bugfix.rst new file mode 100644 index 00000000000..9739536202d --- /dev/null +++ b/CHANGES/8507.bugfix.rst @@ -0,0 +1,8 @@ +Removed blocking I/O in the event loop for static resources and refactored +exception handling -- by :user:`steverep`. + +File system calls when handling requests for static routes were moved to a +separate thread to potentially improve performance. Exception handling +was tightened in order to only return 403 Forbidden or 404 Not Found responses +for expected scenarios; 500 Internal Server Error would be returned for any +unknown errors. diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index 7fbe70ba6a3..7eb934848cb 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -8,6 +8,7 @@ import keyword import os import re +import sys import warnings from contextlib import contextmanager from functools import wraps @@ -78,6 +79,12 @@ else: BaseDict = dict +CIRCULAR_SYMLINK_ERROR = ( + OSError + if sys.version_info < (3, 10) and sys.platform.startswith("win32") + else RuntimeError +) + YARL_VERSION: Final[Tuple[int, ...]] = tuple(map(int, yarl_version.split(".")[:2])) HTTP_METHOD_RE: Final[Pattern[str]] = re.compile( @@ -661,59 +668,66 @@ def __iter__(self) -> Iterator[AbstractRoute]: async def _handle(self, request: Request) -> StreamResponse: rel_url = request.match_info["filename"] + filename = Path(rel_url) + if filename.anchor: + # rel_url is an absolute name like + # /static/\\machine_name\c$ or /static/D:\path + # where the static dir is totally different + raise HTTPForbidden() + + unresolved_path = self._directory.joinpath(filename) + loop = asyncio.get_running_loop() + return await loop.run_in_executor( + None, self._resolve_path_to_response, unresolved_path + ) + + def _resolve_path_to_response(self, unresolved_path: Path) -> StreamResponse: + """Take the unresolved path and query the file system to form a response.""" + # Check for access outside the root directory. For follow symlinks, URI + # cannot traverse out, but symlinks can. Otherwise, no access outside + # root is permitted. try: - filename = Path(rel_url) - if filename.anchor: - # rel_url is an absolute name like - # /static/\\machine_name\c$ or /static/D:\path - # where the static dir is totally different - raise HTTPForbidden() - unresolved_path = self._directory.joinpath(filename) if self._follow_symlinks: normalized_path = Path(os.path.normpath(unresolved_path)) normalized_path.relative_to(self._directory) - filepath = normalized_path.resolve() + file_path = normalized_path.resolve() else: - filepath = unresolved_path.resolve() - filepath.relative_to(self._directory) - except (ValueError, FileNotFoundError) as error: - # relatively safe - raise HTTPNotFound() from error - except HTTPForbidden: - raise - except Exception as error: - # perm error or other kind! - request.app.logger.exception(error) + file_path = unresolved_path.resolve() + file_path.relative_to(self._directory) + except (ValueError, CIRCULAR_SYMLINK_ERROR) as error: + # ValueError for relative check; RuntimeError for circular symlink. raise HTTPNotFound() from error - # on opening a dir, load its contents if allowed - if filepath.is_dir(): - if self._show_index: - try: + # if path is a directory, return the contents if permitted. Note the + # directory check will raise if a segment is not readable. + try: + if file_path.is_dir(): + if self._show_index: return Response( - text=self._directory_as_html(filepath), content_type="text/html" + text=self._directory_as_html(file_path), + content_type="text/html", ) - except PermissionError: + else: raise HTTPForbidden() - else: - raise HTTPForbidden() - elif filepath.is_file(): - return FileResponse(filepath, chunk_size=self._chunk_size) - else: - raise HTTPNotFound + except PermissionError as error: + raise HTTPForbidden() from error + + # Not a regular file or does not exist. + if not file_path.is_file(): + raise HTTPNotFound() - def _directory_as_html(self, filepath: Path) -> str: - # returns directory's index as html + return FileResponse(file_path, chunk_size=self._chunk_size) - # sanity check - assert filepath.is_dir() + def _directory_as_html(self, dir_path: Path) -> str: + """returns directory's index as html.""" + assert dir_path.is_dir() - relative_path_to_dir = filepath.relative_to(self._directory).as_posix() + relative_path_to_dir = dir_path.relative_to(self._directory).as_posix() index_of = f"Index of /{html_escape(relative_path_to_dir)}" h1 = f"