def get_markdown_codeblock_type(filename: str) -> str:
    """Return the markdown code-block type corresponding to the file extension.

    The MIME type guessed by ``mimetypes.guess_type`` is consulted first. That
    table is loaded from the host system and differs between platforms and
    Python versions, so when the guessed type is unknown the lower-cased file
    extension is used as a deterministic fallback.

    Args:
        filename: A file name or path; only its name/extension is inspected.

    Returns:
        The code-block language tag (for example ``"python"``), or ``"text"``
        when neither the MIME type nor the extension is recognised.
    """
    by_mime = {
        "text/x-shellscript": "bash",
        "application/x-sh": "bash",  # what the stdlib table reports for ``.sh``
        "text/x-sh": "bash",
        "text/x-c++src": "cpp",
        "text/css": "css",
        "text/html": "html",
        "text/x-java": "java",
        "application/javascript": "javascript",
        "text/javascript": "javascript",  # newer registrations of ``.js``
        "application/json": "json",
        "text/x-python": "python",
        "text/x-ruby": "ruby",
        "application/sql": "sql",
    }
    by_extension = {
        ".sh": "bash",
        ".bash": "bash",
        ".cpp": "cpp",
        ".cc": "cpp",
        ".cxx": "cpp",
        ".hpp": "cpp",
        ".css": "css",
        ".html": "html",
        ".htm": "html",
        ".java": "java",
        ".js": "javascript",
        ".mjs": "javascript",
        ".json": "json",
        ".py": "python",
        ".rb": "ruby",
        ".sql": "sql",
    }

    mime_type, _ = mimetypes.guess_type(filename)
    if mime_type in by_mime:
        return by_mime[mime_type]

    # The MIME table had no usable answer; decide from the extension alone.
    extension = os.path.splitext(filename)[1].lower()
    return by_extension.get(extension, "text")
async def repo_to_markdown(repo_path: str | Path, output: str | Path = None, gitignore: str | Path = None) -> str:
    """
    Convert a local repository into a markdown representation.

    The document starts with a directory-tree section and is followed by one
    section per file that is not excluded by the gitignore rules.

    Args:
        repo_path (str | Path): The path to the local repository.
        output (str | Path, optional): Where to save the generated markdown. Nothing is written when falsy.
        gitignore (str | Path, optional): The path to the .gitignore file. When omitted, the
            ``.gitignore`` two directories above this module is used.

    Returns:
        str: The markdown representation of the repository.
    """
    repo_path = Path(repo_path)
    gitignore = Path(gitignore or Path(__file__).parent / "../../.gitignore").resolve()

    markdown = await _write_dir_tree(repo_path=repo_path, gitignore=gitignore)

    gitignore_rules = parse_gitignore(full_path=str(gitignore))
    markdown += await _write_files(repo_path=repo_path, gitignore_rules=gitignore_rules)

    if output:
        await awrite(filename=str(output), data=markdown, encoding="utf-8")
    return markdown


async def _write_dir_tree(repo_path: Path, gitignore: Path) -> str:
    """Render the "Directory Tree" section.

    The external ``tree`` command is tried first; on any failure the error is
    logged and the pure-Python implementation is used instead.
    """
    try:
        content = tree(repo_path, gitignore, run_command=True)
    except Exception as e:
        logger.info(f"{e}, using safe mode.")
        content = tree(repo_path, gitignore, run_command=False)

    doc = f"## Directory Tree\n```text\n{content}\n```\n---\n\n"
    return doc


async def _write_files(repo_path, gitignore_rules) -> str:
    """Concatenate the markdown sections of every file not matched by ``gitignore_rules``."""
    sections = []
    for filename in list_files(repo_path):
        if gitignore_rules(str(filename)):
            continue
        sections.append(await _write_file(filename=filename, repo_path=repo_path))
    # Join once instead of growing a string inside the loop.
    return "".join(sections)


async def _write_file(filename: Path, repo_path: Path) -> str:
    """Render one file as a markdown section headed by its path relative to ``repo_path``.

    Files whose guessed MIME type is unknown or is not ``text/*`` are listed by
    heading only; their content is not read.
    """
    relative_path = filename.relative_to(repo_path)
    markdown = f"## {relative_path}\n"

    mime_type, _ = mimetypes.guess_type(filename.name)
    # guess_type() returns None for names it cannot classify (e.g. "Makefile");
    # treat that like a non-text file instead of failing on ``in None``.
    # NOTE(review): get_markdown_codeblock_type maps some application/* types
    # (e.g. JSON), but such files are skipped here - confirm that is intended.
    if not mime_type or "text/" not in mime_type:
        logger.info(f"Ignore content: {filename}")
        markdown += "\n---\n\n"
        return markdown

    content = await aread(filename, encoding="utf-8")
    # Escape fences and rules so file content cannot break the document structure.
    content = content.replace("```", "\\`\\`\\`").replace("---", "\\-\\-\\-")
    code_block_type = get_markdown_codeblock_type(filename.name)
    markdown += f"```{code_block_type}\n{content}\n```\n---\n\n"
    return markdown
+ + Example: + >>> tree(".") + utils + +-- serialize.py + +-- project_repo.py + +-- tree.py + +-- mmdc_playwright.py + +-- __pycache__ + | +-- __init__.cpython-39.pyc + | +-- redis.cpython-39.pyc + | +-- singleton.cpython-39.pyc + +-- parse_docstring.py + + >>> tree(".", gitignore="../../.gitignore") + utils + +-- serialize.py + +-- project_repo.py + +-- tree.py + +-- mmdc_playwright.py + +-- parse_docstring.py + + >>> tree(".", gitignore="../../.gitignore", run_command=True) + utils + ├── serialize.py + ├── project_repo.py + ├── tree.py + ├── mmdc_playwright.py + └── parse_docstring.py + + + """ + root = Path(root).resolve() + if run_command: + return _execute_tree(root, gitignore) + + git_ignore_rules = parse_gitignore(gitignore) if gitignore else None + dir_ = {root.name: _list_children(root=root, git_ignore_rules=git_ignore_rules)} + v = _print_tree(dir_) + return "\n".join(v) + + +def _list_children(root: Path, git_ignore_rules: Callable) -> Dict[str, Dict]: + dir_ = {} + for i in root.iterdir(): + if git_ignore_rules and git_ignore_rules(str(i)): + continue + try: + if i.is_file(): + dir_[i.name] = {} + else: + dir_[i.name] = _list_children(root=i, git_ignore_rules=git_ignore_rules) + except (FileNotFoundError, PermissionError, OSError): + dir_[i.name] = {} + return dir_ + + +def _print_tree(dir_: Dict[str:Dict]) -> List[str]: + ret = [] + for name, children in dir_.items(): + ret.append(name) + if not children: + continue + lines = _print_tree(children) + for j, v in enumerate(lines): + if v[0] not in ["+", " ", "|"]: + ret = _add_line(ret) + row = f"+-- {v}" + else: + row = f" {v}" + ret.append(row) + return ret + + +def _add_line(rows: List[str]) -> List[str]: + for i in range(len(rows) - 1, -1, -1): + v = rows[i] + if v[0] != " ": + return rows + rows[i] = "|" + v[1:] + return rows + + +def _execute_tree(root: Path, gitignore: str | Path) -> str: + args = ["--gitfile", str(gitignore)] if gitignore else [] + try: + result = subprocess.run(["tree"] + args + 
@pytest.mark.parametrize(
    ["repo_path", "output"],
    [(Path(__file__).parent.parent, Path(__file__).parent.parent.parent / f"workspace/unittest/{uuid.uuid4().hex}.md")],
)
@pytest.mark.asyncio
async def test_repo_to_markdown(repo_path: Path, output: Path):
    """The export returns non-empty markdown and writes it to ``output``."""
    try:
        markdown = await repo_to_markdown(repo_path=repo_path, output=output)
        assert output.exists()
        assert markdown
    finally:
        # Remove the generated file even when an assertion above fails.
        output.unlink(missing_ok=True)


# (root, gitignore) pairs shared by the Python and the command-backed tests.
_TREE_CASES = [
    (str(Path(__file__).parent / "../.."), None),
    (str(Path(__file__).parent / "../.."), str(Path(__file__).parent / "../../../.gitignore")),
]


@pytest.mark.parametrize(("root", "rules"), _TREE_CASES)
def test_tree(root: str, rules: str):
    """The pure-Python implementation returns a non-empty rendering."""
    v = tree(root=root, gitignore=rules)
    assert v


@pytest.mark.parametrize(("root", "rules"), _TREE_CASES)
def test_tree_command(root: str, rules: str):
    """The external ``tree`` command returns a non-empty rendering."""
    v = tree(root=root, gitignore=rules, run_command=True)
    assert v


@pytest.mark.parametrize(
    ("dir_", "want"),
    [
        ({"a": {"b": {}, "c": {}}}, ["a", "+-- b", "+-- c"]),
        ({"a": {"b": {}, "c": {"d": {}}}}, ["a", "+-- b", "+-- c", "    +-- d"]),
        (
            {"a": {"b": {"e": {"f": {}, "g": {}}}, "c": {"d": {}}}},
            ["a", "+-- b", "|   +-- e", "|       +-- f", "|       +-- g", "+-- c", "    +-- d"],
        ),
        (
            {"h": {"a": {"b": {"e": {"f": {}, "g": {}}}, "c": {"d": {}}}, "i": {}}},
            [
                "h",
                "+-- a",
                "|   +-- b",
                "|   |   +-- e",
                "|   |       +-- f",
                "|   |       +-- g",
                "|   +-- c",
                "|       +-- d",
                "+-- i",
            ],
        ),
    ],
)
def test__print_tree(dir_: dict, want: List[str]):
    """Rows are indented by one 4-column gutter per ancestor level."""
    # The parameter is named ``dir_`` so it does not shadow the imported ``tree``.
    v = _print_tree(dir_)
    assert v == want


if __name__ == "__main__":
    pytest.main([__file__, "-s"])