Skip to content

Commit

Permalink
Consolidate create_file_references
Browse files Browse the repository at this point in the history
  • Loading branch information
jmpaz committed Apr 15, 2024
1 parent d957d35 commit 0b665cc
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 63 deletions.
68 changes: 11 additions & 57 deletions contextualize/cli.py
Original file line number Diff line number Diff line change
@@ -1,80 +1,34 @@
import os
from pathspec import PathSpec
from pyperclip import copy
import argparse
from contextualize.reference import FileReference, concat_refs
from contextualize.reference import create_file_references
from contextualize.external import LinearClient, InvalidTokenError
from contextualize.tokenize import call_tiktoken
from contextualize.utils import read_config


def create_file_references(paths, ignore_paths=None, format="md", label="relative"):
    """Build a ``FileReference`` for every non-ignored file under ``paths``.

    Args:
        paths: File and/or directory paths. Directories are walked
            recursively; entries that are neither an existing file nor an
            existing directory are skipped.
        ignore_paths: Optional paths to files containing gitignore-style
            patterns, one per line. Entries that are not existing files are
            skipped.
        format: Passed through to ``FileReference``.
        label: Passed through to ``FileReference``.

    Returns:
        A list of ``FileReference`` objects in discovery order.
    """
    # Built-in defaults. ".git/", "venv/" and ".venv/" are currently not
    # part of this list.
    ignore_patterns = [
        ".gitignore",
        "__pycache__/",
        "__init__.py",
    ]

    for ignore_path in ignore_paths or ():
        if os.path.isfile(ignore_path):
            with open(ignore_path, "r") as ignore_file:
                ignore_patterns.extend(ignore_file.read().splitlines())

    # The pattern list is final from here on, so compile the spec once
    # instead of once per visited file or directory.
    spec = PathSpec.from_lines("gitwildmatch", ignore_patterns)

    file_references = []
    for path in paths:
        if os.path.isfile(path):
            if not spec.match_file(path):
                file_references.append(
                    FileReference(path, format=format, label=label)
                )
        elif os.path.isdir(path):
            for root, dirs, files in os.walk(path):
                # Assign in place so os.walk does not descend into
                # directories that match an ignore pattern.
                dirs[:] = [
                    d for d in dirs if not spec.match_file(os.path.join(root, d))
                ]
                for name in files:
                    file_path = os.path.join(root, name)
                    if not spec.match_file(file_path):
                        file_references.append(
                            FileReference(file_path, format=format, label=label)
                        )

    return file_references


def is_ignored(path, gitignore_patterns):
    """Report whether ``path`` matches any of the given gitignore-style patterns.

    Args:
        path: The file or directory path to test.
        gitignore_patterns: Iterable of pattern lines in "gitwildmatch" syntax.

    Returns:
        The result of ``PathSpec.match_file`` for ``path``.
    """
    return PathSpec.from_lines("gitwildmatch", gitignore_patterns).match_file(path)


def cat_cmd(args):
    """Concatenate the referenced files and emit the result.

    Builds the references from ``args.paths``, ``args.ignore``,
    ``args.format`` and ``args.label``, then:

    * writes the concatenated text to ``args.output_file`` when it is set;
    * copies it to the clipboard and reports the token count when
      ``args.output == "clipboard"``;
    * otherwise prints it to stdout, but only if no output file was given.
    """
    # create_file_references returns a dict; only the joined text is needed.
    references = create_file_references(
        args.paths, args.ignore, args.format, args.label
    )["concatenated"]

    if args.output_file:
        with open(args.output_file, "w") as file:
            file.write(references)
        print(f"Contents written to {args.output_file}")

    if args.output == "clipboard":
        try:
            copy(references)
            token_count = call_tiktoken(references)["count"]
            print(f"Copied {token_count} tokens to clipboard.")
        except Exception as e:
            # Best-effort at the CLI boundary: report the failure, don't crash.
            print(f"Error copying to clipboard: {e}")
    elif not args.output_file:
        print(references)


def ls_cmd(args):
file_references = create_file_references(args.paths)
references = create_file_references(args.paths)["refs"]
total_tokens = 0
encoding = None

Expand All @@ -83,7 +37,7 @@ def ls_cmd(args):
"Warning: Both 'encoding' and 'model' arguments provided. Using 'encoding' only."
)

for ref in file_references:
for ref in references:
if args.encoding:
result = call_tiktoken(ref.file_content, encoding_str=args.encoding)
elif args.model:
Expand All @@ -95,7 +49,7 @@ def ls_cmd(args):

output_str = (
f"{ref.path}: {result['count']} tokens"
if len(file_references) > 1
if len(references) > 1
else f"{result['count']} tokens"
)
print(output_str)
Expand All @@ -104,7 +58,7 @@ def ls_cmd(args):
if not encoding:
encoding = result["encoding"] # set once for the first file

if len(file_references) > 1:
if len(references) > 1:
print(f"\nTotal: {total_tokens} tokens ({encoding})")


Expand Down
58 changes: 52 additions & 6 deletions contextualize/reference.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,49 @@
import os
from pathspec import PathSpec


def create_file_references(paths, ignore_paths=None, format="md", label="relative"):
    """Collect ``FileReference`` objects for ``paths`` and their joined output.

    Args:
        paths: File and/or directory paths. Directories are walked
            recursively; entries that are neither an existing file nor an
            existing directory are skipped.
        ignore_paths: Optional paths to files containing gitignore-style
            patterns, one per line. Entries that are not existing files are
            skipped.
        format: Passed through to ``FileReference``.
        label: Passed through to ``FileReference``.

    Returns:
        A dict with two keys: ``"refs"``, the list of ``FileReference``
        objects in discovery order, and ``"concatenated"``, the result of
        ``concat_refs`` applied to that list.
    """
    ignore_patterns = [
        ".gitignore",
        "__pycache__/",
        "__init__.py",
    ]

    for ignore_path in ignore_paths or ():
        if os.path.isfile(ignore_path):
            with open(ignore_path, "r") as ignore_file:
                ignore_patterns.extend(ignore_file.read().splitlines())

    # The pattern list is final from here on, so compile the spec once
    # instead of once per visited file or directory.
    spec = PathSpec.from_lines("gitwildmatch", ignore_patterns)

    file_references = []
    for path in paths:
        if os.path.isfile(path):
            if not spec.match_file(path):
                file_references.append(
                    FileReference(path, format=format, label=label)
                )
        elif os.path.isdir(path):
            for root, dirs, files in os.walk(path):
                # Assign in place so os.walk does not descend into
                # directories that match an ignore pattern.
                dirs[:] = [
                    d for d in dirs if not spec.match_file(os.path.join(root, d))
                ]
                for name in files:
                    file_path = os.path.join(root, name)
                    if not spec.match_file(file_path):
                        file_references.append(
                            FileReference(file_path, format=format, label=label)
                        )

    return {"refs": file_references, "concatenated": concat_refs(file_references)}


def concat_refs(file_references: list):
    """Join the rendered output of each reference into a single string.

    References whose ``output`` is empty are left out, so a file that could
    not be read does not leave a run of blank separators in the result.

    Args:
        file_references: Objects exposing an ``output`` string attribute.

    Returns:
        The non-empty outputs separated by one blank line, or an empty
        string when there is nothing to join.
    """
    return "\n\n".join(ref.output for ref in file_references if ref.output)


class FileReference:
Expand All @@ -10,18 +55,23 @@ def __init__(
self.format = format
self.label = label
self.clean_contents = clean_contents
self.file_content = ""
self.output = self.get_contents()

def get_contents(self):
try:
with open(self.path, "r") as file:
contents = file.read()
self.file_content = file.read()
except Exception as e:
print(f"Error reading file {self.path}: {str(e)}")
return ""

return process_text(
contents, self.clean_contents, self.range, self.format, self.get_label()
self.file_content,
self.clean_contents,
self.range,
self.format,
self.get_label(),
)

def get_label(self):
Expand All @@ -35,10 +85,6 @@ def get_label(self):
return ""


def concat_refs(file_references: list):
    """Join the rendered output of each reference into a single string.

    References whose ``output`` is empty are left out, so a file that could
    not be read does not leave a run of blank separators in the result.

    Args:
        file_references: Objects exposing an ``output`` string attribute.

    Returns:
        The non-empty outputs separated by one blank line, or an empty
        string when there is nothing to join.
    """
    return "\n\n".join(ref.output for ref in file_references if ref.output)


def _clean(text):
return text.replace(" ", "\t")

Expand Down

0 comments on commit 0b665cc

Please sign in to comment.