From d957d3516bb9781ae005cd9c0cfccfad18e10f5e Mon Sep 17 00:00:00 2001
From: jmpaz <30947643+jmpaz@users.noreply.github.com>
Date: Fri, 12 Apr 2024 15:51:54 -0400
Subject: [PATCH] Segment out delimiting functionality from `FileReference`
---
contextualize/reference.py | 102 ++++++++++++++++---------------------
1 file changed, 45 insertions(+), 57 deletions(-)
diff --git a/contextualize/reference.py b/contextualize/reference.py
index 4046068..d55a87b 100644
--- a/contextualize/reference.py
+++ b/contextualize/reference.py
@@ -3,61 +3,26 @@
class FileReference:
def __init__(
- self,
- path: str,
- range: tuple = None,
- format="md",
- label="relative",
- clean_contents=False,
+ self, path, range=None, format="md", label="relative", clean_contents=False
):
self.range = range
self.path = path
self.format = format
self.label = label
self.clean_contents = clean_contents
-
- # prepare the reference string
self.output = self.get_contents()
def get_contents(self):
try:
with open(self.path, "r") as file:
contents = file.read()
- self.file_content = contents
- contents = self.process(contents)
- return contents
- except UnicodeDecodeError:
- print(f"Skipping unreadable file: {self.path}")
- return ""
- except FileNotFoundError:
- print(f"File not found: {self.path}")
- return ""
except Exception as e:
- print(f"Error occurred while reading file: {self.path}")
- print(f"Error details: {str(e)}")
+ print(f"Error reading file {self.path}: {str(e)}")
return ""
- def process(self, contents):
- if self.clean_contents:
- contents = self.clean(contents)
- if self.range:
- contents = self.extract_range(contents, self.range)
- if self.format == "md":
- max_backticks = self.count_max_backticks(contents)
- contents = self.delineate(
- contents, self.format, self.get_label(), max_backticks
- )
- else:
- contents = self.delineate(contents, self.format, self.get_label())
- return contents
-
- def extract_range(self, contents, range):
- start, end = range
- lines = contents.split("\n")
- return "\n".join(lines[start - 1 : end])
-
- def clean(self, contents):
- return contents.replace(" ", "\t")
+ return process_text(
+ contents, self.clean_contents, self.range, self.format, self.get_label()
+ )
def get_label(self):
if self.label == "relative":
@@ -69,23 +34,46 @@ def get_label(self):
else:
return ""
- def count_max_backticks(self, contents):
- max_backticks = 0
- lines = contents.split("\n")
- for line in lines:
- if line.startswith("`"):
- max_backticks = max(max_backticks, len(line) - len(line.lstrip("`")))
- return max_backticks
-
- def delineate(self, contents, format, label, max_backticks=0):
- if format == "md":
- backticks_str = "`" * (max_backticks + 2) if max_backticks >= 3 else "```"
- return f"{backticks_str}{label}\n{contents}\n{backticks_str}"
- elif format == "xml":
- return f"\n{contents}\n"
- else:
- return contents
-
def concat_refs(file_references: list):
return "\n\n".join(ref.output for ref in file_references)
+
+
+def _clean(text):  # Return text with every occurrence of the literal below replaced by a tab character.
+ return text.replace(" ", "\t")  # NOTE(review): whitespace in this copy of the patch looks collapsed; the literal was presumably a run of spaces (one indent unit) -- confirm against the repository.
+
+
+def _extract_range(text, range):  # the `range` parameter shadows the builtin of the same name inside this function
+ """Return lines `start` through `end` of text (1-indexed, inclusive), joined with newlines."""
+ start, end = range  # range is unpacked as a two-item (start, end) sequence
+ lines = text.split("\n")
+ return "\n".join(lines[start - 1 : end])  # start is 1-based and the slice end is exclusive, so line `end` is included. NOTE(review): start < 1 is not validated; start == 0 slices from index -1.
+
+
+def _count_max_backticks(text):  # Return the longest run of leading backticks found on any line of text (0 if no line starts with one).
+ max_backticks = 0
+ lines = text.split("\n")
+ for line in lines:
+ if line.startswith("`"):  # only lines whose very first character is a backtick are considered; indented runs are not counted
+ max_backticks = max(max_backticks, len(line) - len(line.lstrip("`")))  # original length minus length after stripping = size of the leading backtick run
+ return max_backticks
+
+
+def _delimit(text, format, label, max_backticks=0):  # Wrap text according to format: "md" fences it with backticks, "xml" surrounds it with newlines, any other value returns text unchanged.
+ if format == "md":
+ backticks_str = "`" * (max_backticks + 2) if max_backticks >= 3 else "```"  # when text already has a fence of 3+ backticks, use one two longer (presumably so the inner fence cannot close the outer one); otherwise a plain ``` fence
+ return f"{backticks_str}{label}\n{text}\n{backticks_str}"  # label is placed directly after the opening fence
+ elif format == "xml":
+ return f"\n{text}\n"  # NOTE(review): label is unused in this branch and no tags are emitted -- possibly markup was lost in this copy of the patch; confirm against the repository
+ else:
+ return text
+
+
+def process_text(text, clean=False, range=None, format="md", label=""):  # Pipeline: optionally _clean, optionally _extract_range, then _delimit; returns the resulting string.
+ if clean:
+ text = _clean(text)
+ if range:  # truthiness test: None (the default) or an empty tuple skips range extraction
+ text = _extract_range(text, range)
+ max_backticks = _count_max_backticks(text)  # computed for every format; _delimit only reads it in its "md" branch
+ contents = _delimit(text, format, label, max_backticks)
+ return contents