From df3f02d5644c214fe03a37ea9d022ac301ecb315 Mon Sep 17 00:00:00 2001 From: Steven Walton Date: Wed, 31 Jul 2024 01:56:25 -0700 Subject: [PATCH] Fixing Prompt Parser Prompt parser will get tripped up if "--" is found anywhere in the prompt and isn't surrounded by specific delimiters. Instead let's force arguments to be at the beginning of the prompt and collect no more arguments after. This way users don't have to place flags inside delimiters and we extend what we can pass to the LLMs. More context is discussed in #52 with an example. --- gptcli/__init__.py | 2 +- gptcli/cli.py | 42 +++++++++--------------------------------- 2 files changed, 10 insertions(+), 34 deletions(-) diff --git a/gptcli/__init__.py b/gptcli/__init__.py index d3ec452..3ced358 100644 --- a/gptcli/__init__.py +++ b/gptcli/__init__.py @@ -1 +1 @@ -__version__ = "0.2.0" +__version__ = "0.2.1" diff --git a/gptcli/cli.py b/gptcli/cli.py index 65e1040..669d5a8 100644 --- a/gptcli/cli.py +++ b/gptcli/cli.py @@ -114,39 +114,15 @@ def response_streamer(self) -> ResponseStreamer: def parse_args(input: str) -> Tuple[str, Dict[str, Any]]: - # Extract parts enclosed in specific delimiters (triple backticks, triple quotes, single backticks) - extracted_parts = [] - delimiters = ['```', '"""', '`'] - - def replacer(match): - for i, delimiter in enumerate(delimiters): - part = match.group(i + 1) - if part is not None: - extracted_parts.append((part, delimiter)) - break - return f"__EXTRACTED_PART_{len(extracted_parts) - 1}__" - - # Construct the regex pattern dynamically from the delimiters list - pattern_fragments = [re.escape(d) + '(.*?)' + re.escape(d) for d in delimiters] - pattern = re.compile('|'.join(pattern_fragments), re.DOTALL) - - input = pattern.sub(replacer, input) - - # Parse the remaining string for arguments - args = {} - regex = r'--(\w+)(?:=(\S+)|\s+(\S+))?' 
- matches = re.findall(regex, input) - - if matches: - for key, value1, value2 in matches: - value = value1 if value1 else value2 if value2 else '' - args[key] = value.strip("\"'") - input = re.sub(regex, "", input).strip() - - # Add back the extracted parts, with enclosing backticks or quotes - for i, (part, delimiter) in enumerate(extracted_parts): - input = input.replace(f"__EXTRACTED_PART_{i}__", f"{delimiter}{part.strip()}{delimiter}") - + # Pattern matches flags that start with -- or :, the flag name, a space or = + # deliminator, followed by an argument that begins with a letter and doesn't + # contain a space or = or a number + pattern = r"(?:--|:)(\w[^\s=]*)(?:[ =])(\w[^\s=]*|\d+[.]\d*)" + # Look for sequential flags that are space deliminated + arg_pattern = r"^(" + pattern + r"\s?)*" + args = re.findall(pattern, re.match(arg_pattern, input).group()) + args = dict((k,v) for k,v in args) + input = re.sub(arg_pattern, "", input) return input, args