Skip to content

Commit

Permalink
Fixing Prompt Parser
Browse files Browse the repository at this point in the history
The prompt parser gets tripped up if "--" is found anywhere in the
prompt and isn't surrounded by specific delimiters. Instead, let's
force arguments to be at the beginning of the prompt and collect no more
arguments after that. This way users don't have to place flags inside
delimiters, and we extend what we can pass to the LLMs.

More context is discussed in kharvd#52 with an example.
  • Loading branch information
stevenwalton committed Jul 31, 2024
1 parent 67491ba commit df3f02d
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 34 deletions.
2 changes: 1 addition & 1 deletion gptcli/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
# Diff of gptcli/__init__.py (one deletion, one addition): the first
# assignment is the pre-commit value, the second is the post-commit value.
__version__ = "0.2.0"
__version__ = "0.2.1"
42 changes: 9 additions & 33 deletions gptcli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,39 +114,15 @@ def response_streamer(self) -> ResponseStreamer:


def parse_args(input: str) -> Tuple[str, Dict[str, Any]]:
    """Split leading per-message flags off the front of a prompt.

    Flags are only recognised at the very beginning of ``input``. Once the
    first token that is not a well-formed flag is reached, parsing stops and
    everything from there on is returned verbatim as the prompt, so ``--``
    (or ``:``) appearing later in the text is never interpreted.

    A flag has the form ``--key value``, ``--key=value``, ``:key value`` or
    ``:key=value``. The key and the value each start with a word character
    and contain neither whitespace nor ``=``. The value must be a complete
    whitespace-delimited token; a malformed flag such as ``--model gpt=4``
    is left in the prompt untouched rather than being partially consumed.

    Args:
        input: The raw text typed by the user.

    Returns:
        A ``(prompt, args)`` tuple. ``prompt`` is ``input`` with the leading
        flags (and the single whitespace character that separates the last
        flag from the prompt) removed; when no flag is found it is ``input``
        unchanged. ``args`` maps each flag name to its value as a string;
        if a flag is repeated, the last occurrence wins.
    """
    # Optional whitespace, a "--" or ":" prefix, the key, one space or "="
    # as separator, then the value. The lookahead forces the value to end at
    # whitespace or end-of-input so a token like "gpt=4" is not cut in half.
    flag = re.compile(r"\s*(?:--|:)(\w[^\s=]*)[ =](\w[^\s=]*)(?=\s|$)")

    args: Dict[str, Any] = {}
    end = 0
    match = flag.match(input, end)
    while match:
        key, value = match.groups()
        args[key] = value
        end = match.end()
        match = flag.match(input, end)

    if not args:
        # No leading flags: hand the prompt back exactly as it was typed.
        return input, args

    prompt = input[end:]
    # Drop only the one whitespace character separating the flags from the
    # prompt; any further whitespace belongs to the prompt itself.
    if prompt[:1].isspace():
        prompt = prompt[1:]
    return prompt, args


Expand Down

0 comments on commit df3f02d

Please sign in to comment.