add LLMBashChain to experimental #11305

Merged
merged 1 commit on Oct 2, 2023
1 change: 1 addition & 0 deletions libs/experimental/langchain_experimental/llm_bash/__init__.py
@@ -0,0 +1 @@
"""Chain that interprets a prompt and executes bash code to perform bash operations."""
125 changes: 125 additions & 0 deletions libs/experimental/langchain_experimental/llm_bash/base.py
@@ -0,0 +1,125 @@
"""Chain that interprets a prompt and executes bash operations."""
from __future__ import annotations

import logging
import warnings
from typing import Any, Dict, List, Optional

from langchain.callbacks.manager import CallbackManagerForChainRun
from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain.schema import BasePromptTemplate, OutputParserException
from langchain.schema.language_model import BaseLanguageModel

from langchain_experimental.llm_bash.bash import BashProcess
from langchain_experimental.llm_bash.prompt import PROMPT
from langchain_experimental.pydantic_v1 import Extra, Field, root_validator

logger = logging.getLogger(__name__)


class LLMBashChain(Chain):
"""Chain that interprets a prompt and executes bash operations.

Example:
.. code-block:: python

            from langchain_experimental.llm_bash.base import LLMBashChain
from langchain.llms import OpenAI
llm_bash = LLMBashChain.from_llm(OpenAI())
"""

llm_chain: LLMChain
llm: Optional[BaseLanguageModel] = None
"""[Deprecated] LLM wrapper to use."""
input_key: str = "question" #: :meta private:
output_key: str = "answer" #: :meta private:
prompt: BasePromptTemplate = PROMPT
"""[Deprecated]"""
bash_process: BashProcess = Field(default_factory=BashProcess) #: :meta private:

class Config:
"""Configuration for this pydantic object."""

extra = Extra.forbid
arbitrary_types_allowed = True

@root_validator(pre=True)
def raise_deprecation(cls, values: Dict) -> Dict:
if "llm" in values:
warnings.warn(
"Directly instantiating an LLMBashChain with an llm is deprecated. "
"Please instantiate with llm_chain or using the from_llm class method."
)
if "llm_chain" not in values and values["llm"] is not None:
prompt = values.get("prompt", PROMPT)
values["llm_chain"] = LLMChain(llm=values["llm"], prompt=prompt)
return values

@root_validator
def validate_prompt(cls, values: Dict) -> Dict:
if values["llm_chain"].prompt.output_parser is None:
raise ValueError(
"The prompt used by llm_chain is expected to have an output_parser."
)
return values

@property
def input_keys(self) -> List[str]:
"""Expect input key.

:meta private:
"""
return [self.input_key]

@property
def output_keys(self) -> List[str]:
"""Expect output key.

:meta private:
"""
return [self.output_key]

def _call(
self,
inputs: Dict[str, Any],
run_manager: Optional[CallbackManagerForChainRun] = None,
) -> Dict[str, str]:
_run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
_run_manager.on_text(inputs[self.input_key], verbose=self.verbose)

t = self.llm_chain.predict(
question=inputs[self.input_key], callbacks=_run_manager.get_child()
)
_run_manager.on_text(t, color="green", verbose=self.verbose)
t = t.strip()
try:
parser = self.llm_chain.prompt.output_parser
command_list = parser.parse(t) # type: ignore[union-attr]
except OutputParserException as e:
_run_manager.on_chain_error(e, verbose=self.verbose)
raise e

if self.verbose:
_run_manager.on_text("\nCode: ", verbose=self.verbose)
_run_manager.on_text(
str(command_list), color="yellow", verbose=self.verbose
)
output = self.bash_process.run(command_list)
_run_manager.on_text("\nAnswer: ", verbose=self.verbose)
_run_manager.on_text(output, color="yellow", verbose=self.verbose)
return {self.output_key: output}

@property
def _chain_type(self) -> str:
return "llm_bash_chain"

@classmethod
def from_llm(
cls,
llm: BaseLanguageModel,
prompt: BasePromptTemplate = PROMPT,
**kwargs: Any,
) -> LLMBashChain:
llm_chain = LLMChain(llm=llm, prompt=prompt)
return cls(llm_chain=llm_chain, **kwargs)
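
For context, a minimal usage sketch of the chain added in this file. This is an illustrative example, not part of the PR: it assumes langchain's OpenAI wrapper is installed and OPENAI_API_KEY is set in the environment.

# Illustrative sketch, not part of this PR: assumes langchain's OpenAI
# wrapper is installed and OPENAI_API_KEY is set in the environment.
from langchain.llms import OpenAI
from langchain_experimental.llm_bash.base import LLMBashChain

llm_bash = LLMBashChain.from_llm(OpenAI(temperature=0), verbose=True)
# The LLM proposes commands inside a ```bash fence; BashOutputParser extracts
# them and BashProcess executes them, returning stdout as the answer.
print(llm_bash.run("Please write a bash script that prints 'Hello World!'"))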
184 changes: 184 additions & 0 deletions libs/experimental/langchain_experimental/llm_bash/bash.py
@@ -0,0 +1,184 @@
"""Wrapper around subprocess to run commands."""
from __future__ import annotations

import platform
import re
import subprocess
from typing import TYPE_CHECKING, List, Union
from uuid import uuid4

if TYPE_CHECKING:
import pexpect


class BashProcess:
"""
    Wrapper class for starting subprocesses.
    Uses the Python built-in subprocess.run() for
    one-off commands. Persistent processes are **not**
    available on Windows systems, as pexpect relies on
    Unix pseudoterminals (ptys); macOS and Linux
    are supported.

Example:
.. code-block:: python

            from langchain_experimental.llm_bash.bash import BashProcess

            bash = BashProcess(
                strip_newlines=False,
                return_err_output=False,
                persistent=False,
            )
            bash.run("echo 'hello world'")

"""

strip_newlines: bool = False
"""Whether or not to run .strip() on the output"""
return_err_output: bool = False
"""Whether or not to return the output of a failed
command, or just the error message and stacktrace"""
persistent: bool = False
"""Whether or not to spawn a persistent session
NOTE: Unavailable for Windows environments"""

def __init__(
self,
strip_newlines: bool = False,
return_err_output: bool = False,
persistent: bool = False,
):
"""
Initializes with default settings
"""
self.strip_newlines = strip_newlines
self.return_err_output = return_err_output
self.prompt = ""
self.process = None
if persistent:
self.prompt = str(uuid4())
            self.process = self._initialize_persistent_process(self.prompt)

@staticmethod
def _lazy_import_pexpect() -> pexpect:
"""Import pexpect only when needed."""
if platform.system() == "Windows":
raise ValueError(
"Persistent bash processes are not yet supported on Windows."
)
try:
import pexpect

except ImportError:
raise ImportError(
"pexpect required for persistent bash processes."
" To install, run `pip install pexpect`."
)
return pexpect

    def _initialize_persistent_process(self, prompt: str) -> pexpect.spawn:
        """
        Initializes a persistent bash session in a
        clean environment (bash --norc --noprofile under env -i).
        NOTE: Does not work on Windows.

        Args:
            prompt (str): a unique sentinel string used as the shell
                prompt, so that command completion can be detected
        """  # noqa: E501
pexpect = self._lazy_import_pexpect()
process = pexpect.spawn(
"env", ["-i", "bash", "--norc", "--noprofile"], encoding="utf-8"
)
# Set the custom prompt
process.sendline("PS1=" + prompt)

process.expect_exact(prompt, timeout=10)
return process

def run(self, commands: Union[str, List[str]]) -> str:
"""
Run commands in either an existing persistent
subprocess or on in a new subprocess environment.

Args:
commands(List[str]): a list of commands to
execute in the session
""" # noqa: E501
if isinstance(commands, str):
commands = [commands]
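        # Join multiple commands so they execute in a single shell invocation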
commands = ";".join(commands)
if self.process is not None:
return self._run_persistent(
commands,
)
else:
return self._run(commands)

def _run(self, command: str) -> str:
"""
Runs a command in a subprocess and returns
the output.

Args:
command: The command to run
""" # noqa: E501
try:
output = subprocess.run(
command,
shell=True,
check=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
).stdout.decode()
except subprocess.CalledProcessError as error:
if self.return_err_output:
return error.stdout.decode()
return str(error)
if self.strip_newlines:
output = output.strip()
return output

def process_output(self, output: str, command: str) -> str:
"""
Uses regex to remove the command from the output

Args:
output: a process' output string
command: the executed command
""" # noqa: E501
pattern = re.escape(command) + r"\s*\n"
output = re.sub(pattern, "", output, count=1)
return output.strip()

def _run_persistent(self, command: str) -> str:
"""
Runs commands in a persistent environment
and returns the output.

Args:
command: the command to execute
""" # noqa: E501
pexpect = self._lazy_import_pexpect()
if self.process is None:
raise ValueError("Process not initialized")
self.process.sendline(command)

        # Consume the stale prompt left over from the previous interaction,
        # then send an empty line so the next prompt is queued up
        self.process.expect(self.prompt, timeout=10)
        self.process.sendline("")

try:
self.process.expect([self.prompt, pexpect.EOF], timeout=10)
except pexpect.TIMEOUT:
return f"Timeout error while executing command {command}"
if self.process.after == pexpect.EOF:
return f"Exited with error status: {self.process.exitstatus}"
output = self.process.before
output = self.process_output(output, command)
if self.strip_newlines:
return output.strip()
return output
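
For context, a short sketch of using BashProcess directly. Persistent mode assumes a Unix system with pexpect installed; one-off mode relies only on the standard library.

# Illustrative sketch, not part of this PR.
from langchain_experimental.llm_bash.bash import BashProcess

bash = BashProcess(strip_newlines=True)
print(bash.run(["echo hello", "echo world"]))  # joined into "echo hello;echo world"

pbash = BashProcess(persistent=True)  # requires pexpect; Unix only
pbash.run("cd /tmp")
print(pbash.run("pwd"))  # prints /tmp, since the same shell session is reused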
64 changes: 64 additions & 0 deletions libs/experimental/langchain_experimental/llm_bash/prompt.py
@@ -0,0 +1,64 @@
# flake8: noqa
from __future__ import annotations

import re
from typing import List

from langchain.prompts.prompt import PromptTemplate
from langchain.schema import BaseOutputParser, OutputParserException

_PROMPT_TEMPLATE = """If someone asks you to perform a task, your job is to come up with a series of bash commands that will perform the task. There is no need to put "#!/bin/bash" in your answer. Make sure to reason step by step, using this format:

Question: "copy the files in the directory named 'target' into a new directory at the same level as target called 'myNewDirectory'"

I need to take the following actions:
- List all files in the directory
- Create a new directory
- Copy the files from the first directory into the second directory
```bash
ls
mkdir myNewDirectory
cp -r target/* myNewDirectory
```

That is the format. Begin!

Question: {question}"""


class BashOutputParser(BaseOutputParser):
"""Parser for bash output."""

def parse(self, text: str) -> List[str]:
if "```bash" in text:
return self.get_code_blocks(text)
else:
raise OutputParserException(
f"Failed to parse bash output. Got: {text}",
)

@staticmethod
def get_code_blocks(t: str) -> List[str]:
"""Get multiple code blocks from the LLM result."""
code_blocks: List[str] = []
# Bash markdown code blocks
pattern = re.compile(r"```bash(.*?)(?:\n\s*)```", re.DOTALL)
for match in pattern.finditer(t):
matched = match.group(1).strip()
if matched:
code_blocks.extend(
[line for line in matched.split("\n") if line.strip()]
)

return code_blocks

@property
def _type(self) -> str:
return "bash"


PROMPT = PromptTemplate(
input_variables=["question"],
template=_PROMPT_TEMPLATE,
output_parser=BashOutputParser(),
)
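
Finally, a sketch of BashOutputParser in isolation; the sample text is illustrative and mirrors the format that _PROMPT_TEMPLATE asks the model to produce.

# Illustrative sketch, not part of this PR.
from langchain_experimental.llm_bash.prompt import BashOutputParser

sample = """I need to take the following actions:
- List all files in the directory
- Create a new directory
```bash
ls
mkdir myNewDirectory
```
"""
print(BashOutputParser().parse(sample))  # -> ['ls', 'mkdir myNewDirectory']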