diff --git a/.github/README.md b/.github/README.md
index 33439350..a607bb9c 100644
--- a/.github/README.md
+++ b/.github/README.md
@@ -97,7 +97,7 @@ To stop playing press Ctrl+C in either the terminal or mpv
List all subcommands
$ library
- library (v2.8.067; 79 subcommands)
+ library (v2.8.068; 80 subcommands)
Create database subcommands:
╭───────────────┬──────────────────────────────────────────╮
@@ -177,6 +177,8 @@ To stop playing press Ctrl+C in either the terminal or mpv
│ sample-compare │ Compare files using sample-hash and other shortcuts │
├────────────────┼─────────────────────────────────────────────────────┤
│ similar-files │ Find similar files based on filename and size │
+ ├────────────────┼─────────────────────────────────────────────────────┤
+ │ llm-map │ Run LLMs across multiple files │
╰────────────────┴─────────────────────────────────────────────────────╯
Tabular data subcommands:
@@ -770,9 +772,9 @@ BTW, for some cols like time_deleted you'll need to specify a where clause so th
- Set `--fixed-pages` to _always_ fetch the desired number of pages
If the website is supported by --auto-pager data is fetched twice when using page iteration.
- As such, page iteration (--max-pages, --fixed-pages, etc) is disabled when using `--auto-pager`.
+ As such, manual page iteration (--max-pages, --fixed-pages, etc) is disabled when using `--auto-pager`.
- You can set unset --fixed-pages for all the playlists in your database by running this command:
+ You can unset --fixed-pages for all the playlists in your database by running this command:
sqlite your.db "UPDATE playlists SET extractor_config = json_replace(extractor_config, '$.fixed_pages', null)"
To use "&p=1" instead of "&page=1"
@@ -1553,6 +1555,34 @@ BTW, for some cols like time_deleted you'll need to specify a where clause so th
+
+
+###### llm-map
+
+Run LLMs across multiple files
+
+ $ library llm-map -h
+ usage: library llm-map LLAMA_FILE [paths ...] [--llama-args LLAMA_ARGS] [--prompt STR] [--text [INT]] [--rename]
+
+ Run a llamafile with a prompt including path names and file contents
+
+ Rename files based on file contents
+
+ library llm-map ./gemma2.llamafile ~/Downloads/booka.pdf --rename --text
+
+ cat llm_map_renames.csv
+ Path,Output
+ /home/xk/Downloads/booka.pdf,/home/xk/Downloads/Mining_Massive_Datasets.pdf
+
+ To run a GGUF file directly, first install llamafile somewhere in your $PATH:
+
+ wget -P ~/Downloads https://github.com/Mozilla-Ocho/llamafile/releases/download/0.8.9/llamafile-0.8.9
+ chmod +x ~/Downloads/llamafile-0.8.9
+ mv ~/Downloads/llamafile-0.8.9 ~/.local/bin/llamafile # move it somewhere in your $PATH
+
+ library llm-map --model ~/Downloads/llava-v1.5-7b-Q4_K.gguf --image-model ~/Downloads/llava-v1.5-7b-mmproj-Q4_0.gguf --prompt 'what do you see?' ~/Downloads/comp_*.jpg
+
+
### Tabular data subcommands
diff --git a/xklb/__init__.py b/xklb/__init__.py
index 0ead5b34..cea18e5f 100644
--- a/xklb/__init__.py
+++ b/xklb/__init__.py
@@ -1 +1 @@
-__version__ = "2.8.067"
+__version__ = "2.8.068"
diff --git a/xklb/files/llm_map.py b/xklb/files/llm_map.py
index 7a0de85e..637c1cf7 100644
--- a/xklb/files/llm_map.py
+++ b/xklb/files/llm_map.py
@@ -10,22 +10,25 @@
def parse_args():
parser = argparse_utils.ArgumentParser(usage=usage.llm_map)
- parser.add_argument("--prompt", '-q', "--custom-prompt", help="Use a custom prompt")
+ parser.add_argument("--prompt", "-q", "--custom-prompt", help="Use a custom prompt")
parser.add_argument(
"--text", type=int, nargs="?", const=1500, help="Pass text file contents of each file to the LLM"
)
- parser.add_argument("--images", action="store_true", help="Treat paths as image files")
parser.add_argument("--rename", action="store_true", help="Use rename prompt")
parser.add_argument("--output", help="The output CSV file to save the results.")
arggroups.debug(parser)
parser.add_argument(
- '--model',
- '-m',
+ "--model",
+ "-m",
"--llamafile",
help="The path to the llamafile to run. If llamafile is in your PATH then you can also specify a GGUF file.",
)
- parser.add_argument('--image-model', '--mmproj', help="The path to the LLaVA vision GGUF model.")
+ parser.add_argument(
+ "--image-model",
+ "--mmproj",
+ help="The path to the LLaVA vision GGUF model. When specified, paths will be treated as image files",
+ )
parser.add_argument(
"--llama-args", "--custom-args", type=shlex.split, default=[], help="Use custom llamafile arguments"
)
@@ -43,14 +46,14 @@ def parse_args():
else:
raise NotImplementedError
- args.exe = which('llamafile')
+ args.exe = which("llamafile")
if args.exe:
- args.llama_args += ['-m', args.model]
+ args.llama_args += ["-m", args.model]
else:
args.exe = args.model
if args.image_model:
- args.llama_args += ['--mmproj', args.image_model]
+ args.llama_args += ["--mmproj", args.image_model]
if args.output is None:
args.output = f"llm_map_{args.prompt}.csv"
@@ -78,15 +81,15 @@ def llm_map():
prompt = args.prompt
replacements = {
- '{path}': "Existing path: " + path,
- '{abspath}': "Existing path: " + str(Path(path).absolute()),
- '{name}': "Existing filename: " + Path(path).name,
- '{stem}': "Existing filename: " + Path(path).stem,
+ "{path}": "Existing path: " + path,
+ "{abspath}": "Existing path: " + str(Path(path).absolute()),
+ "{name}": "Existing filename: " + Path(path).name,
+ "{stem}": "Existing filename: " + Path(path).stem,
}
for k, v in replacements.items():
- prompt.replace(k, '\n' + v + '\n')
+ prompt.replace(k, "\n" + v + "\n")
- if args.images:
+ if args.image_model:
args.llama_args += ["--image", str(Path(path).absolute())]
elif args.text:
file_contents = fs_add.munge_book_tags_fast(path)
diff --git a/xklb/text/extract_text.py b/xklb/text/extract_text.py
index 1f37943b..f4252dba 100644
--- a/xklb/text/extract_text.py
+++ b/xklb/text/extract_text.py
@@ -59,7 +59,7 @@ def get_text(args, url):
if not url.startswith("http") and Path(url).is_file():
text = fs_add.munge_book_tags_fast(url)
if text:
- yield text.get("tags").replace(';', '\n')
+ yield text.get("tags").replace(";", "\n")
yield None
if args.selenium:
diff --git a/xklb/usage.py b/xklb/usage.py
index 0bff63ef..e0b38b23 100644
--- a/xklb/usage.py
+++ b/xklb/usage.py
@@ -1646,4 +1646,12 @@ def play(action) -> str:
cat llm_map_renames.csv
Path,Output
/home/xk/Downloads/booka.pdf,/home/xk/Downloads/Mining_Massive_Datasets.pdf
+
+ You can run a GGUF file with this:
+
+ wget https://github.com/Mozilla-Ocho/llamafile/releases/download/0.8.9/llamafile-0.8.9
+ chmod +x ~/Downloads/llamafile-0.8.9
+ mv ~/Downloads/llamafile-0.8.9 ~/.local/bin/llamafile # move it somewhere in your $PATH
+
+ library llm-map --model ~/Downloads/llava-v1.5-7b-Q4_K.gguf --image-model ~/Downloads/llava-v1.5-7b-mmproj-Q4_0.gguf --prompt 'what do you see?' ~/Downloads/comp_*.jpg
"""