From a865417556daff9084c1465cde247b2119e31758 Mon Sep 17 00:00:00 2001 From: peter szemraj Date: Sat, 2 Nov 2024 16:39:30 -0400 Subject: [PATCH 01/11] =?UTF-8?q?=F0=9F=93=9D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: peter szemraj --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ab576b..f85822f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. Dates are d Generated by [`auto-changelog`](https://github.com/CookPete/auto-changelog). +#### [v0.2.1](https://github.com/pszemraj/textsum/compare/v0.2.0...v0.2.1) + +> 18 February 2024 + +- Batch processing [`#13`](https://github.com/pszemraj/textsum/pull/13) + #### [v0.2.0](https://github.com/pszemraj/textsum/compare/v0.1.5...v0.2.0) > 8 July 2023 From 58bab0d5f3dbf33767b707a9d98029a08360bf27 Mon Sep 17 00:00:00 2001 From: peter szemraj Date: Sat, 2 Nov 2024 16:45:51 -0400 Subject: [PATCH 02/11] =?UTF-8?q?=E2=9C=A8=20update=20default=20model=20to?= =?UTF-8?q?=20ft=20pegasus-x?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: peter szemraj --- README.md | 2 +- src/textsum/cli.py | 4 ++-- src/textsum/summarize.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 7899baa..50b4f80 100644 --- a/README.md +++ b/README.md @@ -220,7 +220,7 @@ This will start a local server that you can access in your browser & a shareable ## Models -Summarization is a memory-intensive task, and the [default model is relatively small and efficient](https://huggingface.co/pszemraj/long-t5-tglobal-base-16384-book-summary) for long-form text summarization. If you want to use a bigger model, you can specify the `model_name_or_path` argument when instantiating the `Summarizer` class. +Summarization is a memory-intensive task, and the [default model is relatively small and efficient](https://huggingface.co/BEE-spoke-data/pegasus-x-base-synthsumm_open-16k) for long-form text summarization. If you want to use a bigger model, you can specify the `model_name_or_path` argument when instantiating the `Summarizer` class. ```python summarizer = Summarizer(model_name_or_path='pszemraj/long-t5-tglobal-xl-16384-book-summary') diff --git a/src/textsum/cli.py b/src/textsum/cli.py index faa58a8..fbe730d 100644 --- a/src/textsum/cli.py +++ b/src/textsum/cli.py @@ -22,7 +22,7 @@ def main( input_dir: str, output_dir: Optional[str] = None, - model: str = "pszemraj/long-t5-tglobal-base-16384-book-summary", + model: str = "BEE-spoke-data/pegasus-x-base-synthsumm_open-16k", no_cuda: bool = False, tf32: bool = False, force_cache: bool = False, @@ -53,7 +53,7 @@ def main( Args: input_dir (str, required): The directory containing the input files. output_dir (str, optional): Directory to write the output files. If None, writes to input_dir/summarized. - model (str, optional): The name of the model to use for summarization. Default: "pszemraj/long-t5-tglobal-base-16384-book-summary". + model (str, optional): The name of the model to use for summarization. Default: "BEE-spoke-data/pegasus-x-base-synthsumm_open-16k". no_cuda (bool, optional): Flag to not use cuda if available. Default: False. tf32 (bool, optional): Enable tf32 data type for computation (requires ampere series GPU or newer). Default: False. 
force_cache (bool, optional): Force the use_cache flag to True in the Summarizer. Default: False. diff --git a/src/textsum/summarize.py b/src/textsum/summarize.py index a0b4211..6c8fbc6 100644 --- a/src/textsum/summarize.py +++ b/src/textsum/summarize.py @@ -44,7 +44,7 @@ class Summarizer: def __init__( self, - model_name_or_path: str = "pszemraj/long-t5-tglobal-base-16384-book-summary", + model_name_or_path: str = "BEE-spoke-data/pegasus-x-base-synthsumm_open-16k", use_cuda: bool = True, is_general_attention_model: bool = True, token_batch_length: int = 2048, @@ -60,7 +60,7 @@ def __init__( f""" __init__ - initialize the Summarizer class - :param str model_name_or_path: the name or path of the model to load, defaults to "pszemraj/long-t5-tglobal-base-16384-book-summary" + :param str model_name_or_path: the name or path of the model to load, defaults to "BEE-spoke-data/pegasus-x-base-synthsumm_open-16k" :param bool use_cuda: whether to use cuda, defaults to True :param bool is_general_attention_model: whether the model is a general attention model, defaults to True :param int token_batch_length: the amount of tokens to process in a batch, defaults to 2048 From b2ea950f77bed4c24ac801b9121f6056c1f2edb9 Mon Sep 17 00:00:00 2001 From: peter szemraj Date: Sat, 2 Nov 2024 16:50:53 -0400 Subject: [PATCH 03/11] =?UTF-8?q?=F0=9F=93=8C=20update=20min=20transformer?= =?UTF-8?q?s,=20torch=20versions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: peter szemraj --- setup.cfg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index e3f1b59..a82541a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -45,9 +45,9 @@ install_requires = fire natsort nltk - torch + torch>=2.0.0 tqdm - transformers>=4.26.0 + transformers>=4.46.0 [options.packages.find] where = src From 98d006313720c688752864bb3e970865bb51c39c Mon Sep 17 00:00:00 2001 From: peter szemraj Date: Sat, 2 Nov 2024 16:53:43 -0400 Subject: [PATCH 04/11] =?UTF-8?q?=E2=9A=B0=EF=B8=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: peter szemraj --- src/textsum/utils.py | 53 +------------------------------------------- 1 file changed, 1 insertion(+), 52 deletions(-) diff --git a/src/textsum/utils.py b/src/textsum/utils.py index a102804..e230fdf 100644 --- a/src/textsum/utils.py +++ b/src/textsum/utils.py @@ -1,10 +1,9 @@ """ - utils.py - Utility functions for the project. +utils.py - Utility functions for the project. """ import logging import re -import subprocess import sys from datetime import datetime from pathlib import Path @@ -25,56 +24,6 @@ def get_timestamp() -> str: return datetime.now().strftime("%Y%m%d_%H%M%S") -def regex_gpu_name(input_text: str): - """backup if not a100""" - - pattern = re.compile(r"(\s([A-Za-z0-9]+\s)+)(\s([A-Za-z0-9]+\s)+)", re.IGNORECASE) - return pattern.search(input_text).group() - - -def check_GPU(verbose=False): - """ - check_GPU - a function in Python that uses the subprocess module and regex to call the `nvidia-smi` command and check the available GPU. 
the function returns a boolean as to whether the GPU is an A100 or not - - :param verbose: if true, print out which GPU was found if it is not an A100 - """ - # call nvidia-smi - nvidia_smi = subprocess.run( - ["nvidia-smi"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True - ) - # convert to string - nvidia_smi = nvidia_smi.stdout.decode("utf-8") - search_past = "===============================" - # use regex to find the GPU name. search in the first newline underneath - output_lines = nvidia_smi.split("\n") - for i, line in enumerate(output_lines): - if search_past in line: - break - # get the next line - next_line = output_lines[i + 1] - if verbose: - print(next_line) - # use regex to find the GPU name - try: - gpu_name = re.search(r"\w+-\w+-\w+", next_line).group() - except AttributeError: - logging.debug("Could not find GPU name with initial regex") - gpu_name = None - - if gpu_name is None: - # try alternates - try: - gpu_name = regex_gpu_name(next_line) - except Exception as e: - logging.error(f"Could not find GPU name: {e}") - return False - - if verbose: - print(f"GPU found: {gpu_name}") - # check if it is an A100 - return bool("A100" in gpu_name) - - def validate_pytorch2(torch_version: str = None): torch_version = torch.__version__ if torch_version is None else torch_version From 3d209a24f38768a5c516110ec00f4e5d8f5c62b9 Mon Sep 17 00:00:00 2001 From: peter szemraj Date: Sat, 2 Nov 2024 17:08:19 -0400 Subject: [PATCH 05/11] =?UTF-8?q?=F0=9F=9A=9A=20rename=20fn?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: peter szemraj --- src/textsum/cli.py | 4 ++-- src/textsum/utils.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/textsum/cli.py b/src/textsum/cli.py index fbe730d..d770432 100644 --- a/src/textsum/cli.py +++ b/src/textsum/cli.py @@ -16,7 +16,7 @@ import textsum from textsum.summarize import Summarizer -from textsum.utils import enable_tf32, setup_logging +from textsum.utils import check_ampere_gpu, setup_logging def main( @@ -98,7 +98,7 @@ def main( } if tf32: - enable_tf32() # enable tf32 for computation + check_ampere_gpu() # enable tf32 for computation summarizer = Summarizer( model_name_or_path=model, diff --git a/src/textsum/utils.py b/src/textsum/utils.py index e230fdf..da1fa94 100644 --- a/src/textsum/utils.py +++ b/src/textsum/utils.py @@ -173,7 +173,7 @@ def check_bitsandbytes_available(): return True -def enable_tf32(): +def check_ampere_gpu(): """ enable_tf32 - enables computation in tf32 precision. (requires ampere series GPU or newer) From c52e4398807018e6562343a4c610a971c1cd160a Mon Sep 17 00:00:00 2001 From: peter szemraj Date: Sat, 2 Nov 2024 17:13:39 -0400 Subject: [PATCH 06/11] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20clean=20up=20utils?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: peter szemraj --- src/textsum/utils.py | 55 +++++++++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 14 deletions(-) diff --git a/src/textsum/utils.py b/src/textsum/utils.py index da1fa94..90525f4 100644 --- a/src/textsum/utils.py +++ b/src/textsum/utils.py @@ -142,23 +142,28 @@ def setup_logging(loglevel, logfile=None) -> None: def postprocess_booksummary(text: str, custom_phrases: list = None) -> str: """ - postprocess_booksummary - postprocess the book summary + Postprocess the book summary by removing specified introductory phrases if they + appear at the beginning of the text (case-insensitive). 
- :param str text: the text to postprocess - :param list custom_phrases: custom phrases to remove from the text, defaults to None - :return str: the postprocessed text + :param str text: The text to postprocess. + :param list custom_phrases: Custom phrases to remove from the text, defaults to None. + :return str: The postprocessed text. """ REMOVAL_PHRASES = [ "In this section, ", "In this lecture, ", "In this chapter, ", "In this paper, ", - ] # the default phrases to remove (from booksum dataset) + ] - if custom_phrases is not None: + if custom_phrases: REMOVAL_PHRASES.extend(custom_phrases) - for pr in REMOVAL_PHRASES: - text = text.replace(pr, "") + + for phrase in REMOVAL_PHRASES: + if text.lower().startswith(phrase.lower()): + text = text[len(phrase) :] + break # Stop after the first match to preserve other phrases + return text.strip() @@ -173,11 +178,33 @@ def check_bitsandbytes_available(): return True -def check_ampere_gpu(): +def check_ampere_gpu() -> None: """ - enable_tf32 - enables computation in tf32 precision. (requires ampere series GPU or newer) - - See https://blogs.nvidia.com/blog/2020/05/14/tensorfloat-32-precision-format/ for details + Check if the GPU supports NVIDIA Ampere or later and enable TF32 in PyTorch if it does. """ - logging.debug("Enabling TF32 computation") - torch.backends.cuda.matmul.allow_tf32 = True + # Check if CUDA is available + if not torch.cuda.is_available(): + logging.info("No GPU detected, running on CPU.") + return + + try: + device = torch.cuda.current_device() + capability = torch.cuda.get_device_capability(device) + major, minor = capability + + # Check if Ampere or newer (compute capability >= 8.0) + if major >= 8: + torch.backends.cuda.matmul.allow_tf32 = True + torch.backends.cudnn.allow_tf32 = True + gpu_name = torch.cuda.get_device_name(device) + print( + f"{gpu_name} (compute capability {major}.{minor}) supports NVIDIA Ampere or later, enabled TF32 in PyTorch." + ) + else: + gpu_name = torch.cuda.get_device_name(device) + print( + f"{gpu_name} (compute capability {major}.{minor}) is not NVIDIA Ampere or later." 
+ ) + + except Exception as e: + logging.warning(f"Error occurred while checking GPU: {e}") From 59f6b92053711a5ea536fccaeb3ddd056ee9adb3 Mon Sep 17 00:00:00 2001 From: peter szemraj Date: Sat, 2 Nov 2024 17:22:58 -0400 Subject: [PATCH 07/11] update constants Signed-off-by: peter szemraj --- src/textsum/summarize.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/textsum/summarize.py b/src/textsum/summarize.py index 6c8fbc6..4078f16 100644 --- a/src/textsum/summarize.py +++ b/src/textsum/summarize.py @@ -47,7 +47,7 @@ def __init__( model_name_or_path: str = "BEE-spoke-data/pegasus-x-base-synthsumm_open-16k", use_cuda: bool = True, is_general_attention_model: bool = True, - token_batch_length: int = 2048, + token_batch_length: int = 4096, batch_stride: int = 16, max_length_ratio: float = 0.25, load_in_8bit: bool = False, @@ -625,8 +625,9 @@ def __call__(self, input_data, **kwargs): # or summary = summarizer("/path/to/textfile.txt") """ + MAX_FILEPATH_LENGTH = 300 # est if ( - len(str(input_data)) < 1000 # assume > 1000 characters is plaintext + len(str(input_data)) < MAX_FILEPATH_LENGTH and isinstance(input_data, (str, Path)) and Path(input_data).is_file() ): From 13c6f0d38ed83625b212ee50f550a57ae8741e00 Mon Sep 17 00:00:00 2001 From: peter szemraj Date: Sat, 2 Nov 2024 17:26:24 -0400 Subject: [PATCH 08/11] =?UTF-8?q?=F0=9F=93=9D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: peter szemraj --- src/textsum/summarize.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/textsum/summarize.py b/src/textsum/summarize.py index 4078f16..0a68e29 100644 --- a/src/textsum/summarize.py +++ b/src/textsum/summarize.py @@ -26,7 +26,7 @@ class Summarizer: """ - Summarizer - utility class for summarizing long text using a pretrained model + Summarizer - utility class for summarizing long text using a pretrained text2text model """ settable_inference_params = [ @@ -60,17 +60,17 @@ def __init__( f""" __init__ - initialize the Summarizer class - :param str model_name_or_path: the name or path of the model to load, defaults to "BEE-spoke-data/pegasus-x-base-synthsumm_open-16k" - :param bool use_cuda: whether to use cuda, defaults to True + :param str model_name_or_path: name or path of the model to load, defaults to "BEE-spoke-data/pegasus-x-base-synthsumm_open-16k" + :param bool use_cuda: whether to use cuda if available, defaults to True :param bool is_general_attention_model: whether the model is a general attention model, defaults to True - :param int token_batch_length: the amount of tokens to process in a batch, defaults to 2048 + :param int token_batch_length: number of tokens to split the text into for batch summaries, defaults to 4096 :param int batch_stride: the amount of tokens to stride the batch by, defaults to 16 - :param float max_length_ratio: the ratio of the token_batch_length to use as the max_length for the model, defaults to 0.25 - :param bool load_in_8bit: whether to load the model in 8bit precision (LLM.int8), defaults to False - :param bool compile_model: whether to compile the model (pytorch 2.0+ only), defaults to False - :param bool optimum_onnx: whether to load the model in ONNX Runtime, defaults to False - :param bool force_cache: whether to force the model to use cache, defaults to False - :param bool disable_progress_bar: whether to disable the progress bar, defaults to False + :param float max_length_ratio: ratio of the token_batch_length to 
calculate max_length (of outputs), defaults to 0.25 + :param bool load_in_8bit: load the model in 8bit precision (LLM.int8), defaults to False + :param bool compile_model: compile the model (pytorch 2.0+ only), defaults to False + :param bool optimum_onnx: load the model in ONNX Runtime, defaults to False + :param bool force_cache: force the model to use cache in generation, defaults to False + :param bool disable_progress_bar: disable the per-document progress bar, defaults to False :param kwargs: additional keyword arguments to pass to the model as inference parameters, any of: {self.settable_inference_params} """ self.logger = logging.getLogger(__name__) @@ -113,6 +113,10 @@ def __init__( provider=provider, export=not Path(self.model_name_or_path).is_dir(), ) # if a directory, already exported + self.logger.warning( + "ONNXruntime support is experimental, and functionality may vary per-model. " + "Model outputs should be checked for accuracy" + ) else: self.model = AutoModelForSeq2SeqLM.from_pretrained( self.model_name_or_path, From 4e5310b910dd9d7605067b538539c323f0c1ec6c Mon Sep 17 00:00:00 2001 From: peter szemraj Date: Sat, 2 Nov 2024 19:07:53 -0400 Subject: [PATCH 09/11] =?UTF-8?q?=F0=9F=93=9D=20refresh?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: peter szemraj --- README.md | 77 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 42 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index 50b4f80..f240b07 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,3 @@ - - # textsum @@ -23,7 +11,8 @@ This package provides easy-to-use interfaces for using summarization models on text documents of arbitrary length. Currently implemented interfaces include a python API, CLI, and a shareable demo app. -For details, explanations, and docs, see the [wiki](https://github.com/pszemraj/textsum/wiki) +> [!TIP] +> For additional details, explanations, and docs, see the [wiki](https://github.com/pszemraj/textsum/wiki) --- @@ -98,15 +87,16 @@ pip install -e .[all] The package also supports a number of optional extra features, which can be installed as follows: -- `8bit`: Install with `pip install -e .[8bit]` -- `optimum`: Install with `pip install -e .[optimum]` -- `PDF`: Install with `pip install -e .[PDF]` -- `app`: Install with `pip install -e .[app]` -- `unidecode`: Install with `pip install -e .[unidecode]` +- `8bit`: Install with `pip install -e "textsum[8bit]"` +- `optimum`: Install with `pip install -e "textsum[optimum]"` +- `PDF`: Install with `pip install -e "textsum[PDF]"` +- `app`: Install with `pip install -e "textsum[app]"` +- `unidecode`: Install with `pip install -e "textsum[unidecode]"` -Read below for more details on how to use these features. +Replace `textsum` in the command with `.` if installing from source.Read below for more details on how to use these features. -> _Note:_ The `unidecode` extra is a GPL-licensed dependency that is not included by default with the `clean-text` python package. While it can be used for text cleaning pre-summarization, it generally should not make a significant difference in most use cases. +> [!TIP] +> The `unidecode` extra is a GPL-licensed dependency not included by default with the `clean-text` python package. Installing it would improve the text cleaning pre-summarization for noisy text, but in general it should not make a significant difference in most use cases. 
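+
+To quickly confirm that the package and the default model load correctly, a minimal smoke test along these lines can be run (only a sketch; the placeholder string is illustrative, and the default model is downloaded on first use):
+
+```python
+from textsum.summarize import Summarizer
+
+summarizer = Summarizer()  # loads the default long-document summarization model
+text = "Replace this placeholder with any long document you want to condense."
+print(summarizer(text))  # calling the instance directly accepts raw text or a path to a text file
+```
+
+If this prints a summary, the core installation is working.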
## Usage @@ -202,7 +192,7 @@ textsum-dir --help ### Demo App -For convenience, a UI demo[^1] is provided using [gradio](https://gradio.app/). To ensure you have the dependencies installed, clone the repo and run the following command: +For convenience, a UI demo[^1] is provided using [gradio](https://gradio.app/). To ensure you have the dependencies installed, run the following command: ```bash pip install textsum[app] @@ -240,24 +230,27 @@ Any [text-to-text](https://huggingface.co/models?filter=text2text) or [summariza ### Parameters -Memory usage can also be reduced by adjusting the parameters for inference. This is discussed in detail in the [project wiki](https://github.com/pszemraj/textsum/wiki). +Memory usage can also be reduced by adjusting the [parameters for inference](https://huggingface.co/docs/transformers/generation_strategies#beam-search-decoding). This is discussed in detail in the [project wiki](https://github.com/pszemraj/textsum/wiki). -tl;dr for this README: use the `summarizer.set_inference_params()` and `summarizer.get_inference_params()` methods to adjust the parameters for inference from either a python `dict` or a JSON file. +> [!IMPORTANT] +> tl;dr for this README: use the `summarizer.set_inference_params()` and `summarizer.get_inference_params()` methods to adjust the inference parameters, passing either a python `dict` or a JSON file. Support for `GenerationConfig` as the primary method to adjust inference parameters is planned for a future release. ### 8-bit Quantization & TensorFloat32 -Some methods of reducing memory usage _if you have compatible hardware_ include loading the model in 8-bit precision via [LLM.int8](https://arxiv.org/abs/2208.07339) and using the `--tf32` flag to use TensorFloat32 precision. See the [transformers docs](https://huggingface.co/docs/transformers/perf_infer_gpu_one#efficient-inference-on-a-single-gpu) for more details on how this works. Using LLM.int8 requires the [bitsandbytes](https://github.com/TimDettmers/bitsandbytes) package, which can either be installed directly or via the `textsum[8bit]` extra: +Some methods of efficient inference[^2] include loading the model in 8-bit precision via [LLM.int8](https://arxiv.org/abs/2208.07339) (_reduces memory usage_) and enabling TensorFloat32 precision in the torch backend (_reduces latency_). See the [transformers docs](https://huggingface.co/docs/transformers/perf_infer_gpu_one#efficient-inference-on-a-single-gpu) for more details. Using LLM.int8 requires the [bitsandbytes](https://github.com/TimDettmers/bitsandbytes) package, which can either be installed directly or via the `textsum[8bit]` extra: + +[^2]: if you have compatible hardware. In general, ampere (RTX 30XX) and newer GPUs are recommended. ```bash pip install textsum[8bit] ``` -To use these options, use the `-8bit` and `--tf32` flags when using the CLI: +To use these options, use the `--load_in_8bit` and `--tf32` flags when using the CLI: ```bash -textsum-dir /path/to/dir -8bit --tf32 +textsum-dir /path/to/dir --load_in_8bit --tf32 ``` Or in python, using the `load_in_8bit` argument: @@ -266,20 +259,29 @@ Or in python, using the `load_in_8bit` argument: summarizer = Summarizer(load_in_8bit=True) ``` -If using the python API, it's better to initiate tf32 yourself; see [here](https://huggingface.co/docs/transformers/perf_train_gpu_one#tf32) for how. 
+If using the Python API, either [manually activate tf32](https://huggingface.co/docs/transformers/perf_train_gpu_one#tf32) or use the `check_ampere_gpu()` function from `textsum.utils` **before initializing the `Summarizer` class**:
+
+```python
+from textsum.utils import check_ampere_gpu
+check_ampere_gpu() # automatically enables TF32 if Ampere+ available
+summarizer = Summarizer(load_in_8bit=True)
+```
### Using Optimum ONNX Runtime
-> ⚠️ **Note:** This feature is experimental and might not work as expected. Use at your own risk. ⚠️🧪
+> [!CAUTION]
+> This feature is experimental and might not work as expected. Use at your own risk. ⚠️🧪
-ONNX Runtime is a performance-focused inference engine for ONNX models. It can be used to enhance the speed of model predictions, especially on Windows and in environments where GPU acceleration is not available. If you want to use ONNX runtime for inference, you need to set `optimum_onnx=True` when initializing the `Summarizer` class.
+ONNX Runtime is a performance-oriented inference engine for ONNX models. It can be used to increase the speed of model inference, especially on Windows and in environments where GPU acceleration is not available. If you want to use ONNX runtime for inference, you need to set `optimum_onnx=True` when initializing the `Summarizer` class.
-First, install with `pip install textsum[optimum]`. Then, you can use the following code to initialize the `Summarizer` class with ONNX runtime:
+First, install with `pip install textsum[optimum]`. Then initialize the `Summarizer` class with ONNX runtime:
```python
-summarizer = Summarizer(optimum_onnx=True)
+summarizer = Summarizer(model_name_or_path="onnx-compatible-model-name", optimum_onnx=True)
```
+It will automatically convert the model if it has not been converted to ONNX yet.
+
**Notes:**
1. ONNX runtime+cuda needs an additional package. Manually install `onnxruntime-gpu` if you plan to use ONNX with GPU.
### Force Cache
-By default, the summarization model uses past computations to speed up decoding. If you want to force the model to always use cache irrespective of the model's default behavior, you can set `force_cache=True` when initializing the `Summarizer` class.
+> [!CAUTION]
+> Setting `force_cache=True` might lead to different behavior in certain models. Test the model with and without `force_cache` on **the same input text** before using it for anything important.
+
+Using the cache speeds up autoregressive generation by avoiding recomputing attention for tokens that have already been generated. If you want to force the model to always use cache irrespective of the model's default behavior[^3], you can set `force_cache=True` when initializing the `Summarizer` class.
+
+[^3]: `use_cache` can sometimes be disabled due to things like gradient accumulation training, etc., and if not re-enabled will result in slower inference times.
```python
summarizer = Summarizer(force_cache=True)
```
-**Note:** Setting `force_cache=True` might lead to different behavior in certain models.
### Compile Model
-By default, the model isn't compiled for efficient inference. If you want to compile the model for faster inference times, you can set `compile_model=True` when initializing the `Summarizer` class.
+If you want to [compile the model](https://pytorch.org/tutorials/intermediate/torch_compile_tutorial.html) for faster inference times, you can set `compile_model=True` when initializing the `Summarizer` class.
```python summarizer = Summarizer(compile_model=True) ``` -**Note:** Compiling the model might not be supported on all platforms and requires pytorch > 2.0.0. +> [!NOTE] +> Compiling the model might not be supported on all platforms and requires pytorch > 2.0.0. --- From 1fc7823584acd99d806b451a7fea2f8d9af89d78 Mon Sep 17 00:00:00 2001 From: peter szemraj Date: Sat, 2 Nov 2024 19:15:23 -0400 Subject: [PATCH 10/11] =?UTF-8?q?=E2=9C=8F=EF=B8=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: peter szemraj --- README.md | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index f240b07..8883fb2 100644 --- a/README.md +++ b/README.md @@ -93,10 +93,10 @@ The package also supports a number of optional extra features, which can be inst - `app`: Install with `pip install -e "textsum[app]"` - `unidecode`: Install with `pip install -e "textsum[unidecode]"` -Replace `textsum` in the command with `.` if installing from source.Read below for more details on how to use these features. +Replace `textsum` in the command with `.` if installing from source. Read below for more details on how to use these features. > [!TIP] -> The `unidecode` extra is a GPL-licensed dependency not included by default with the `clean-text` python package. Installing it would improve the text cleaning pre-summarization for noisy text, but in general it should not make a significant difference in most use cases. +> The `unidecode` extra is a GPL-licensed dependency not included by default with the `clean-text` package. Installing it should improve the cleaning of noisy input text, but it should not make a significant difference in most use cases. ## Usage @@ -131,13 +131,13 @@ print(f'summary saved to {out_path}') ### CLI -To summarize a directory of text files, run the following command: +To summarize a directory of text files, run the following command in your terminal: ```bash textsum-dir /path/to/dir ``` -A full list: +There are many CLI flags available. A full list:
@@ -177,14 +177,13 @@ Some useful options are: Arguments: -- `input_dir`: The directory containing the input text files to be summarized. - `--model`: model name or path to use for summarization. (Optional) - `--shuffle`: Shuffle the input files before processing. (Optional) - `--skip_completed`: Skip already completed files in the output directory. (Optional) - `--batch_length`: The maximum length of each input batch. Default is 4096. (Optional) - `--output_dir`: The directory to write the summarized output files. Default is `./summarized/`. (Optional) -For more information, run the following: +To see all available options, run the following command: ```bash textsum-dir --help @@ -198,7 +197,7 @@ For convenience, a UI demo[^1] is provided using [gradio](https://gradio.app/). pip install textsum[app] ``` -To run the demo, run the following command: +To launch the demo, run: ```bash textsum-ui @@ -210,7 +209,7 @@ This will start a local server that you can access in your browser & a shareable ## Models -Summarization is a memory-intensive task, and the [default model is relatively small and efficient](https://huggingface.co/BEE-spoke-data/pegasus-x-base-synthsumm_open-16k) for long-form text summarization. If you want to use a bigger model, you can specify the `model_name_or_path` argument when instantiating the `Summarizer` class. +Summarization is a memory-intensive task, and the [default model is relatively small and efficient](https://huggingface.co/BEE-spoke-data/pegasus-x-base-synthsumm_open-16k) for long-form text summarization. If you want to use a different model, you can specify the `model_name_or_path` argument when instantiating the `Summarizer` class. ```python summarizer = Summarizer(model_name_or_path='pszemraj/long-t5-tglobal-xl-16384-book-summary') @@ -233,7 +232,7 @@ Any [text-to-text](https://huggingface.co/models?filter=text2text) or [summariza Memory usage can also be reduced by adjusting the [parameters for inference](https://huggingface.co/docs/transformers/generation_strategies#beam-search-decoding). This is discussed in detail in the [project wiki](https://github.com/pszemraj/textsum/wiki). > [!IMPORTANT] -> tl;dr for this README: use the `summarizer.set_inference_params()` and `summarizer.get_inference_params()` methods to adjust the inference parameters, passing either a python `dict` or a JSON file. +> tl;dr: use the `summarizer.set_inference_params()` and `summarizer.get_inference_params()` methods to adjust the inference parameters, passing either a python `dict` or a JSON file. Support for `GenerationConfig` as the primary method to adjust inference parameters is planned for a future release. 
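+
+As a quick illustration (a rough sketch; it assumes `get_inference_params()` takes no arguments, and the exact keys depend on the loaded model's generation settings), the current values can be inspected before changing anything:
+
+```python
+from textsum.summarize import Summarizer
+
+summarizer = Summarizer()
+print(summarizer.get_inference_params())  # inspect the active inference parameters before adjusting them
+```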
@@ -253,7 +252,7 @@ To use these options, use the `--load_in_8bit` and `--tf32` flags when using the textsum-dir /path/to/dir --load_in_8bit --tf32 ``` -Or in python, using the `load_in_8bit` argument: +Or in Python, using the `load_in_8bit` argument: ```python summarizer = Summarizer(load_in_8bit=True) From 123fc492ec1e10ad64efd3e837af9f205582960a Mon Sep 17 00:00:00 2001 From: peter szemraj Date: Sat, 2 Nov 2024 19:19:01 -0400 Subject: [PATCH 11/11] =?UTF-8?q?=F0=9F=8E=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: peter szemraj --- src/textsum/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/textsum/__init__.py b/src/textsum/__init__.py index 86c24e3..4aed982 100644 --- a/src/textsum/__init__.py +++ b/src/textsum/__init__.py @@ -2,6 +2,7 @@ textsum - a package for summarizing text """ + import sys if sys.version_info[:2] >= (3, 8):