From 8c283f3a7aca5c632af32788b3611f978047358d Mon Sep 17 00:00:00 2001 From: b08x Date: Wed, 27 Mar 2024 22:32:55 -0400 Subject: [PATCH] updated readmes --- README.md | 4 +- llamaindex/Dockerfile | 20 ++---- llamaindex/Gemfile | 26 ++++--- llamaindex/README.md | 67 +++++++++++++++-- llamaindex/requirements.txt | 19 +++++ minimal/README.md | 135 ++++++++++++++++++++++++++++------ nlp/Dockerfile | 8 --- nlp/Gemfile | 60 ++++++++-------- nlp/requirements.txt | 3 +- nlp/table.md | 139 ++++++++++++++++++++++++++++++++++++ 10 files changed, 387 insertions(+), 94 deletions(-) create mode 100644 llamaindex/requirements.txt create mode 100644 nlp/table.md diff --git a/README.md b/README.md index ec37b8b..2cd394e 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,9 @@ This repository is based on [rubydata/docker-stacks](https://github.com/RubyData ### Minimal Image -### Data Science Image +### LLM Image + +### LlamaStuff Image ### NLP Image diff --git a/llamaindex/Dockerfile b/llamaindex/Dockerfile index b81f3bc..dd89186 100644 --- a/llamaindex/Dockerfile +++ b/llamaindex/Dockerfile @@ -7,21 +7,15 @@ USER $NB_UID WORKDIR /home/$NB_USER -# List of txtai components to install -ARG COMPONENTS=[all] - -RUN \ - pip install --no-cache-dir -U pip wheel setuptools && \ - pip install --no-cache-dir llama-index-llms-ollama \ - llama-index-readers-obsidian llama-index-llms-langchain \ - llama-index-graph-stores-nebula llama-index-multi-modal-llms-ollama \ - llama-index-readers-file unstructured llama-index-embeddings-huggingface \ - llama-index-vector-stores-chroma && \ - pip install --no-cache-dir llama_index pyvis IPython && \ - pip install --no-cache-dir prompttools && \ - pip install --no-cache-dir txtai${COMPONENTS} && \ +COPY nlp/requirements.txt . 
+ +RUN pip install --no-cache-dir -U pip wheel setuptools && \ + pip install -r requirements.txt && \ + python3 -m spacy download en_core_web_sm && \ + python3 -m spacy download en_core_web_lg && \ python -c "import sys, importlib.util as util; 1 if util.find_spec('nltk') else sys.exit(); import nltk; nltk.download('punkt')" + # NOTE: DO NOT CHANGE the version in the path of gem's bin directory ENV PATH $HOME/.local/share/gem/ruby/3.1.0/bin:$PATH ENV BUNDLE_PATH $HOME/.local/share/gem diff --git a/llamaindex/Gemfile b/llamaindex/Gemfile index 38ca061..d4374cb 100644 --- a/llamaindex/Gemfile +++ b/llamaindex/Gemfile @@ -62,33 +62,31 @@ gem 'tty-prompt' gem 'tty-screen' gem 'yaml' - -# Install basic gems gem 'charty', '>= 0.2.12' -gem 'matplotlib', '>= 1.2.0' -gem 'numpy', '>= 0.4.0' -gem 'pandas', '>= 0.3.8' -# gem 'red_amber', '0.4.2' -# gem 'red-arrow', '11.0.0' -# gem 'red-datasets', '>= 0.1.4' -# gem 'red-gandiva', '11.0.0' -# gem 'red-parquet', '11.0.0' -gem 'unicode_plot', '>= 0.0.5' - -# Additional gems gem 'daru' gem 'daru-view' gem 'enumerable-statistics' gem 'ffi-rzmq' +gem 'matplotlib', '>= 1.2.0' gem 'nmatrix' gem 'nmatrix-lapacke' gem 'numo-linalg' gem 'numo-narray' +gem 'numpy', '>= 0.4.0' +gem 'pandas', '>= 0.3.8' gem 'rbplotly' +gem 'rumale' +gem 'unicode_plot', '>= 0.0.5' + +# Additional gems +# gem 'red-arrow', '11.0.0' # gem 'red-arrow-numo-narray' # gem 'red-chainer' +# gem 'red-datasets', '>= 0.1.4' # gem 'red-datasets-arrow' # gem 'red-datasets-daru' # gem 'red-datasets-pandas' +# gem 'red-gandiva', '11.0.0' +# gem 'red-parquet', '11.0.0' # gem 'red-plasma' -gem 'rumale' +# gem 'red_amber', '0.4.2' \ No newline at end of file diff --git a/llamaindex/README.md b/llamaindex/README.md index 8341465..fdcdef0 100644 --- a/llamaindex/README.md +++ b/llamaindex/README.md @@ -1,16 +1,73 @@ -# RubyData Data Science +# Docker Stacks: llamaindex -```bash +```bash +--------+ +------------+ | ruby +------>|data science| +--------+ +------------+ +``` + +## 
Project Title: +Unchanged. + +## Description: +Unchanged. + +## Docker Image: +This Docker image offers a meticulously crafted environment tailored for machine learning, natural language processing, and data science workflows, with a robust foundation for integrating both Python and Ruby-based tools. Here's an updated breakdown: +- **Base Image:** A customized Jupyter/scipy-notebook image (Tag: f3079808ca8c or a designated version) encompassing essential foundational tools. +- **Customization:** + - **System Updates & Prerequisites:** Installation of commonly employed development tools and libraries (e.g., git, gcc, build essentials, database clients) to facilitate a robust development environment. + - **LLVM:** Inclusion of the LLVM compiler infrastructure (version 11) for enhanced compilation capabilities. + - **Python 3.10.7:** Seamless integration of Python 3.10.7, directly copied from the official python:3.10-slim image, ensuring compatibility and reliability. + - **Ruby 3.1.3:** Seamless integration of Ruby 3.1.3, directly copied from the official rubylang/ruby image, ensuring compatibility and reliability. +- **User & Permissions Setup:** Configuration ensures that the container's primary user possesses appropriate permissions, including sudo access, for efficient resource management. 
+- **Languages:** Python, Ruby +- **Frameworks/Libraries:** + - **Python:** + - spaCy: Natural language processing + - txtai: Real-time text and code search + - LangChain: Language model chaining + - llama-index: Large language model indexing + - IPython: Interactive Python shell + - nbconvert: Convert notebooks to various formats + - NumPy: Scientific computing + - Pandas: Data manipulation and analysis + - Matplotlib: Data visualization + - ...Include other items from requirements.txt + - **Ruby:** + - iruby: Interactive Ruby shell + - pycall: Call Python code from Ruby + - dotenv: Environment variable management + - LangChainRB: Language model chaining for Ruby + - ruby-openai: OpenAI API client + - ...Include other items from Gemfile +## Installation: +### Prerequisites: +- Docker installed and operational + +### Pull the Image: +```bash +docker pull <image_name>:<tag> +``` +(Replace `<image_name>` and `<tag>` with the appropriate values.) + +## Usage: +### Start the container: +```bash +docker run -it -p 8888:8888 <image_name>:<tag> ``` +### Access Jupyter Notebook: +Navigate to [http://localhost:8888](http://localhost:8888) in your web browser. You'll require the token provided in the container's output to log in. -https://github.com/red-data-tools/packages.red-data-tools.org +### Example: +Provide a fundamental code example or a concise explanation illustrating a core use case in either Python or Ruby to demonstrate the image's capabilities. +## Additional Notes: +- **Key Changes:** This revised version of the image allows for seamless integration and utilization of both Python and Ruby within the Jupyter environment, expanding the possibilities for data science and machine learning workflows. +- **Ruby Integration:** Explore the Ruby gems listed in the Gemfile or incorporate your own to augment your workflow and extend functionalities. 
+- **Customization:** While the base image provides a solid foundation, it can be further customized to cater to project-specific dependencies and requirements, ensuring a tailored environment for your specific needs. -- Ruby stack - - pry, iruby, pycall, numpy, pandas, matplotlib, numo-narray, numo-linalg, nmatrix, nmatrix-lapacke, red-arrow, red-arrow-numo-narray, red-arrow-nmatrix, daru, rbplotly, charty +We hope this revised version of the Docker image empowers you to unlock the full potential of your data science and machine learning endeavors. diff --git a/llamaindex/requirements.txt b/llamaindex/requirements.txt new file mode 100644 index 0000000..3bfd125 --- /dev/null +++ b/llamaindex/requirements.txt @@ -0,0 +1,19 @@ +llama-index-embeddings-huggingface +llama-index-graph-stores-nebula +llama-index-llms-langchain +llama-index-llms-ollama +llama-index-multi-modal-llms-ollama +llama-index-readers-file +llama-index-readers-obsidian +llama-index-vector-stores-chroma +llama_index +pyvis +IPython +nbconvert[all] +prompttools +pydantic==1.9 +spacy>=3.3.0,<3.5.0 +txtai[all] +typing_extensions<4.6.0 +unstructured +llama-parse \ No newline at end of file diff --git a/minimal/README.md b/minimal/README.md index 21b95bb..0164a79 100644 --- a/minimal/README.md +++ b/minimal/README.md @@ -8,25 +8,116 @@ This docker image consists of minimal components for data science using Ruby. 
-tty-toolkit -langchain, ruby-openai -sequel, pg, mysql2, sqlite3 -pry - - -## Components in this image - -- Based on jupyter/scipy-notebook -- Ruby 3.1.3 - - Almost same as docker-library's ruby image -- Python 3.10.1 - - Almost same as docker-library's python image -- OpenBLAS -- CZMQ 4.0.2 - -- [tini](https://github.com/krallin/tini) as the container entrypoint -- Python stack - - numpy, pandas, matplotlib, scipy, seaborn, bokeh, plotly, holoviews, - scikit-learn, scikit-image, sympy, gensim, nltk, cython, statsmodel, - patsy, cloudpickle, dill, numba, xray, pyarrow, tensorflow, keras, - chainer, xgboost +Layered on the jupyter/scipy-notebook & 3.1.3 from rubylang/ruby Docker images + + +Here is the provided text formatted into markdown: + + +## Docker Image: +This Docker image offers a crafted environment tailored for machine learning, natural language processing, and data science workflows, with a robust foundation for integrating both Python and Ruby-based tools. Here's an updated breakdown: + +- **Base Image:** A customized Jupyter/scipy-notebook image (Tag: f3079808ca8c or a designated version) encompassing essential foundational tools. +- **Customization:** + - **System Updates & Prerequisites:** Installation of commonly employed development tools and libraries (e.g., git, gcc, build essentials, database clients) to facilitate a robust development environment. + - **LLVM:** Inclusion of the LLVM compiler infrastructure (version 11) for enhanced compilation capabilities. + - **Python 3.10.7:** Seamless integration of Python 3.10.7, directly copied from the official python:3.10-slim image, ensuring compatibility and reliability. + - **Ruby 3.1.3:** Seamless integration of Ruby 3.1.3, directly copied from the official rubylang/ruby image, ensuring compatibility and reliability. +- **User & Permissions Setup:** Configuration ensures that the container's primary user possesses appropriate permissions, including sudo access, for efficient resource management. 
+- **Languages:** Python, Ruby + +### Ruby Tools +- **IRB:** The interactive Ruby shell, allowing users to interactively execute Ruby code in the terminal. +- **IRuby:** A Ruby kernel for Jupyter, allowing users to run Ruby code interactively in Jupyter notebooks. +- **Pry:** A gem that provides a REPL (Read-Eval-Print Loop) environment for Ruby, allowing users to interactively explore their code. + +### Data Serialization and Parsing +- **JSON:** A gem that provides support for working with JSON data in Ruby. +- **JSONL:** A gem that provides support for working with JSON lines in Ruby. +- **Psych:** A gem that provides support for parsing and generating YAML data in Ruby. +- **REXML:** A gem that provides support for working with XML data in Ruby, allowing users to parse and generate XML documents. + +### Database Connectivity and ORM +- **ActiveSupport:** A collection of utility classes and standard library extensions that were found useful for the Rails framework, including support for multibyte strings, internationalization, time zones, and testing. +- **MySQL2:** A gem that provides a database adapter for MySQL databases in Ruby. +- **PG:** A gem that provides a database adapter for PostgreSQL databases in Ruby. +- **Sequel:** An object-oriented SQL database interface for Ruby, supporting multiple databases and database adapters. +- **SQLite3:** A gem that provides a database adapter for SQLite databases in Ruby. + +### Logging and Output Formatting +- **Awesome Print:** A gem that beautifies the output of your Ruby objects in the console or IRB, making them easier to read. +- **Colorize:** A gem that adds color support to the console output of Ruby programs. +- **Logging:** A gem that provides support for logging in Ruby, allowing users to log messages to various output streams. +- **Pastel:** A gem that provides support for colorizing text in the console output of Ruby programs. 
+ +### Testing Frameworks +- **Minitest:** A gem that provides a testing framework for Ruby, allowing users to write unit and integration tests for their applications. +- **Mocha:** A mocking and stubbing library for Ruby, allowing users to replace collaborators with mocks and stubs in their tests. + +### Documentation and Reporting +- **Ruport:** A gem that provides support for generating reports from data in Ruby. +- **RDoc:** A gem that provides support for generating documentation for Ruby programs. +- **Kramdown:** A gem that provides a markdown parser and converter for Ruby, allowing users to convert markdown text to HTML or other formats. + +### Version Control +- **Rugged:** A gem that provides support for working with Git repositories in Ruby. + +### File and Directory Management +- **Sync:** A gem that provides support for synchronizing files and directories in Ruby. + +### Concurrency and Parallel Processing +- **EventMachine:** A gem that provides a concurrency framework for Ruby, allowing users to write concurrent applications using an event-driven model. +- **Parallel:** A gem that provides support for parallel processing in Ruby, allowing users to execute tasks concurrently. + +### Foreign Function Interface (FFI) +- **FFI:** A gem that provides a foreign function interface for Ruby, allowing users to call functions written in other languages from Ruby. + +### Shell Command Execution and Management +- **ChildProcess:** A gem that provides a cross-platform interface for spawning child processes and managing their input, output, and error streams. +- **TTY-Box:** A component of the TTY toolkit for drawing frames and boxes in the terminal window. +- **TTY-Command:** A component of the TTY toolkit that allows running shell commands with pretty logging and capturing stdout, stderr, and exit status. 
+- **TTY-Config:** A component of the TTY toolkit that provides a customizable application configuration interface for terminal tools. + +### SSH Client +- **Net-SSH:** A gem that provides a client for connecting to and executing commands on remote SSH servers. + + + + +| **Functionality** | **Gem** | **Version** | **Description** | +|----------------------------------------|---------------|----------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Interactive Ruby Shell | irb | >= 1.3.8.pre.9 | The interactive Ruby shell, allowing users to interactively execute Ruby code in the terminal. | +| | iruby | >= 0.7.4 | A Ruby kernel for Jupyter, allowing users to run Ruby code interactively in Jupyter notebooks. | +| | pry | | A gem that provides a REPL (Read-Eval-Print Loop) environment for Ruby, allowing users to interactively explore their code. | +| | pry-doc | | A gem that provides documentation for Pry, the REPL environment for Ruby. | +| Data Serialization and Parsing | json | | A gem that provides support for working with JSON data in Ruby. | +| | jsonl | | A gem that provides support for working with JSON lines in Ruby. | +| | psych | | A gem that provides support for parsing and generating YAML data in Ruby. | +| | rexml | | A gem that provides support for working with XML data in Ruby, allowing users to parse and generate XML documents. | +| | yaml | | | +| Database Connectivity and ORM | activesupport | | A collection of utility classes and standard library extensions that were found useful for the Rails framework, including support for multibyte strings, internationalization, time zones, and testing. | +| | mysql2 | | A gem that provides a database adapter for MySQL databases in Ruby. 
| +| | pg | | A gem that provides a database adapter for PostgreSQL databases in Ruby. | +| | sequel | | An object-oriented SQL database interface for Ruby, supporting multiple databases and database adapters. | +| | sqlite3 | | A gem that provides a database adapter for SQLite databases in Ruby. | +| Logging and Output Formatting | awesome_print | | A gem that beautifies the output of your Ruby objects in the console or IRB, making them easier to read. | +| | colorize | | A gem that adds color support to the console output of Ruby programs. | +| | logging | | A gem that provides support for logging in Ruby, allowing users to log messages to various output streams. | +| | pastel | | A gem that provides support for colorizing text in the console output of Ruby programs. | +| Testing Frameworks | minitest | | A gem that provides a testing framework for Ruby, allowing users to write unit and integration tests for their applications. | +| | mocha | | A mocking and stubbing library for Ruby, allowing users to replace collaborators with mocks and stubs in their tests. | +| Documentation and Reporting | ruport | | A gem that provides support for generating reports from data in Ruby. | +| | rdoc | | A gem that provides support for generating documentation for Ruby programs. | +| | kramdown | | A gem that provides a markdown parser and converter for Ruby, allowing users to convert markdown text to HTML or other formats. | +| Version Control | rugged | | A gem that provides support for working with Git repositories in Ruby. | +| File and Directory Management | sync | | A gem that provides support for synchronizing files and directories in Ruby. | +| | open3 | | A gem that provides a cross-platform interface for spawning child processes and managing their input, output, and error streams. 
| +| | open4 | | A gem that provides a cross-platform interface for spawning child processes and managing their input, output, and error streams, with additional features for managing pipes. | +| Concurrency and Parallel Processing | eventmachine | | A gem that provides a concurrency framework for Ruby, allowing users to write concurrent applications using an event-driven model. | +| | parallel | | A gem that provides support for parallel processing in Ruby, allowing users to execute tasks concurrently. | +| Foreign Function Interface (FFI) | ffi | | A gem that provides a foreign function interface for Ruby, allowing users to call functions written in other languages from Ruby. | +| Shell Command Execution and Management | childprocess | | A gem that provides a cross-platform interface for spawning child processes and managing their input, output, and error streams. | +| | tty-box | | A component of the TTY toolkit for drawing frames and boxes in the terminal window. | +| | tty-command | | A component of the TTY toolkit that allows running shell commands with pretty logging and capturing stdout, stderr, and exit status. | +| | tty-config | | A component of the TTY toolkit that provides a customizable application configuration interface for terminal tools. | +| | net-ssh | | A gem that provides a client for connecting to and executing commands on remote SSH servers. | \ No newline at end of file diff --git a/nlp/Dockerfile b/nlp/Dockerfile index 0ba7dcd..f7135d3 100644 --- a/nlp/Dockerfile +++ b/nlp/Dockerfile @@ -33,14 +33,6 @@ ENV LC_ALL=C.UTF-8 ENV PATH $HOME/.local/share/gem/ruby/3.1.0/bin:$HOME/.local/bin:$PATH ENV BUNDLE_PATH $HOME/.local/share/gem -COPY nlp/requirements.txt . 
- -RUN pip install --no-cache-dir -U pip wheel setuptools && \ - pip install -r requirements.txt && \ - python3 -m spacy download en_core_web_sm && \ - python3 -m spacy download en_core_web_lg && \ - python -c "import sys, importlib.util as util; 1 if util.find_spec('nltk') else sys.exit(); import nltk; nltk.download('punkt')" - COPY nlp/Gemfile . #TODO: Create gemspec for ferret COPY gems/ferret-0.11.9.0.gem . diff --git a/nlp/Gemfile b/nlp/Gemfile index 18954a2..19eb02a 100644 --- a/nlp/Gemfile +++ b/nlp/Gemfile @@ -61,54 +61,27 @@ gem 'tty-screen' gem 'yajl-ruby', require: 'yajl' gem 'yaml' - -# Install basic gems gem 'charty', '>= 0.2.12' gem 'matplotlib', '>= 1.2.0' gem 'numpy', '>= 0.4.0' gem 'pandas', '>= 0.3.8' -# gem 'red_amber', '0.4.2' -# gem 'red-arrow', '11.0.0' -# gem 'red-datasets', '>= 0.1.4' -# gem 'red-gandiva', '11.0.0' -# gem 'red-parquet', '11.0.0' gem 'unicode_plot', '>= 0.0.5' -# Additional gems gem 'daru' gem 'daru-view' gem 'enumerable-statistics' gem 'ffi-rzmq' gem 'numo-linalg' gem 'numo-narray' -# gem 'red-arrow-numo-narray' -# gem 'red-chainer' -# gem 'red-datasets-arrow' -# gem 'red-datasets-daru' -# gem 'red-datasets-pandas' -# gem 'red-plasma' gem 'rumale' - -#TODO: -# source "https://rubygems.pkg.github.com/b08x" do -# gem "ferret" -# gem "nmatrix" -# gem 'nmatrix-lapacke' -# gem 'rbplotly' -# end - git_source(:github) { |repo_name| "https://github.com/#{repo_name}" } gem 'nmatrix', git: 'https://github.com/b08x/nmatrix.git', branch: 'development' gem 'nmatrix-fftw', git: 'https://github.com/b08x/nmatrix.git', branch: 'development' gem 'nmatrix-lapacke', git: 'https://github.com/b08x/nmatrix.git', branch: 'development' gem 'rbplotly', git: 'https://github.com/b08x/rbplotly.git', branch: 'development' -# gem "betty", git: "https://github.com/b08x/betty.git", branch: "main" - -# gem 'summarize' -# gem 'aoororachain' gem 'beckett' gem 'chroma-db' gem 'cli-ui' @@ -127,10 +100,8 @@ gem 'fuzzy_match' gem 'fuzzy-string-match' gem 
'fuzzy_tools' gem 'gnuplot' -# gem 'google-cloud' gem 'google-cloud-ai_platform-v1' gem 'google_drive' -# gem 'google_palm_api', '>= 0.1.3' gem 'google_search_results' gem 'graphr' gem 'hexapdf' @@ -139,7 +110,7 @@ gem 'jongleur' gem 'langchainrb' gem 'lemmatizer' gem 'lingua' -# gem 'llm_memory' +gem 'llm_memory' gem 'mimemagic' gem 'networkx' gem 'nokogiri' @@ -179,3 +150,32 @@ gem 'verbal_expressions' gem 'wikipedia-client' gem 'wordnet' gem 'wordnet-defaultdb' + + + + +#TODO: +# source "https://rubygems.pkg.github.com/b08x" do +# gem "ferret" +# gem "nmatrix" +# gem 'nmatrix-lapacke' +# gem 'rbplotly' +# end + + +# gem "betty", git: "https://github.com/b08x/betty.git", branch: "main" +# gem 'aoororachain' +# gem 'google-cloud' +# gem 'google_palm_api', '>= 0.1.3' +# gem 'red-arrow', '11.0.0' +# gem 'red-arrow-numo-narray' +# gem 'red-chainer' +# gem 'red-datasets', '>= 0.1.4' +# gem 'red-datasets-arrow' +# gem 'red-datasets-daru' +# gem 'red-datasets-pandas' +# gem 'red-gandiva', '11.0.0' +# gem 'red-parquet', '11.0.0' +# gem 'red-plasma' +# gem 'red_amber', '0.4.2' +# gem 'summarize' \ No newline at end of file diff --git a/nlp/requirements.txt b/nlp/requirements.txt index 543da56..3bfd125 100644 --- a/nlp/requirements.txt +++ b/nlp/requirements.txt @@ -11,7 +11,8 @@ pyvis IPython nbconvert[all] prompttools -spacy>=3.4.0 +pydantic==1.9 +spacy>=3.3.0,<3.5.0 txtai[all] typing_extensions<4.6.0 unstructured diff --git a/nlp/table.md b/nlp/table.md new file mode 100644 index 0000000..2367bdb --- /dev/null +++ b/nlp/table.md @@ -0,0 +1,139 @@ +# gems grouped by function + + +| **Functionality** | **Gem** | **Description** | **URL** | | +|--------------------------------|-----------------------------|-----------------|--------------------------------------------------------------------------------|-----| +| Cloud Computing | google_drive | | [google_drive](https://github.com/gimite/google-drive-ruby) | | +| | google_cloud-ai_platform-v1 | | 
[google_cloud-ai_platform-v1](https://github.com/googleapis/google-cloud-ruby) | | +| **Functionality** | **Gem** | **Description** | **URL** | | +| ------------------------------ | --------------------------- | --------------- | ------------------------------------------------------------------------------ | --- | +| Encryption | openssl | | [openssl](https://github.com/ruby/openssl) | | + +| **Functionality** | **Gem** | **Description** | **URL** | | +|--------------------------------|-----------------------------|-----------------|--------------------------------------------------------------------------------|-----| +| General-Purpose Matrix Library | nmatrix | | [nmatrix](https://github.com/b08x/nmatrix.git) | | +| | nmatrix-fftw | | [nmatrix-fftw](https://github.com/b08x/nmatrix.git) | | +| | nmatrix-lapacke | | [nmatrix-lapacke](https://github.com/b08x/nmatrix.git) | | +| **Functionality** | **Gem** | **Description** | **URL** | | +| ------------------------------ | --------------------------- | --------------- | ------------------------------------------------------------------------------ | --- | +| Linear Algebra | numo-linalg | | | | + +| **Functionality** | **Gem** | **Description** | **URL** | | +|---------------------|-------------|-----------------|---------|---| +| Numerical Computing | numo-narray | | | | + + +| **Functionality** | **Gem** | **Description** | **URL** | | +|-------------------|--------------|-----------------|-----------------------------------------------------------|---| +| Machine Learning | decisiontree | | [decisiontree](https://github.com/igrigorik/decisiontree) | | +| | rumale | | [rumale](https://github.com/) | | +| | rover-df | | [rover-df](https://github.com/ankane/rover) | | +| | sad_panda | | [sad_panda](https://github.com/mattThousand/sad_panda) | | +| | | | | | +| | | | | | + +| **Functionality** | **Gem** | **Description** | **URL** | | 
+|----------------------|--------------|-----------------|--------------------------------------------------------------|---| +| Large Language Model | ollama-ai | | [ollama-ai](https://github.com/gbaptista/ollama-ai)
| | +| | hugging-face | | [hugging-face](https://github.com/) | | +| | cohere | | [cohere](https://github.com/andreibondarev/cohere-ruby) | | +| | cohere-ruby | | [cohere-ruby](https://github.com/andreibondarev/cohere-ruby) | | +| | ruby-openai | | [ruby-openai](https://github.com/alexrudall/ruby-openai) | | +| | | | | | +| | | | | | + + +| **Functionality** | **Gem** | **Description** | **URL** | | +|-----------------------------|--------------------|-----------------|--------------------------------------------------------------------|---| +| Natural Language Processing | engtagger | | [engtagger](https://github.com/yohasebe/engtagger) | | +| | fasttext | | [fasttext](https://github.com/ankane/fastText-ruby) | | +| | lemmatizer | | [lemmatizer](https://github.com/yohasebe/lemmatizer) | | +| | lingua | | [lingua](https://github.com/dbalatero/lingua) | | +| | ruby-spacy | | [ruby-spacy](https://github.com/yohasebe/ruby-spacy) | | +| | wordnet | | [wordnet](https://github.com) | | +| | wordnet-defaultdb | | [wordnet-defaultdb](https://github.com) | | +| | fuzzy-string-match | | [fuzzy-string-match](https://github.com/kiyoka/fuzzy-string-match) | | +| | fuzzy_tools | | [fuzzy_tools](https://github.com/brianhempel/fuzzy_tools) | | +| | jongleur | | [jongleur](https://github.com/) | | +| | langchainrb | | [langchainrb](https://github.com/) | | +| | safe_ruby | | [safe_ruby](https://github.com/) | | +| | socrates | | [socrates](https://github.com/carbonfive/socrates) | | +| | standard | | [standard](https://github.com/standardrb/standard) | | +| | fuzzy_match | | [fuzzy_match](https://github.com/seamusabshere/fuzzy_match) | | + +| **Functionality** | **Gem** | **Description** | **URL** | | +|-------------------|-----------|-----------------|-------------------------------------------------------|---| +| File Handling | open-uri | | [open-uri](https://github.com/ruby/open-uri) | | +| | poppler | | [poppler](https://github.com/) | | +| | osc-ruby | | 
[osc-ruby](https://github.com/aberant/osc-ruby) | | +| | mimemagic | | [mimemagic](https://github.com/mimemagicrb/mimemagic) | | + +| **Functionality** | **Gem** | **Description** | **URL** | | +|-------------------|---------------------|-----------------|-----------------------------------------------------------------------|---| +| File Parsing | docx | | [docx](https://github.com/chrahunt/docx) | | +| | pdf-reader | | [pdf-reader](https://github.com/yob/pdf-reader) | | +| | pragmatic_tokenizer | | [pragmatic_tokenizer](https://github.com/diasks2/pragmatic_tokenizer) | | +| | pdf_paradise | | [pdf_paradise](https://github.com) | | +| | pragmatic_segmenter | | [pragmatic_segmenter](https://github.com/diasks2/pragmatic_segmenter) | | +| | hexapdf | | [hexapdf](https://github.com/) | | + +| **Functionality** | **Gem** | **Description** | **URL** | | +|-------------------|--------------------|-----------------|---------------------------------------------------------------------------|---| +| Data Manipulation | llm_memory | | [llm_memory](https://github.com/shohey1226/llm_memory) | | +| | mimemagic | | [mimemagic](https://github.com/mimemagicrb/mimemagic) | | +| | roo | | [roo](https://github.com/roo-rb/roo) | | +| | rubydown | | [rubydown](https://github.com/sciruby-jp/rubydown) | | +| | scalpel | | [scalpel](https://github.com/louismullie/scalpel) | | +| | stream_lines | | [stream_lines](https://github.com/jdlubrano/stream_lines) | | +| | syntax_tree | | [syntax_tree](https://github.com/kddnewton/syntax_tree) | | +| | verbal_expressions | | [verbal_expressions](https://github.com/ryan-endacott/verbal_expressions) | | + +| **Functionality** | **Gem** | **Description** | **URL** | | +|-------------------|-----------------------|-----------------|--------------------------------------------------------------------------------|---| +| Search | google_search_results | | [google_search_results](https://github.com/serpapi/google-search-results-ruby) | | + +| 
**Functionality** | **Gem** | **Description** | **URL** | | +|-------------------|------------------|-----------------|------------------------------------------------------------------|---| +| Web Scraping | graphr | | [graphr](https://github.com/louismullie/graphr) | | +| | nokogiri | | [nokogiri](https://github.com/sparklemotion/nokogiri) | | +| | roo | | [roo](https://github.com/roo-rb/roo) | | +| | wikipedia-client | | [wikipedia-client](https://github.com/kenpratt/wikipedia-client) | | + +| **Functionality** | **Gem** | **Description** | **URL** | | +|--------------------|---------|-----------------|-------------------------------------|---| +| Data Serialization | oj | | [oj](https://github.com/ohler55/oj) | | + + +| **Functionality** | **Gem** | **Description** | **URL** | | +|-------------------|-------------|-----------------|-------------------------------------------------------|---| +| Data Storage | pgvector | | [pgvector](https://github.com/pgvector/pgvector-ruby) | | +| | redis | | [redis](https://github.com/redis/redis-rb) | | +| | redic | | [redic](https://github.com/amakawa/redic) | | +| | ohm | | [ohm](https://github.com/cyx/ohm-contrib) | | +| | ohm-contrib | | [ohm-contrib](https://github.com/cyx/ohm-contrib) | | + +| **Functionality** | **Gem** | **Description** | **URL** | | +|-------------------|---------|-----------------|------------------------------------------------------------|---| +| Graphics | gnuplot | | [gnuplot](https://github.com/rdp/ruby_gnuplot/tree/master) | | +| | vega | | [vega](https://github.com/ankane/vega-ruby) | | + +| **Functionality** | **Gem** | **Description** | **URL** | | +|--------------------|--------------|-----------------|--------------------------------------------------|---| +| Data Visualization | charty | | | | +| | matplotlib | | | | +| | unicode_plot | | | | +| | rbplotly | | [rbplotly](https://github.com/b08x/rbplotly.git) | | + +| **Functionality** | **Gem** | **Description** | **URL** | | 
+|-------------------|-----------|-----------------|--------------------------------------------------|---| +| Data Analysis | epitome | | [epitome](https://github.com/McFreely/epitome) | | +| | rdatasets | | [rdatasets](https://github.com/kojix2/rdatasets) | | +| | tomoto | | [tomoto](https://github.com/ankane/tomoto-ruby) | | +| | vega | | [vega](https://github.com/ankane/vega-ruby) | | +| | numpy | | | | +| | pandas | | | | +| | daru | | | | + +| **Functionality** | **Gem** | **Description** | **URL** | | +|-------------------|---------|-----------------|------------------------------|---| +| Testing | rspec | | [rspec](https://github.com/) | |