-
-
Notifications
You must be signed in to change notification settings - Fork 563
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #93 from DrewThomasson/v2.0_update_readme
V2.0 update readme improved and added assets folder
- Loading branch information
Showing
68 changed files
with
5,122 additions
and
74 deletions.
There are no files selected for viewing
Binary file not shown.
File renamed without changes
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
# Use an official NVIDIA CUDA image with cudnn8 and Ubuntu 20.04 as the base
FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu20.04

# Suppress timezone and other interactive prompts while building.
# ARG (instead of ENV) keeps this build-only setting out of the runtime
# environment of the final container.
ARG DEBIAN_FRONTEND=noninteractive

# Install the system packages (sorted alphabetically for easier diffs).
# ca-certificates is listed explicitly because the HTTPS downloads below
# (wget, git clone) depend on it. The apt lists are removed in the same
# layer so they never end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        calibre \
        espeak \
        espeak-ng \
        ffmpeg \
        git \
        libmecab-dev \
        mecab \
        mecab-ipadic-utf8 \
        tk \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Fail the build early if Calibre's converter cannot be executed
RUN ebook-convert --version

# Install Miniconda; the installer and conda's package cache are removed in
# the same layer so they do not bloat the image.
# NOTE(review): "latest" is not reproducible - consider pinning an installer
# version and verifying its checksum.
RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
    bash ~/miniconda.sh -b -p /opt/conda && \
    rm ~/miniconda.sh && \
    /opt/conda/bin/conda clean -afy

# Set PATH to include conda
ENV PATH=/opt/conda/bin:$PATH

# Create a conda environment with Python 3.10
RUN conda create -n ebookenv python=3.10 -y && \
    conda clean -afy

# Run every following RUN instruction inside the conda environment
SHELL ["conda", "run", "-n", "ebookenv", "/bin/bash", "-c"]

# Install Python dependencies using conda and pip.
# -y is required: there is no terminal during a build to answer conda's
# confirmation prompt.
RUN conda install -y -n ebookenv -c conda-forge \
        mecab-python3 \
        nltk \
        pydub \
    && conda clean -afy \
    && pip install --no-cache-dir \
        beautifulsoup4 \
        bs4 \
        ebooklib \
        gradio \
        tqdm \
        tts==0.21.3 \
        unidic

# Download unidic
RUN python -m unidic download

# Download the punkt package for nltk.
# Done before the repository is cloned so this layer stays cached when only
# the application source changes.
RUN python -m nltk.downloader punkt

# Set the working directory in the container
WORKDIR /ebook2audiobookXTTS

# Clone the ebook2audiobookXTTS repository (shallow: history is not needed)
RUN git clone --depth 1 https://github.com/DrewThomasson/ebook2audiobookXTTS.git .

# Copy test audio file
COPY default_voice.wav /ebook2audiobookXTTS/

# Run a test synthesis to set up XTTS (this downloads the xtts_v2 model into
# the image). "yes" answers any interactive prompt raised by the script -
# presumably the model licence confirmation; confirm before relying on it.
# The temporary script and the generated output.wav are deleted in the same
# layer; removing them in a later RUN would not make the image any smaller.
RUN printf '%s\n' \
        "import torch" \
        "from TTS.api import TTS" \
        "device = 'cuda' if torch.cuda.is_available() else 'cpu'" \
        "print(TTS().list_models())" \
        "tts = TTS('tts_models/multilingual/multi-dataset/xtts_v2').to(device)" \
        "wav = tts.tts(text='Hello world!', speaker_wav='default_voice.wav', language='en')" \
        "tts.tts_to_file(text='Hello world!', speaker_wav='default_voice.wav', language='en', file_path='output.wav')" \
        > /tmp/script1.py && \
    yes | python /tmp/script1.py && \
    rm -f /tmp/script1.py /ebook2audiobookXTTS/output.wav

# List the checkout for the build log, abort if the Gradio entry script is
# missing, then switch its launch call to share=True. sed exits successfully
# even when nothing matched, so the result is checked and a warning is printed
# instead of silently shipping an unmodified script.
RUN ls -la /ebook2audiobookXTTS/ && \
    if [ -f /ebook2audiobookXTTS/custom_model_ebook2audiobookXTTS_with_link_gradio.py ]; then echo "Script found."; else echo "Script not found."; exit 1; fi && \
    sed -i 's/demo.launch(share=False)/demo.launch(share=True)/' /ebook2audiobookXTTS/custom_model_ebook2audiobookXTTS_with_link_gradio.py && \
    if ! grep -q 'demo.launch(share=True)' /ebook2audiobookXTTS/custom_model_ebook2audiobookXTTS_with_link_gradio.py; then echo "WARNING: demo.launch(share=True) not found after patching; the launch call may have changed upstream." >&2; fi

# Set the command to run your GUI application using the conda environment
CMD ["conda", "run", "--no-capture-output", "-n", "ebookenv", "python", "/ebook2audiobookXTTS/custom_model_ebook2audiobookXTTS_with_link_gradio.py"]
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
MIT License | ||
|
||
Copyright (c) 2024 Drew Thomasson | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
MIT License | ||
|
||
Copyright (c) 2024 Drew Thomasson | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
# this is a sample for running on kaggle and it may not be updated frequently | ||
|
||
# ebook2audiobook Kaggle edition | ||
Generates an audiobook with chapters and ebook metadata using Calibre and XTTS from Coqui TTS, with optional voice cloning and support for multiple languages. | ||
|
||
# import this notebook to kaggle | ||
https://github.com/Rihcus/ebook2audiobookXTTS/blob/main/kaggle-ebook2audiobook-demo.ipynb | ||
|
||
## Features | ||
|
||
- Converts eBooks to text format using Calibre's `ebook-convert` tool. | ||
- Splits the eBook into chapters for structured audio conversion. | ||
- Uses XTTS from Coqui TTS for high-quality text-to-speech conversion. | ||
- Optional voice cloning feature using a provided voice file. | ||
- Supports different languages for text-to-speech conversion, with English as the default. | ||
- Confirmed to run with only 4 GB of RAM | ||
|
||
## Requirements | ||
|
||
- Python 3.x | ||
- `coqui-tts` Python package | ||
- Calibre (for eBook conversion) | ||
- FFmpeg (for audiobook file creation) | ||
- Optional: Custom voice file for voice cloning | ||
|
||
### Installation Instructions for Dependencies | ||
|
||
Install Python 3.x from [Python.org](https://www.python.org/downloads/). | ||
|
||
Install Calibre: | ||
- Ubuntu: `sudo apt-get install -y calibre` | ||
- macOS: `brew install calibre` | ||
- Windows(Powershell in Administrator mode): `choco install calibre` | ||
|
||
Install FFmpeg: | ||
- Ubuntu: `sudo apt-get install -y ffmpeg` | ||
- macOS: `brew install ffmpeg` | ||
- Windows(Powershell in Administrator mode): `choco install ffmpeg` | ||
|
||
Install Mecab for (Non Latin-based Languages tts support)(Optional): | ||
- Ubuntu: `sudo apt-get install -y mecab libmecab-dev mecab-ipadic-utf8` | ||
- macOS: `brew install mecab`, `brew install mecab-ipadic` | ||
- Windows (PowerShell in Administrator mode; mecab-ipadic has no easy install on Windows, so Japanese is not supported there): `choco install mecab` | ||
|
||
Install Python packages: | ||
```bash | ||
pip install tts pydub nltk beautifulsoup4 ebooklib tqdm | ||
``` | ||
(For non Latin-based Languages tts support)(Optional) | ||
`python -m unidic download` | ||
```bash | ||
pip install mecab mecab-python3 unidic | ||
``` | ||
|
||
### Supported Languages | ||
|
||
The script supports the following languages for text-to-speech conversion: | ||
|
||
English (en), | ||
Spanish (es), | ||
French (fr), | ||
German (de), | ||
Italian (it), | ||
Portuguese (pt), | ||
Polish (pl), | ||
Turkish (tr), | ||
Russian (ru), | ||
Dutch (nl), | ||
Czech (cs), | ||
Arabic (ar), | ||
Chinese (zh-cn), | ||
Japanese (ja), | ||
Hungarian (hu), | ||
Korean (ko) | ||
|
||
Specify the language code when running the script to use these languages. | ||
|
||
### Usage | ||
|
||
Navigate to the script's directory in the terminal and execute one of the following commands: | ||
If you have any trouble getting it to run in Windows then it should run fine in WSL2 | ||
|
||
Basic Usage: ALL PARAMETERS ARE MANDATORY WHEN CALLING THE SCRIPT | ||
|
||
```bash | ||
python ebook2audiobook.py <path_to_ebook_file> [path_to_voice_file] [language_code] | ||
``` | ||
Replace <path_to_ebook_file> with the path to your eBook file. | ||
include <path_to_voice_file> for voice cloning. | ||
include <language_code> to specify the language | ||
|
||
|
||
## Demo | ||
|
||
|
||
|
||
https://github.com/DrewThomasson/ebook2audiobookXTTS/assets/126999465/bccd7240-f967-4d27-a87d-445034db7d21 | ||
|
||
|
||
|
||
### Supported ebook File Types: | ||
.epub, .pdf, .mobi, .txt, .html, .rtf, .chm, .lit, .pdb, .fb2, .odt, .cbr, .cbz, .prc, .lrf, .pml, .snb, .cbc, .rb, and .tcr, | ||
(Best results are from using epub or mobi for auto chapter detection) | ||
|
||
### outputs as a m4b with all book metadata and chapters, example output file in an audiobook player app | ||
![Example_of_output_in_audiobook_program](https://github.com/DrewThomasson/VoxNovel/blob/dc5197dff97252fa44c391dc0596902d71278a88/readme_files/example_in_app.jpeg) | ||
|
||
A special thanks to the creators of: | ||
|
||
|
||
-Coqui TTS | ||
|
||
-https://github.com/coqui-ai/TTS | ||
|
||
|
||
-Calibre | ||
|
||
-https://calibre-ebook.com |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
#!/bin/bash
#
# Distribute the chapter files found in ./Working_files/temp_ebook round-robin
# over N worker directories below ./Operator, then start one audio-generation
# worker per directory, alternating between the GPU 1 and GPU 2 worker scripts.
#
# Usage: bash <this script> [workers]
#   workers  number of parallel workers; must be a positive integer
#            (defaults to 1 when omitted)
#
# Exit status: 0 when every worker succeeded, 1 on invalid input or when at
# least one worker exited with an error.

workers=${1:-1}

# Reject anything that is not a positive integer before touching the disk;
# the loops and numeric comparisons below depend on it.
case "$workers" in
    ''|*[!0-9]*)
        echo "Usage: $0 [workers]  (workers must be a positive integer)" >&2
        exit 1
        ;;
esac
if [ "$workers" -lt 1 ]; then
    echo "Usage: $0 [workers]  (workers must be a positive integer)" >&2
    exit 1
fi

# Clean up operator directory
rm -rf "./Operator" "./Chapter_wav_files"
mkdir "./Operator" "./Chapter_wav_files"

# Make appropriate temp directories
for i in $(seq 1 "$workers"); do
    mkdir -p "./Operator/$i/temp" "./Operator/$i/temp_ebook"
done

echo "Created $workers directories"

# Divide the chapters round-robin between the worker directories
share=1
for FILE in ./Working_files/temp_ebook/*; do
    # With no chapter files the glob stays unexpanded; skip the literal pattern.
    [ -e "$FILE" ] || continue
    # Quoted so that file names containing spaces are copied correctly.
    cp "$FILE" "./Operator/$share/temp_ebook/"
    if [ "$share" -lt "$workers" ]; then
        share=$((share+1))
    else
        share=1
    fi
done

echo "Split chapters into operator"

# Single-worker-script variant, kept for reference:
#for i in $(seq 1 $workers); do
#    echo "Starting Worker $i"
#    python p2a_worker.py $i &
#done

# Run audio generation, alternating workers between the two GPUs (T4)
gpu=1
pids=()
for i in $(seq 1 "$workers"); do
    if [ "$gpu" -lt 2 ]; then
        echo "Starting Worker $i on GPU 1"
        python p2a_worker_gpu1.py "$i" &   # Run audio generation GPU 1 T4
        pids+=("$!")
        gpu=2   # switch to gpu 2 on next loop
    else
        echo "Starting Worker $i on GPU 2"
        python p2a_worker_gpu2.py "$i" &   # Run audio generation GPU 2 T4
        pids+=("$!")
        gpu=1   # switch to gpu 1 on next loop
    fi
done

echo "All workers started waiting for completion...."

# Wait for each worker individually so a failing worker is reported instead
# of being silently ignored by a bare "wait".
failed=0
for pid in "${pids[@]}"; do
    if ! wait "$pid"; then
        echo "Warning: worker process $pid exited with an error" >&2
        failed=1
    fi
done

echo "Done!"
exit "$failed"
Binary file not shown.
Binary file added
BIN
+405 KB
legacy/v1.0/Notebooks/Kaggel Archive Code/demo_mini_story_chapters_Drew.epub
Binary file not shown.
File renamed without changes.
1 change: 1 addition & 0 deletions
1
legacy/v1.0/Notebooks/Kaggel Archive Code/kaggle-ebook2audiobook-demo.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.13","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"nvidiaTeslaT4","dataSources":[],"dockerImageVersionId":30733,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"Install depdenencies","metadata":{}},{"cell_type":"code","source":"#!DEBIAN_FRONTEND=noninteractive\n!sudo apt-get update # && sudo apt-get -y upgrade\n!sudo apt-get -y install libegl1 \n!sudo apt-get -y install libopengl0\n!sudo apt-get -y install libxcb-cursor0\n!sudo -v && wget -nv -O- https://download.calibre-ebook.com/linux-installer.sh | sudo sh /dev/stdin\n!sudo apt-get install -y ffmpeg\n!pip install tts pydub nltk beautifulsoup4 ebooklib tqdm\n!pip install numpy==1.26.4","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","execution":{"iopub.status.busy":"2024-06-17T21:17:43.474429Z","iopub.execute_input":"2024-06-17T21:17:43.474679Z","iopub.status.idle":"2024-06-17T21:20:20.992799Z","shell.execute_reply.started":"2024-06-17T21:17:43.474655Z","shell.execute_reply":"2024-06-17T21:20:20.991791Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"Download modified ebook2audiobookXTTS\nhttps://github.com/Rihcus/ebook2audiobookXTTS\n\nOrigional unmodified version\nhttps://github.com/DrewThomasson/ebook2audiobookXTTS","metadata":{}},{"cell_type":"code","source":"!git clone 
https://github.com/Rihcus/ebook2audiobookXTTS","metadata":{"execution":{"iopub.status.busy":"2024-03-25T23:22:24.156772Z","iopub.execute_input":"2024-03-25T23:22:24.157618Z","iopub.status.idle":"2024-03-25T23:22:26.202486Z","shell.execute_reply.started":"2024-03-25T23:22:24.157577Z","shell.execute_reply":"2024-03-25T23:22:26.201179Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"(optional) Uploading your own epub book.\n\nBy default this notebook will use a sample epub book for testing/demo. \n\nIf you want to use your own book you will need to create a private kaggle data set, upload your epub to it, attach it to this notebook, and uncomment the two lines of code bellow, and update the data set path","metadata":{}},{"cell_type":"code","source":"# !cp -r /kaggle/input/<name of your attached dataset>/*.epub /kaggle/working/ebook2audiobookXTTS #copy your custom book\n# !rm /kaggle/working/ebook2audiobookXTTS/demo_mini_story_chapters_Drew.epub #remove default sample book","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"This to install xtts_v2 models","metadata":{}},{"cell_type":"code","source":"import os\nos.environ[\"COQUI_TOS_AGREED\"] = \"1\"\n\n!cd /kaggle/working/ebook2audiobookXTTS && tts --model_name tts_models/multilingual/multi-dataset/xtts_v2 --text \"test\" --speaker_wav ./4.wav --language_idx en --use_cuda true","metadata":{"execution":{"iopub.status.busy":"2024-03-25T23:23:15.626677Z","iopub.execute_input":"2024-03-25T23:23:15.627585Z","iopub.status.idle":"2024-03-25T23:27:40.712856Z","shell.execute_reply.started":"2024-03-25T23:23:15.627548Z","shell.execute_reply":"2024-03-25T23:27:40.711852Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"This is a modified version of ebook2audiobookXTTS. 
\n\n- p1.py only runs the first part ebook2audiobookXTTS and generates chapter txts (I commented out other parts)\n - https://github.com/Rihcus/ebook2audiobookXTTS/blob/main/p1.py\n- Worker_2T4.sh as a basic attempt at multigpu support. The 4 argument processes of ebook2audiobook will be run in parallel\n - Worker_2T4 will try to divide the chapter in even groups based on number of workers (ex 4 group 4 workers)\n - It will try to divy up the work between kaggles two T4 GPUS\n - I'm not sure how much of a difference it makes since kaggles cpu limitations\n \nhttps://github.com/Rihcus/ebook2audiobookXTTS/blob/main/Worker_2T4.sh\n\nhttps://github.com/Rihcus/ebook2audiobookXTTS/blob/main/p2a_worker_gpu1.py\n\nhttps://github.com/Rihcus/ebook2audiobookXTTS/blob/main/p2a_worker_gpu2.py","metadata":{}},{"cell_type":"code","source":"!cd /kaggle/working/ebook2audiobookXTTS && python p1.py \"$(ls ./*.epub)\" \"4.wav\" \"en\"\n!cd /kaggle/working/ebook2audiobookXTTS && bash Worker_2T4.sh 4","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"p3.py runs the final ffmpeg command. ffmpeg has been a bit buggy\nhttps://github.com/Rihcus/ebook2audiobookXTTS/blob/main/p3.py","metadata":{}},{"cell_type":"code","source":"!cd /kaggle/working/ebook2audiobookXTTS && python p3.py \"$(ls ./*.epub)\" \"4.wav\" \"en\"","metadata":{},"execution_count":null,"outputs":[]}]} |
Oops, something went wrong.