Skip to content

Commit

Permalink
Merge pull request #4
Browse files Browse the repository at this point in the history
version 0.0.1 release
  • Loading branch information
Sanjaypranav authored Oct 16, 2022
2 parents 842709f + 830beb7 commit 8dcfe7f
Show file tree
Hide file tree
Showing 16 changed files with 131 additions and 15 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,8 @@
src/vocably/vocably.egg-info
src/vocably/__pycache__
build
src/nandini_vocably.egg-info
dist
src/vocably.egg-info
src/__init__.py
.pytest_cache
13 changes: 7 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ install:
python3 setup.py install

config:
python3 -m pip install pylint==2.15.4 pytest==7.1.3
python3 -m spacy download en_core_web_sm
python3 -m "nltk.downloader" all

Expand All @@ -13,9 +14,6 @@ all:
make config
make list

test:
python3 -m pytest -v

delete:
python3 setup.py clean --all

Expand All @@ -26,8 +24,11 @@ uninstall:
pip3 uninstall vocably

format:
flake8 --max-line-length=120 --ignore=E305,E402,W503,BLK100
flake8 --max-line-length=120 --ignore=BLK100 --exclude=build --format="%(path)s:%(row)d:%(col)d: %(code)s %(text)s" --show-source --statistics

# Language: makefile
lint:
pylint vocably --ignore=C0114,C0115,C0116,C0117,C0118



# A directory named test/ exists, so the target must be declared phony;
# otherwise make treats it as an up-to-date file and never runs the recipe.
.PHONY: test
test:
	# pytest has to be run as a module (-m); "python3 pytest" looks for a script file named "pytest".
	python3 -m pytest test/
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ $ pip install vocably
$ git clone https://github.com/Nandhini25S/Vocably.git
$ cd Vocably
$ conda env create -f environment.yml
$ export PYTHONPATH=./src
$ python3 setup.py install
```

Expand All @@ -26,6 +27,7 @@ $ conda activate vocably
to install and config

```bash
$ export PYTHONPATH=./src
$ make install
$ make config
```
Expand Down
1 change: 1 addition & 0 deletions Scripts/config.bat
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pip3 install -U pytest==7.1.3
python3 -m spacy download en_core_web_sm
python3 -m "nltk.downloader" all
1 change: 1 addition & 0 deletions Scripts/install.bat
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
set PYTHONPATH=./src
python3 setup.py install
1 change: 1 addition & 0 deletions Scripts/pytest.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pytest test/text_preprocessor.py
6 changes: 4 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,7 @@ scikit-learn==1.1.2
transformers==4.22.1
tokenizers==0.12.1
spacy==3.4.1
flake8==5.4.0
click==7.1.2
flake8==5.0.4
click==7.1.2
rich~=12.6.0
setuptools~=65.5.0
23 changes: 23 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[metadata]
name = vocably
long_description = file: README.md
long_description_content_type = text/markdown
author = Nandhini
author_email = [email protected]

[isort]
include_trailing_comma = True
use_parentheses = True
line_length = 119
description-file = README.md

[flake8]
max-line-length = 119
exclude = ./docs,
__pycache__,
.github
*.venv/*,
.venv,
.eggs
./src/vocably/__pycache__,
src/vocably/vocably.egg-info
18 changes: 12 additions & 6 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,24 @@
import setuptools
from setuptools import setup
from pathlib import Path
from src.vocably import VERSION
from src.vocably import __version__

core_requirements = [
'numpy~=1.23.3',
'pandas~=1.5.0',
'Click~=7.1.2',
'click~=7.1.2',
'torch~=1.12.1',
'gensim~=4.2.0',
'nltk~=3.7',
'scipy==1.9.1',
'scikit-learn==1.1.2',
'transformers==4.22.1',
'rich~=12.6.0',
]

setup(
name='vocably',
version=VERSION,
version=__version__,
py_modules=['command', 'core'],
install_requires=core_requirements,
description='Vocaly is a Natural Language Framework written in Python for Language based Tasks.',
Expand All @@ -26,10 +27,15 @@
python_requires='>=3.7,<4',
author="Nandhini",
author_email="[email protected]",
url="",
url="https://github.com/Nandhini25S/Vocably",
include_package_data=True,
os_type=["linux", "Windows", "MacOS", "Unix"],
license='MIT',
package_dir={'': 'src/vocably'},
packages=setuptools.find_packages(where="src/vocably"),
package_dir={'': 'src'},
packages=setuptools.find_packages(where="src"),
entry_points={
'console_scripts': [
'vocably = vocably.cli.main:main',
],
},
)
Empty file.
55 changes: 55 additions & 0 deletions src/vocably/Preprocessing/text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import re
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
import spacy
from vocably.constants import WHITELIST


class Preprocess:
    """Normalise and tokenize English text with a configurable set of clean-up steps.

    Args:
        remove_stopwords: Drop spaCy English stop words in ``tokenize``
            (words listed in ``WHITELIST`` are always kept).
        lemmatize: Lemmatize tokens as verbs with WordNet; when False the
            tokens are Porter-stemmed instead.
        remove_links: Strip URLs in ``normalise``.
        remove_punctuation: Strip every character that is neither a word
            character nor whitespace in ``normalise``.
        remove_numbers: Strip runs of digits in ``normalise``.
        nltk_tokenize: Split with ``nltk.word_tokenize`` instead of
            ``str.split`` in ``tokenize``.
    """

    # A URL is a scheme ("http://", "https://") or a "www." prefix followed by
    # everything up to the next whitespace. Input is lower-cased before matching.
    _URL_PATTERN = re.compile(r'(?:https?://|www\.)\S+')

    # Stop-word set shared by all instances; filled on first use because
    # loading the spaCy model is expensive.
    _stop_words = None

    def __init__(self, remove_stopwords: bool = False,
                 lemmatize: bool = True,
                 remove_links: bool = True,
                 remove_punctuation: bool = True,
                 remove_numbers: bool = True, nltk_tokenize: bool = False):
        self.remove_stopwords = remove_stopwords
        self.lemmatize = lemmatize
        self.remove_links = remove_links
        self.remove_punctuation = remove_punctuation
        self.remove_numbers = remove_numbers
        self.nltk_tokenize = nltk_tokenize

    def normalise(self, text):
        """Return ``text`` lower-cased and cleaned according to the instance flags.

        Newlines and tabs become spaces, the enabled removal steps run in the
        order links -> punctuation -> numbers, and finally every run of
        whitespace is collapsed to a single space with the ends trimmed.
        """
        text = text.lower().replace('\n', ' ').replace('\t', ' ')
        if self.remove_links:
            # Remove each URL as a whole; ordinary words that merely contain
            # "com" or "http" are left untouched.
            text = self._URL_PATTERN.sub(' ', text)
        if self.remove_punctuation:
            text = re.sub(r'[^\w\s]', '', text)
        if self.remove_numbers:
            # Only digits are removed here, so punctuation and non-ASCII
            # letters survive unless their own flag asks for removal.
            text = re.sub(r'\d+', ' ', text)
        return ' '.join(text.split())

    def tokenize(self, text):
        """Split ``text`` into tokens and reduce each one to a base form.

        Stop words are removed first when ``remove_stopwords`` is set. Tokens
        come from ``word_tokenize`` when ``nltk_tokenize`` is set, otherwise
        from ``str.split``. Each token is lemmatized as a verb when
        ``lemmatize`` is set, otherwise Porter-stemmed.

        Returns:
            A list of processed token strings.
        """
        if self.remove_stopwords:
            text = self.stopwords_remove(text)
        words = word_tokenize(text) if self.nltk_tokenize else text.split()
        if self.lemmatize:
            lemmatizer = WordNetLemmatizer()
            return [lemmatizer.lemmatize(word, pos='v') for word in words]
        stemmer = PorterStemmer()
        return [stemmer.stem(word) for word in words]

    def stopwords_remove(self, text):
        """Return ``text`` without spaCy English stop words.

        Words are taken from ``text.split()``; any word listed in
        ``WHITELIST`` is kept even if it is a stop word.
        """
        if Preprocess._stop_words is None:
            # Load the model once and keep the stop words as a set for O(1) lookups.
            Preprocess._stop_words = frozenset(spacy.load('en_core_web_sm').Defaults.stop_words)
        stop_words = Preprocess._stop_words
        return ' '.join(word for word in text.split()
                        if word not in stop_words or word in WHITELIST)
3 changes: 2 additions & 1 deletion src/vocably/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
VERSION="0.0.1"
__version__ : str = """0.0.1"""
__author__ : str = '''Nandhini'''
Empty file added src/vocably/cli/__init__.py
Empty file.
10 changes: 10 additions & 0 deletions src/vocably/cli/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import rich
import sys


def main():
    """Print the vocably welcome banner using rich console markup."""
    banner = "[bold purple]Welcome to vocably![/bold purple]"
    rich.print(banner)


if __name__ == "__main__":
    # main() returns None, which sys.exit treats as a successful exit.
    exit_status = main()
    sys.exit(exit_status)
3 changes: 3 additions & 0 deletions src/vocably/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from typing import List

# Words that Preprocess.stopwords_remove keeps even when they are stop words.
# typing.List is used instead of list[str] because a module-level annotation is
# evaluated at import time and list[str] raises TypeError on Python < 3.9.
WHITELIST: List[str] = [
    'not', "n't", "isnt", "isn't", "only", "about", "wouldn't",
    "shouldn't", "couldn't", "weren't", "wasn't",
    "hasn't", "werent", "hasnt",
]
6 changes: 6 additions & 0 deletions test/text_preprocessor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from vocably.Preprocessing.text import Preprocess

# Manual smoke check: normalise a sample sentence, tokenize it and print the tokens.
preprocess = Preprocess(
    remove_stopwords=True,
    remove_links=True,
    remove_punctuation=True,
    remove_numbers=False,
    nltk_tokenize=True,
)
text = 'Friendship is not only about caring for each other. Being with them in hard times'
normalised = preprocess.normalise(text)
print(f"{preprocess.tokenize(normalised)}")

0 comments on commit 8dcfe7f

Please sign in to comment.