Skip to content

Commit

Permalink
lix/rix to readability + option for mean only
Browse files Browse the repository at this point in the history
  • Loading branch information
HLasse committed Feb 26, 2020
1 parent 4ce1348 commit 39a6e00
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 10 deletions.
16 changes: 13 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,23 @@

# Package metadata for textdescriptives.
# Read the README up front so the file handle is closed deterministically
# instead of being left open for the garbage collector.
with open('README.md', encoding='utf-8') as readme_file:
    long_description = readme_file.read()

setup(name='textdescriptives',
      version='0.1',
      author='Lasse Hansen',
      author_email='[email protected]',
      description='A package for calculating a wide variety of features from text',
      long_description=long_description,
      long_description_content_type='text/markdown',
      url='https://github.com/HLasse/TextDescriptives',
      license='MIT',
      packages=['textdescriptives'],
      # Classifiers must come from the registered trove list; an
      # unregistered value such as "Topic :: NLP" is rejected on upload.
      classifiers=[
          "Programming Language :: Python :: 3",
          "License :: OSI Approved :: MIT License",
          "Operating System :: OS Independent",
          "Topic :: Text Processing",
          "Topic :: Text Processing :: Linguistic",
      ],
      install_requires=['pandas', 'numpy', 'pyphen', 'pycountry'],
      python_requires='>=3.6',
      zip_safe=False,
      include_package_data=True)

28 changes: 21 additions & 7 deletions textdescriptives/textdescriptives.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,19 +57,31 @@ def basic(self, measures = 'all'):
"""
basic_calc = Calculators(lang = self.lang)

valid_measures = {'avg_word_length' : basic_calc.avg_word_length, 'median_word_length' : basic_calc.median_word_length,
'std_word_length' : basic_calc.std_word_length, 'avg_sentence_length' : basic_calc.avg_sentence_length,
valid_measures = {'mean_word_length' : basic_calc.avg_word_length, 'median_word_length' : basic_calc.median_word_length,
'std_word_length' : basic_calc.std_word_length, 'mean_sentence_length' : basic_calc.avg_sentence_length,
'median_sentence_length' : basic_calc.median_sentence_length, 'std_sentence_length' : basic_calc.std_sentence_length,
'avg_syl_per_word' : basic_calc.avg_syl_per_word, 'median_syl_per_word' : basic_calc.median_syl_per_word,
'mean_syl_per_word' : basic_calc.avg_syl_per_word, 'median_syl_per_word' : basic_calc.median_syl_per_word,
'std_syl_per_word' : basic_calc.std_syl_per_word, 'type_token_ratio' : basic_calc.type_token_ratio,
'lix' : basic_calc.lix, 'rix' : basic_calc.rix, 'n_types' : basic_calc.n_types,
'n_sentences' : basic_calc.n_sentences, 'n_tokens' : basic_calc.n_tokens, 'n_chars' : basic_calc.n_chars
'n_chars' : basic_calc.n_chars, 'n_sentences' : basic_calc.n_sentences,
'n_types' : basic_calc.n_types, 'n_tokens' : basic_calc.n_tokens
}


only_mean = {'mean_word_length' : basic_calc.avg_word_length, 'mean_sentence_length' : basic_calc.avg_sentence_length,
'mean_syl_per_word' : basic_calc.avg_syl_per_word, 'type_token_ratio' : basic_calc.type_token_ratio,
'n_chars' : basic_calc.n_chars, 'n_sentences' : basic_calc.n_sentences,
'n_types' : basic_calc.n_types, 'n_tokens' : basic_calc.n_tokens
}


if measures == 'all':
for measure, func in valid_measures.items():
self.df[measure] = [func(text) for text in self.df['Text']]

elif measures == 'only_mean':
for measure, func in only_mean.items():
self.df[measure] = [func(text) for text in self.df['Text']]

elif not (set(measures).issubset(set(valid_measures.keys()))):
raise ValueError("Invalid measures provided to self.basic")

Expand All @@ -82,10 +94,12 @@ def readability(self, measures = 'all'):
Calculates readability scores
"""
read = Readability(lang = self.lang)
basic_calc = Calculators(lang = self.lang)

valid_measures = {'gunning_fog' : read.gunning_fog, 'smog' : read.smog,
'flesch_reading_ease' : read.flesch_reading_ease, 'flesch_kincaid_grade' : read.flesch_kincaid_grade,
'automated_readability_index' : read.automated_readability_index, 'coleman_liau_index' : read.coleman_liau_index,
'lix' : basic_calc.lix, 'rix' : basic_calc.rix
}

if measures == 'all':
Expand Down Expand Up @@ -164,14 +178,14 @@ def basic_stats(texts, lang = 'en', metrics = 'all'):
"""
return Textdescriptives(texts, lang, 'basic', measures = metrics).df

def readability(texts, lang = 'en', metrics = 'all'):
    """
    Calculates readability metrics
    texts: str/list/pd.Series of strings
    lang: string, two character language code
    metrics: string/list of strings, which metrics to calculate
    returns: the `df` attribute of the Textdescriptives object built for 'readability'
    """
    # The class keyword is still called `measures`; only this wrapper's
    # public parameter is named `metrics` (as in basic_stats).
    return Textdescriptives(texts, lang, 'readability', measures = metrics).df

def etymology(texts, lang = 'en'):
"""
Expand Down

0 comments on commit 39a6e00

Please sign in to comment.