diff --git a/proselint/checks/restricted/__init__.py b/proselint/checks/restricted/__init__.py new file mode 100644 index 000000000..4baa6db2b --- /dev/null +++ b/proselint/checks/restricted/__init__.py @@ -0,0 +1 @@ +"""Restricted word lists.""" diff --git a/proselint/checks/restricted/elementary.csv b/proselint/checks/restricted/elementary.csv new file mode 100644 index 000000000..e28fb67f4 --- /dev/null +++ b/proselint/checks/restricted/elementary.csv @@ -0,0 +1,850 @@ +a +all +am +and +at +ball +be +bed +big +book +box +boy +but +came +can +car +cat +come +cow +dad +day +did +do +dog +fat +for +fun +get +go +good +got +had +hat +he +hen +here +him +his +home +hot +I +if +in +into +is +it +its +let +like +look +man +may +me +mom +my +no +not +of +oh +old +on +one +out +pan +pet +pig +play +ran +rat +red +ride +run +sat +see +she +sit +six +so +stop +sun +ten +the +this +to +top +toy +two +up +us +was +we +will +yes +you +about +add +after +ago +an +any +apple +are +as +ask +ate +away +baby +back +bad +bag +base +bat +bee +been +before +being +best +bike +bill +bird +black +blue +boat +both +bring +brother +brown +bus +buy +by +cake +call +candy +change +child +city +clean +club +coat +cold +coming +corn +could +cry +cup +cut +daddy +dear +deep +deer +doing +doll +door +down +dress +drive +drop +dry +duck +each +eat +eating +egg +end +fall +far +farm +fast +father +feed +feel +feet +fell +find +fine +fire +first +fish +five +fix +flag +floor +fly +food +foot +four +fox +from +full +funny +game +gas +gave +girl +give +glad +goat +goes +going +gold +gone +grade +grass +green +grow +hand +happy +hard +has +have +hear +help +here +hill +hit +hold +hole +hop +hope +horse +house +how +ice +inch +inside +job +jump +just +keep +king +know +lake +land +last +late +lay +left +leg +light +line +little +live +lives +long +looking +lost +lot +love +mad +made +make +many +meat +men +met +mile +milk +mine +miss +moon +more +most +mother +move +much +must +myself +nail +name 
+need +new +next +nice +night +nine +north +now +nut +off +only +open +or +other +our +outside +over +page +park +part +pay +pick +plant +playing +pony +post +pull +put +rabbit +rain +read +rest +riding +road +rock +room +said +same +sang +saw +say +school +sea +seat +seem +seen +send +set +seven +sheep +ship +shoe +show +sick +side +sing +sky +sleep +small +snow +some +soon +spell +start +stay +still +store +story +take +talk +tall +teach +tell +than +thank +that +them +then +there +they +thing +think +three +time +today +told +too +took +train +tree +truck +try +use +very +walk +want +warm +wash +way +week +well +went +were +wet +what +when +while +white +who +why +wind +wish +with +woke +wood +work +yellow +yet +your +zoo +able +above +afraid +afternoon +again +age +air +airplane +almost +alone +along +already +also +always +animal +another +anything +around +art +aunt +balloon +bark +barn +basket +beach +bear +because +become +began +begin +behind +believe +below +belt +better +birthday +body +bones +born +bought +bread +bright +broke +brought +busy +cabin +cage +camp +can't +care +carry +catch +cattle +cave +children +class +close +cloth +coal +color +corner +cotton +cover +dark +desert +didn't +dinner +dishes +does +done +don't +dragon +draw +dream +drink +early +earth +east +eight +even +ever +every +everyone +everything +eyes +face +family +feeling +felt +few +fight +fishing +flower +flying +follow +forest +forgot +form +found +fourth +free +Friday +friend +front +getting +given +grandmother +great +grew +ground +guess +hair +half +having +head +heard +he's +heat +hello +high +himself +hour +hundred +hurry +hurt +I'd +I'll +I'm +inches +isn't +it's +I've +kept +kids +kind +kitten +knew +knife +lady +large +largest +later +learn +leave +let's +letter +life +list +living +lovely +loving +lunch +mail +making +maybe +mean +merry +might +mind +money +month +morning +mouse +mouth +Mr. +Mrs. +Ms. 
+music +near +nearly +never +news +noise +nothing +number +o'clock +often +oil +once +orange +order +own +pair +paint +paper +party +pass +past +penny +people +person +picture +place +plan +plane +please +pocket +point +poor +race +reach +reading +ready +real +rich +right +river +rocket +rode +round +rule +running +salt +says +sending +sent +seventh +sew +shall +short +shot +should +sight +sister +sitting +sixth +sled +smoke +soap +someone +something +sometime +song +sorry +sound +south +space +spelling +spent +sport +spring +stairs +stand +state +step +stick +stood +stopped +stove +street +strong +study +such +sugar +summer +Sunday +supper +table +taken +taking +talking +teacher +team +teeth +tenth +that's +their +these +thinking +third +those +thought +throw +tonight +trade +trick +trip +trying +turn +twelve +twenty +uncle +under +upon +wagon +wait +walking +wasn't +watch +water +weather +we're +west +wheat +where +which +wife +wild +win +window +winter +without +woman +won +won't +wool +word +working +world +would +write +wrong +yard +year +yesterday +you're +across +against +answer +awhile +between +board +bottom +breakfast +broken +build +building +built +captain +carried +caught +charge +chicken +circus +cities +clothes +company +couldn't +country +discover +doctor +doesn't +dollar +during +eighth +else +enjoy +enough +everybody +example +except +excuse +field +fifth +finish +following +good-by +group +happened +harden +haven't +heavy +held +hospital +idea +instead +known +laugh +middle +minute +mountain +ninth +ocean +office +parent +peanut +pencil +picnic +police +pretty +prize +quite +radio +raise +really +reason +remember +return +Saturday +scare +second +since +slowly +stories +student +sudden +suit +sure +swimming +though +threw +tired +together +tomorrow +toward +tried +trouble +truly +turtle +until +village +visit +wear +we'll +whole +whose +women +wouldn't +writing +written +wrote +yell +young +although +America +among +arrive +attention +beautiful 
@memoize
def check(text):
    """Report each word in ``text`` that is missing from ELEMENTARY_WORDS.

    The work is delegated to ``reverse_existence_check``; this wrapper only
    supplies the word list, the check identifier and the message template.
    """
    return reverse_existence_check(
        text,
        ELEMENTARY_WORDS,
        "restricted.elementary",
        "'{}' is not a word kids learn in elementary school.",
    )
+dream +dress +drink +drive +drop +drove +dry +during +each +ear +early +easily +easy +eat +edge +either +else +empty +end +enjoy +enough +enter +entire +escape +especially +even +evening +eventually +ever +every +everyone +everything +exactly +except +excite +exclaim +excuse +expect +explain +expression +eye +eyebrow +face +fact +fall +family +far +fast +father +fault +favorite +fear +feel +feet +fell +felt +few +field +fight +figure +fill +finally +find +fine +finger +finish +fire +first +fit +five +fix +flash +flip +floor +fly +focus +follow +food +foot +for +force +forget +form +forward +found +four +free +friend +from +front +frown +fuck +full +fun +funny +further +game +gasp +gave +gaze +gently +get +giggle +girl +girlfriend +give +given +glad +glance +glare +glass +go +God +gone +gonna +good +got +gotten +grab +great +green +greet +grey +grin +grip +groan +ground +group +grow +guard +guess +gun +guy +had +hadn't +hair +half +hall +hallway +hand +handle +hang +happen +happy +hard +has +hate +have +haven't +he +he'd +he's +head +hear +heard +heart +heavy +held +hell +hello +help +her +here +herself +hey +hi +hide +high +him +himself +his +hit +hold +home +hope +horse +hospital +hot +hour +house +how +however +hug +huge +huh +human +hundred +hung +hurry +hurt +I +I'd +I'll +I'm +I've +ice +idea +if +ignore +imagine +immediately +important +in +inside +instead +interest +interrupt +into +is +isn't +it +it's +its +jacket +jeans +jerk +job +join +joke +jump +just +keep +kept +key +kick +kid +kill +kind +kiss +kitchen +knee +knew +knock +know +known +lady +land +large +last +late +laugh +lay +lead +lean +learn +least +leave +led +left +leg +less +let +letter +lie +life +lift +light +like +line +lip +listen +little +live +lock +locker +long +look +lose +lost +lot +loud +love +low +lunch +mad +made +make +man +manage +many +mark +marry +match +matter +may +maybe +me +mean +meant +meet +memory +men +mention +met +middle +might +mind +mine +minute +mirror +miss +mom 
+moment +money +month +mood +more +morning +most +mother +mouth +move +movie +Mr. +Mrs. +much +mum +mumble +music +must +mutter +my +myself +name +near +nearly +neck +need +nervous +never +new +next +nice +night +no +nod +noise +none +normal +nose +not +note +nothing +notice +now +number +obviously +of +off +offer +office +often +oh +okay +old +on +once +one +only +onto +open +or +order +other +our +out +outside +over +own +pack +pain +paint +pair +pants +paper +parents +park +part +party +pass +past +pause +pay +people +perfect +perhaps +person +phone +pick +picture +piece +pink +piss +place +plan +play +please +pocket +point +police +pop +position +possible +power +practically +present +press +pretend +pretty +probably +problem +promise +pull +punch +push +put +question +quick +quickly +quiet +quietly +quite +race +rain +raise +ran +rang +rather +reach +read +ready +real +realize +really +reason +recognize +red +relationship +relax +remain +remember +remind +repeat +reply +respond +rest +return +ride +right +ring +road +rock +roll +room +rose +round +rub +run +rush +sad +safe +said +same +sat +save +saw +say +scare +school +scream +search +seat +second +see +seem +seen +self +send +sense +sent +serious +seriously +set +settle +seven +several +shadow +shake +share +she +she'd +she's +shift +shirt +shit +shock +shoe +shook +shop +short +shot +should +shoulder +shouldn't +shout +shove +show +shower +shrug +shut +sick +side +sigh +sight +sign +silence +silent +simply +since +single +sir +sister +sit +situation +six +skin +sky +slam +sleep +slightly +slip +slow +slowly +small +smell +smile +smirk +smoke +snap +so +soft +softly +some +somehow +someone +something +sometimes +somewhere +son +song +soon +sorry +sort +sound +space +speak +spend +spent +spoke +spot +stair +stand +star +stare +start +state +stay +step +stick +still +stomach +stood +stop +store +story +straight +strange +street +strong +struggle +stuck +student +study +stuff +stupid +such +suck +sudden 
+suddenly +suggest +summer +sun +suppose +sure +surprise +surround +sweet +table +take +taken +talk +tall +teacher +team +tear +teeth +tell +ten +than +thank +that +that's +the +their +them +themselves +then +there +there's +these +they +they'd +they're +thick +thing +think +third +this +those +though +thought +three +threw +throat +through +throw +tie +tight +time +tiny +tire +to +today +together +told +tomorrow +tone +tongue +tonight +too +took +top +totally +touch +toward +town +track +trail +train +tree +trip +trouble +trust +truth +try +turn +TV +twenty +two +type +uncle +under +understand +until +up +upon +us +use +usual +usually +very +visit +voice +wait +wake +walk +wall +want +warm +warn +was +wasn't +watch +water +wave +way +we +we'll +we're +we've +wear +week +weird +well +went +were +weren't +wet +what +what's +whatever +when +where +whether +which +while +whisper +white +who +whole +why +wide +wife +will +wind +window +wipe +wish +with +within +without +woke +woman +women +won't +wonder +wood +word +wore +work +world +worry +worse +would +wouldn't +wow +wrap +write +wrong +yeah +year +yell +yes +yet +you +you'd +you'll +you're +you've +young +your +yourself +TRUE + diff --git a/proselint/checks/restricted/top1000.py b/proselint/checks/restricted/top1000.py new file mode 100644 index 000000000..efea3e523 --- /dev/null +++ b/proselint/checks/restricted/top1000.py @@ -0,0 +1,35 @@ +"""Check if the text contains only words in top 1000 most popular words. + +--- +layout: Website +source: THE UP-GOER FIVE TEXT EDITOR +source_url: https://splasho.com/upgoer5/ +title: ? +date: 2023-04-16 16:32:01 +categories: writing/app +--- + +Top 1000. 
# A candidate word is a run of 3+ characters that starts and ends with a word
# character; apostrophes and hyphens are accepted only in the interior.
_WORD_TOKENIZER = re.compile(r"\w[\w'-]+\w")

# Candidates that contain a numeral are never reported.
_NUMERAL = re.compile(r'\d')


def _allowed_word(permitted, match: re.Match, /, ignore_case=True):
    """Determine if a match object result is in a set of strings.

    ``permitted`` is the collection of accepted words and ``match`` is a
    regex match whose full matched text is the candidate.  When
    ``ignore_case`` is true the candidate is lower-cased before the lookup,
    so ``permitted`` is expected to hold lower-case entries in that mode.
    """
    matched = match.group(0)
    if ignore_case:
        matched = matched.lower()
    return matched in permitted


def reverse_existence_check(
    text, list, err, msg, ignore_case=True, offset=0
):
    """Find all words in ``text`` that aren't on the ``list``.

    Only tokens of three or more characters are considered (shorter words
    are never matched by the tokenizer), and tokens containing a digit are
    skipped.

    Parameters:
        text: the string to scan.
        list: iterable of permitted words.
        err: identifier placed in every returned error tuple.
        msg: ``str.format`` template; ``{}`` receives the offending word.
        ignore_case: compare words case-insensitively (via ``str.lower``).
        offset: value added to every reported start and end position.

    Returns:
        A list of ``(start, end, err, message, None)`` tuples, one per
        offending word, in order of appearance.  ``text[start:end]`` (before
        ``offset`` is applied) is exactly the word quoted in ``message``.
    """
    if ignore_case:
        permitted = {word.lower() for word in list}
    else:
        permitted = set(list)

    errors = []
    for m in _WORD_TOKENIZER.finditer(text):
        word = m.group(0)
        if _NUMERAL.search(word):
            continue
        if _allowed_word(permitted, m, ignore_case=ignore_case):
            continue
        errors.append((
            # The tokenizer consumes no leading padding character, so the
            # match start already is the first character of the word; adding
            # one would make the span begin at the word's second character.
            m.start() + offset,
            m.end() + offset,
            err,
            msg.format(word),
            None,
        ))
    return errors
class TestCheck(Check):
    """Exercise the restricted.top1000 check."""

    __test__ = True

    @property
    def this_check(self):
        """Return the check module under test."""
        return chk

    def test_smoke(self):
        """Smoke-test restricted.top1000 with accepted and rejected text."""
        accepted = (
            """I am blonde.""",
            """I'm gonna listen to music tonight.""",
            """I will go to sleep because I have school.""",
            """""",
        )
        rejected = (
            """I am tired.""",
            """I hate broccoli.""",
            """I am tired and hate broccoli.""",
        )

        for sample in accepted:
            assert self.passes(sample)

        for sample in rejected:
            assert not self.passes(sample)