diff --git a/.github/workflows/check_duplicates.yml b/.github/workflows/check_duplicates.yml new file mode 100644 index 0000000..02448c5 --- /dev/null +++ b/.github/workflows/check_duplicates.yml @@ -0,0 +1,39 @@ +name: Check for Duplicate Facts + +# Controls when the action will run. +on: + # Triggers the workflow on push or pull request events but only for the master branch + push: + branches: [ master ] + pull_request: + branches: [ master ] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# A workflow run is made up of one or more jobs that can run sequentially or in parallel +jobs: + checkduplicates: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Cache checkduplicates binary + uses: actions/cache@v4 + id: cache + with: + path: | + tests/checkduplicates/target/release/checkduplicates + key: ${{ runner.os }}-cargo-${{ hashFiles('tests/checkduplicates/Cargo.lock', 'tests/checkduplicates/Cargo.toml', 'tests/checkduplicates/src/**') }} + restore-keys: | + ${{ runner.os }}-cargo- + + - name: Build checkduplicates test + if: steps.cache.outputs.cache-hit != 'true' + run: | + cd tests/checkduplicates + cargo build --release + + - name: Check for duplicate facts + run: ./tests/checkduplicates/target/release/checkduplicates diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml deleted file mode 100644 index abd374a..0000000 --- a/.github/workflows/codeql-analysis.yml +++ /dev/null @@ -1,67 +0,0 @@ -# For most projects, this workflow file will not need changing; you simply need -# to commit it to your repository. -# -# You may wish to alter this file to override the set of languages analyzed, -# or to provide custom queries or build logic. -# -# ******** NOTE ******** -# We have attempted to detect the languages in your repository. Please check -# the `language` matrix defined below to confirm you have the correct set of -# supported CodeQL languages. 
-# -name: "CodeQL" - -on: - push: - branches: [ master ] - pull_request: - # The branches below must be a subset of the branches above - branches: [ master ] - schedule: - - cron: '26 0 * * 5' - -jobs: - analyze: - name: Analyze - runs-on: ubuntu-latest - - strategy: - fail-fast: false - matrix: - language: [ 'python' ] - # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] - # Learn more: - # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed - - steps: - - name: Checkout repository - uses: actions/checkout@v2 - - # Initializes the CodeQL tools for scanning. - - name: Initialize CodeQL - uses: github/codeql-action/init@v1 - with: - languages: ${{ matrix.language }} - # If you wish to specify custom queries, you can do so here or in a config file. - # By default, queries listed here will override any specified in a config file. - # Prefix the list here with "+" to use these queries and those in the config file. - # queries: ./path/to/local/query, your-org/your-repo/queries@main - - # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). - # If this step fails, then you should remove it and run the build manually (see below) - - name: Autobuild - uses: github/codeql-action/autobuild@v1 - - # ℹ️ Command-line programs to run using the OS shell. 
- # 📚 https://git.io/JvXDl - - # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines - # and modify them (or add more) to build your code if your project - # uses a compiled language - - #- run: | - # make bootstrap - # make release - - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1 diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 2cab971..e32142b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,14 +1,8 @@ -# This is a basic workflow to help you get started with Actions - -name: CI - -# Controls when the action will run. on: # Triggers the workflow on push or pull request events but only for the master branch push: branches: [ master ] pull_request: - branches: [ master ] # Allows you to run this workflow manually from the Actions tab workflow_dispatch: @@ -16,28 +10,80 @@ on: # A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: test: - # The type of runner that the job will run on + name: Test code and coverage runs-on: ubuntu-latest - - # Steps represent a sequence of tasks that will be executed as part of the job + strategy: + fail-fast: false + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] steps: - # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - - uses: actions/checkout@v2 - - # Runs a set of commands using the runners shell - - name: Run a multi-line script - run: | - pip3 install setuptools wheel - python3 setup.py sdist - pip3 install dist/* - python3 tests/test.py - checkduplicates: - runs-on: ubuntu-latest + - uses: actions/checkout@v4 - steps: - - uses: actions/checkout@v2 - - name: Run a multi-line script - run: | - pip3 install -U setuptools wheel pip - pip3 install rapidfuzz tqdm - python3 tests/checkduplicates.py + # To use multiple Python versions, you have to specify a matrix in the job (see above) and + # reference the matrix python 
version here. + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + allow-prereleases: true + + # Cache the installation of Poetry itself, i.e. the next step. This prevents the workflow + # from installing Poetry every time, which can be slow. Note the use of the Poetry version + # number in the cache key (keep it in sync with the `version` pinned in the install step + # below), and the "-0" suffix: this allows you to invalidate the cache manually if/when + # you want to upgrade Poetry, or if something goes wrong. + - name: cache poetry install + uses: actions/cache@v4 + with: + path: ~/.local + key: poetry-1.5.1-0 + + # Install Poetry. You could do this manually, or there are several actions that do this. + # `snok/install-poetry` seems to be minimal yet complete, and really just calls out to + # Poetry's default install script, which feels correct. I pin the Poetry version here + # because Poetry does occasionally change APIs between versions and I don't want my + # actions to break if it does. + # + # The key configuration value here is `virtualenvs-in-project: true`: this creates the + # venv as a `.venv` in your testing directory, which allows the next step to easily + # cache it. + - uses: snok/install-poetry@v1 + with: + version: 1.5.1 + virtualenvs-create: true + virtualenvs-in-project: true + + # Cache your dependencies (i.e. all the stuff in your `pyproject.toml`). Note the cache + # key: this job runs a matrix of Python versions, so the key includes the runner OS and the + # Python version as well as the poetry.lock hash -- each interpreter gets its own `.venv`. + - name: cache deps + id: cache-deps + uses: actions/cache@v4 + with: + path: .venv + key: pydeps-${{ runner.os }}-py${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }} + + # Install dependencies. `--no-root` means "install all dependencies but not the project + # itself", which is what you want to avoid caching _your_ code. 
The `if` statement + # ensures this only runs on a cache miss. + - run: poetry install --no-interaction --no-root + if: steps.cache-deps.outputs.cache-hit != 'true' + + # Now install _your_ project. This isn't necessary for many types of projects -- particularly + # things like Django apps don't need this. But it's a good idea since it fully-exercises the + # pyproject.toml and makes sure that if you add things like console-scripts at some point that + # they'll be installed and working. + - run: poetry install --no-interaction + + # run the tests and report coverage (no minimum threshold is enforced by this command) + - run: poetry run pytest . --cov=randfacts --cov-report=term-missing --cov-report=xml + + # check for code style errors + - run: poetry run ruff check + # disable code format checking until docstrings are sorted out + # https://github.com/astral-sh/ruff/issues/8430 + # - run: poetry run ruff format --check + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v5.0.2 + with: + token: ${{ secrets.CODECOV_TOKEN }} diff --git a/.gitignore b/.gitignore index fff6711..a7b5af8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,3 @@ -# MacOS development (added by PancakesWasTaken) -.DS_Store - # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -97,7 +94,22 @@ ipython_config.py # install all needed dependencies. #Pipfile.lock -# PEP 582; used by e.g. github.com/David-OConnor/pyflow +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +# poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. 
+# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ # Celery stuff @@ -140,5 +152,19 @@ dmypy.json # Cython debug symbols cython_debug/ -# cargo -Cargo.lock +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration +poetry.toml + +# ruff +.ruff_cache/ + +# LSP config files +pyrightconfig.json + diff --git a/LICENSE.txt b/LICENSE similarity index 94% rename from LICENSE.txt rename to LICENSE index 4306b1f..aa9a3b3 100644 --- a/LICENSE.txt +++ b/LICENSE @@ -1,5 +1,5 @@ MIT License -Copyright (c) 2020-2021 Connor Sample +Copyright (c) 2020-2024 Connor Sample Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights @@ -14,4 +14,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file +SOFTWARE. 
diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index b786c31..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1 +0,0 @@ -include randfacts/*.txt \ No newline at end of file diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000..724875e --- /dev/null +++ b/poetry.lock @@ -0,0 +1,233 @@ +# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "coverage" +version = "7.6.1" +description = "Code coverage measurement for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "coverage-7.6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b06079abebbc0e89e6163b8e8f0e16270124c154dc6e4a47b413dd538859af16"}, + {file = "coverage-7.6.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cf4b19715bccd7ee27b6b120e7e9dd56037b9c0681dcc1adc9ba9db3d417fa36"}, + {file = "coverage-7.6.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61c0abb4c85b095a784ef23fdd4aede7a2628478e7baba7c5e3deba61070a02"}, + {file = "coverage-7.6.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fd21f6ae3f08b41004dfb433fa895d858f3f5979e7762d052b12aef444e29afc"}, + {file = "coverage-7.6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f59d57baca39b32db42b83b2a7ba6f47ad9c394ec2076b084c3f029b7afca23"}, + {file = "coverage-7.6.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:a1ac0ae2b8bd743b88ed0502544847c3053d7171a3cff9228af618a068ed9c34"}, + {file = "coverage-7.6.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e6a08c0be454c3b3beb105c0596ebdc2371fab6bb90c0c0297f4e58fd7e1012c"}, + {file = "coverage-7.6.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f5796e664fe802da4f57a168c85359a8fbf3eab5e55cd4e4569fbacecc903959"}, + {file = "coverage-7.6.1-cp310-cp310-win32.whl", hash = "sha256:7bb65125fcbef8d989fa1dd0e8a060999497629ca5b0efbca209588a73356232"}, + {file = "coverage-7.6.1-cp310-cp310-win_amd64.whl", hash = "sha256:3115a95daa9bdba70aea750db7b96b37259a81a709223c8448fa97727d546fe0"}, + {file = "coverage-7.6.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7dea0889685db8550f839fa202744652e87c60015029ce3f60e006f8c4462c93"}, + {file = "coverage-7.6.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ed37bd3c3b063412f7620464a9ac1314d33100329f39799255fb8d3027da50d3"}, + {file = "coverage-7.6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d85f5e9a5f8b73e2350097c3756ef7e785f55bd71205defa0bfdaf96c31616ff"}, + {file = "coverage-7.6.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bc572be474cafb617672c43fe989d6e48d3c83af02ce8de73fff1c6bb3c198d"}, + {file = "coverage-7.6.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c0420b573964c760df9e9e86d1a9a622d0d27f417e1a949a8a66dd7bcee7bc6"}, + {file = "coverage-7.6.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1f4aa8219db826ce6be7099d559f8ec311549bfc4046f7f9fe9b5cea5c581c56"}, + {file = "coverage-7.6.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:fc5a77d0c516700ebad189b587de289a20a78324bc54baee03dd486f0855d234"}, + {file = "coverage-7.6.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b48f312cca9621272ae49008c7f613337c53fadca647d6384cc129d2996d1133"}, + {file = 
"coverage-7.6.1-cp311-cp311-win32.whl", hash = "sha256:1125ca0e5fd475cbbba3bb67ae20bd2c23a98fac4e32412883f9bcbaa81c314c"}, + {file = "coverage-7.6.1-cp311-cp311-win_amd64.whl", hash = "sha256:8ae539519c4c040c5ffd0632784e21b2f03fc1340752af711f33e5be83a9d6c6"}, + {file = "coverage-7.6.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:95cae0efeb032af8458fc27d191f85d1717b1d4e49f7cb226cf526ff28179778"}, + {file = "coverage-7.6.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5621a9175cf9d0b0c84c2ef2b12e9f5f5071357c4d2ea6ca1cf01814f45d2391"}, + {file = "coverage-7.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:260933720fdcd75340e7dbe9060655aff3af1f0c5d20f46b57f262ab6c86a5e8"}, + {file = "coverage-7.6.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07e2ca0ad381b91350c0ed49d52699b625aab2b44b65e1b4e02fa9df0e92ad2d"}, + {file = "coverage-7.6.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c44fee9975f04b33331cb8eb272827111efc8930cfd582e0320613263ca849ca"}, + {file = "coverage-7.6.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:877abb17e6339d96bf08e7a622d05095e72b71f8afd8a9fefc82cf30ed944163"}, + {file = "coverage-7.6.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3e0cadcf6733c09154b461f1ca72d5416635e5e4ec4e536192180d34ec160f8a"}, + {file = "coverage-7.6.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c3c02d12f837d9683e5ab2f3d9844dc57655b92c74e286c262e0fc54213c216d"}, + {file = "coverage-7.6.1-cp312-cp312-win32.whl", hash = "sha256:e05882b70b87a18d937ca6768ff33cc3f72847cbc4de4491c8e73880766718e5"}, + {file = "coverage-7.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:b5d7b556859dd85f3a541db6a4e0167b86e7273e1cdc973e5b175166bb634fdb"}, + {file = "coverage-7.6.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a4acd025ecc06185ba2b801f2de85546e0b8ac787cf9d3b06e7e2a69f925b106"}, + 
{file = "coverage-7.6.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a6d3adcf24b624a7b778533480e32434a39ad8fa30c315208f6d3e5542aeb6e9"}, + {file = "coverage-7.6.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0c212c49b6c10e6951362f7c6df3329f04c2b1c28499563d4035d964ab8e08c"}, + {file = "coverage-7.6.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e81d7a3e58882450ec4186ca59a3f20a5d4440f25b1cff6f0902ad890e6748a"}, + {file = "coverage-7.6.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78b260de9790fd81e69401c2dc8b17da47c8038176a79092a89cb2b7d945d060"}, + {file = "coverage-7.6.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a78d169acd38300060b28d600344a803628c3fd585c912cacc9ea8790fe96862"}, + {file = "coverage-7.6.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2c09f4ce52cb99dd7505cd0fc8e0e37c77b87f46bc9c1eb03fe3bc9991085388"}, + {file = "coverage-7.6.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6878ef48d4227aace338d88c48738a4258213cd7b74fd9a3d4d7582bb1d8a155"}, + {file = "coverage-7.6.1-cp313-cp313-win32.whl", hash = "sha256:44df346d5215a8c0e360307d46ffaabe0f5d3502c8a1cefd700b34baf31d411a"}, + {file = "coverage-7.6.1-cp313-cp313-win_amd64.whl", hash = "sha256:8284cf8c0dd272a247bc154eb6c95548722dce90d098c17a883ed36e67cdb129"}, + {file = "coverage-7.6.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d3296782ca4eab572a1a4eca686d8bfb00226300dcefdf43faa25b5242ab8a3e"}, + {file = "coverage-7.6.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:502753043567491d3ff6d08629270127e0c31d4184c4c8d98f92c26f65019962"}, + {file = "coverage-7.6.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a89ecca80709d4076b95f89f308544ec8f7b4727e8a547913a35f16717856cb"}, + {file = 
"coverage-7.6.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a318d68e92e80af8b00fa99609796fdbcdfef3629c77c6283566c6f02c6d6704"}, + {file = "coverage-7.6.1-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13b0a73a0896988f053e4fbb7de6d93388e6dd292b0d87ee51d106f2c11b465b"}, + {file = "coverage-7.6.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4421712dbfc5562150f7554f13dde997a2e932a6b5f352edcce948a815efee6f"}, + {file = "coverage-7.6.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:166811d20dfea725e2e4baa71fffd6c968a958577848d2131f39b60043400223"}, + {file = "coverage-7.6.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:225667980479a17db1048cb2bf8bfb39b8e5be8f164b8f6628b64f78a72cf9d3"}, + {file = "coverage-7.6.1-cp313-cp313t-win32.whl", hash = "sha256:170d444ab405852903b7d04ea9ae9b98f98ab6d7e63e1115e82620807519797f"}, + {file = "coverage-7.6.1-cp313-cp313t-win_amd64.whl", hash = "sha256:b9f222de8cded79c49bf184bdbc06630d4c58eec9459b939b4a690c82ed05657"}, + {file = "coverage-7.6.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6db04803b6c7291985a761004e9060b2bca08da6d04f26a7f2294b8623a0c1a0"}, + {file = "coverage-7.6.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f1adfc8ac319e1a348af294106bc6a8458a0f1633cc62a1446aebc30c5fa186a"}, + {file = "coverage-7.6.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a95324a9de9650a729239daea117df21f4b9868ce32e63f8b650ebe6cef5595b"}, + {file = "coverage-7.6.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b43c03669dc4618ec25270b06ecd3ee4fa94c7f9b3c14bae6571ca00ef98b0d3"}, + {file = "coverage-7.6.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8929543a7192c13d177b770008bc4e8119f2e1f881d563fc6b6305d2d0ebe9de"}, + {file = 
"coverage-7.6.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:a09ece4a69cf399510c8ab25e0950d9cf2b42f7b3cb0374f95d2e2ff594478a6"}, + {file = "coverage-7.6.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:9054a0754de38d9dbd01a46621636689124d666bad1936d76c0341f7d71bf569"}, + {file = "coverage-7.6.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0dbde0f4aa9a16fa4d754356a8f2e36296ff4d83994b2c9d8398aa32f222f989"}, + {file = "coverage-7.6.1-cp38-cp38-win32.whl", hash = "sha256:da511e6ad4f7323ee5702e6633085fb76c2f893aaf8ce4c51a0ba4fc07580ea7"}, + {file = "coverage-7.6.1-cp38-cp38-win_amd64.whl", hash = "sha256:3f1156e3e8f2872197af3840d8ad307a9dd18e615dc64d9ee41696f287c57ad8"}, + {file = "coverage-7.6.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:abd5fd0db5f4dc9289408aaf34908072f805ff7792632250dcb36dc591d24255"}, + {file = "coverage-7.6.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:547f45fa1a93154bd82050a7f3cddbc1a7a4dd2a9bf5cb7d06f4ae29fe94eaf8"}, + {file = "coverage-7.6.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:645786266c8f18a931b65bfcefdbf6952dd0dea98feee39bd188607a9d307ed2"}, + {file = "coverage-7.6.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9e0b2df163b8ed01d515807af24f63de04bebcecbd6c3bfeff88385789fdf75a"}, + {file = "coverage-7.6.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:609b06f178fe8e9f89ef676532760ec0b4deea15e9969bf754b37f7c40326dbc"}, + {file = "coverage-7.6.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:702855feff378050ae4f741045e19a32d57d19f3e0676d589df0575008ea5004"}, + {file = "coverage-7.6.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:2bdb062ea438f22d99cba0d7829c2ef0af1d768d1e4a4f528087224c90b132cb"}, + {file = "coverage-7.6.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:9c56863d44bd1c4fe2abb8a4d6f5371d197f1ac0ebdee542f07f35895fc07f36"}, + 
{file = "coverage-7.6.1-cp39-cp39-win32.whl", hash = "sha256:6e2cd258d7d927d09493c8df1ce9174ad01b381d4729a9d8d4e38670ca24774c"}, + {file = "coverage-7.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:06a737c882bd26d0d6ee7269b20b12f14a8704807a01056c80bb881a4b2ce6ca"}, + {file = "coverage-7.6.1-pp38.pp39.pp310-none-any.whl", hash = "sha256:e9a6e0eb86070e8ccaedfbd9d38fec54864f3125ab95419970575b42af7541df"}, + {file = "coverage-7.6.1.tar.gz", hash = "sha256:953510dfb7b12ab69d20135a0662397f077c59b1e6379a768e97c59d852ee51d"}, +] + +[package.dependencies] +tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} + +[package.extras] +toml = ["tomli"] + +[[package]] +name = "exceptiongroup" +version = "1.2.2" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, + {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, +] + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + +[[package]] +name = "packaging" +version = "24.2" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, + {file = "packaging-24.2.tar.gz", hash = 
"sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, +] + +[[package]] +name = "pluggy" +version = "1.5.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "pytest" +version = "8.3.3" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2"}, + {file = "pytest-8.3.3.tar.gz", hash = "sha256:70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=1.5,<2" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "pytest-cov" +version = "5.0.0" +description = "Pytest plugin for measuring coverage." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-cov-5.0.0.tar.gz", hash = "sha256:5837b58e9f6ebd335b0f8060eecce69b662415b16dc503883a02f45dfeb14857"}, + {file = "pytest_cov-5.0.0-py3-none-any.whl", hash = "sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652"}, +] + +[package.dependencies] +coverage = {version = ">=5.2.1", extras = ["toml"]} +pytest = ">=4.6" + +[package.extras] +testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"] + +[[package]] +name = "ruff" +version = "0.7.4" +description = "An extremely fast Python linter and code formatter, written in Rust." +optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.7.4-py3-none-linux_armv6l.whl", hash = "sha256:a4919925e7684a3f18e18243cd6bea7cfb8e968a6eaa8437971f681b7ec51478"}, + {file = "ruff-0.7.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:cfb365c135b830778dda8c04fb7d4280ed0b984e1aec27f574445231e20d6c63"}, + {file = "ruff-0.7.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:63a569b36bc66fbadec5beaa539dd81e0527cb258b94e29e0531ce41bacc1f20"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d06218747d361d06fd2fdac734e7fa92df36df93035db3dc2ad7aa9852cb109"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e0cea28d0944f74ebc33e9f934238f15c758841f9f5edd180b5315c203293452"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:80094ecd4793c68b2571b128f91754d60f692d64bc0d7272ec9197fdd09bf9ea"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:997512325c6620d1c4c2b15db49ef59543ef9cd0f4aa8065ec2ae5103cedc7e7"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:00b4cf3a6b5fad6d1a66e7574d78956bbd09abfd6c8a997798f01f5da3d46a05"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", 
hash = "sha256:7dbdc7d8274e1422722933d1edddfdc65b4336abf0b16dfcb9dedd6e6a517d06"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e92dfb5f00eaedb1501b2f906ccabfd67b2355bdf117fea9719fc99ac2145bc"}, + {file = "ruff-0.7.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:3bd726099f277d735dc38900b6a8d6cf070f80828877941983a57bca1cd92172"}, + {file = "ruff-0.7.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:2e32829c429dd081ee5ba39aef436603e5b22335c3d3fff013cd585806a6486a"}, + {file = "ruff-0.7.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:662a63b4971807623f6f90c1fb664613f67cc182dc4d991471c23c541fee62dd"}, + {file = "ruff-0.7.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:876f5e09eaae3eb76814c1d3b68879891d6fde4824c015d48e7a7da4cf066a3a"}, + {file = "ruff-0.7.4-py3-none-win32.whl", hash = "sha256:75c53f54904be42dd52a548728a5b572344b50d9b2873d13a3f8c5e3b91f5cac"}, + {file = "ruff-0.7.4-py3-none-win_amd64.whl", hash = "sha256:745775c7b39f914238ed1f1b0bebed0b9155a17cd8bc0b08d3c87e4703b990d6"}, + {file = "ruff-0.7.4-py3-none-win_arm64.whl", hash = "sha256:11bff065102c3ae9d3ea4dc9ecdfe5a5171349cdd0787c1fc64761212fc9cf1f"}, + {file = "ruff-0.7.4.tar.gz", hash = "sha256:cd12e35031f5af6b9b93715d8c4f40360070b2041f81273d0527683d5708fce2"}, +] + +[[package]] +name = "tomli" +version = "2.1.0" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tomli-2.1.0-py3-none-any.whl", hash = "sha256:a5c57c3d1c56f5ccdf89f6523458f60ef716e210fc47c4cfb188c5ba473e0391"}, + {file = "tomli-2.1.0.tar.gz", hash = "sha256:3f646cae2aec94e17d04973e4249548320197cfabdf130015d023de4b74d8ab8"}, +] + +[metadata] +lock-version = "2.0" +python-versions = "^3.6" +content-hash = "6c56ef90ed94143e1778caadc168f25265a86854273cd45c7ad53fcd08a47a08" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..6299224 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,108 @@ +[tool.poetry] 
+name = "randfacts" +version = "0.22.0" +description = "Package to generate random facts" +authors = ["TabulateJarl8 "] +license = "MIT" +readme = "README.md" +include = ["randfacts/*.txt"] +homepage = "https://tabulate.tech/software/randfacts/" +repository = "https://github.com/TabulateJarl8/randfacts" +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Natural Language :: English", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Topic :: Software Development :: Libraries :: Python Modules", +] +packages = [{ include = 'randfacts' }] + +[tool.poetry.scripts] +randfacts = 'randfacts.randfacts:_cli_entrypoint' + +[tool.pytest.ini_options] +asyncio_default_fixture_loop_scope = "function" + +[tool.pyright] +reportUnusedCallResult = false + +[tool.ruff.lint] +preview = true +select = ["ALL"] + +ignore = [ + # complains about tab indentation + "W191", + "D206", + # adds a line break before a class docstring + "D203", + # puts the first line summary of a docstring on a different line than the """ + "D213", + # tries to add a blank line after the last docstring section + "D413", + # yells at you if you use a bool typed function argument + "FBT001", + "FBT002", + # yells at you for using try-except in a for loop + "PERF203", + # allow for the use of Any + "ANN401", + # false positives for overriding methods (i think) + "PLR6301", + # disable too many branches check + "PLR0912", + # copyright at top of file + "CPY", + # complains about random.choice() not being good for cryptography + "S311", +] + + +[tool.ruff.lint.per-file-ignores] +"tests/*" 
= [ + "S101", + "ANN001", + "ANN002", + "PLC2701", + "ARG002", + "PLR2004", + "DOC", + "INP001", + "S", +] +"randfacts/randfacts.py" = ["T201"] +"randfacts/__main__.py" = ["D100"] + +[tool.ruff.lint.pydocstyle] +convention = "google" + +[tool.ruff.format] +quote-style = "double" +indent-style = "tab" +line-ending = "lf" + +[tool.ruff.lint.isort] +combine-as-imports = true +force-wrap-aliases = true +order-by-type = true + +[tool.poetry.dependencies] +python = "^3.6" + +[tool.poetry.group.dev.dependencies] +ruff = { version = "^0.7.4", python = "^3.8" } +pytest = { version = "^8.3.3", python = "^3.8" } +pytest-cov = { version = "^5.0.0", python = "^3.8" } + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/randfacts/__init__.py b/randfacts/__init__.py index c4d6643..af38cd5 100644 --- a/randfacts/__init__.py +++ b/randfacts/__init__.py @@ -5,49 +5,66 @@ execution via the command line. See the examples section for more details. Code Examples: - Example usage of randfacts in code. + Example usage of randfacts in code. - Generate a random SFW (safe for work) fact. + Generate a random SFW (safe for work) fact. - >>> randfacts.get_fact() + >>> randfacts.get_fact() - Generate a random NSFW (not safe for work) fact. + Generate a random NSFW (not safe for work) fact. 
- >>> randfacts.get_fact(only_unsafe=True) + >>> randfacts.get_fact(only_unsafe=True) - Generate a random mixed fact (possibility of both SFW and NSFW facts) + Generate a random mixed fact (possibility of both SFW and NSFW facts) - >>> randfacts.get_fact(False) - >>> # or - >>> randfacts.get_fact(filter_enabled=False) + >>> randfacts.get_fact(filter_enabled=False) CLI Examples: - randfacts can be executed via the command line with the following commands: + randfacts can be executed via the command line with the following commands: - Normal execution; only safe facts + Normal execution; only safe facts - $ python3 -m randfacts + $ python3 -m randfacts - The unsafe argument can be supplied to provide only unsafe facts + The unsafe argument can be supplied to provide only unsafe facts - $ python3 -m randfacts --unsafe + $ python3 -m randfacts --unsafe - The mixed argument can be provided to provide both SFW and NSFW facts. + The mixed argument can be provided to provide both SFW and NSFW facts. - $ python3 -m randfacts --mixed + $ python3 -m randfacts --mixed - More help. + More help. - $ python3 -m randfacts --help + $ python3 -m randfacts --help """ -from .__version__ import __title__, __description__, __url__, __version__, __author__, __author_email__, __license__, __copyright__ -from randfacts.randfacts import get_fact, safe_facts, unsafe_facts, all_facts - import warnings as _warnings + +from randfacts.randfacts import ( + __version__, + all_facts, + get_fact, + safe_facts, + unsafe_facts, +) + +__all__ = [ + "__version__", + "all_facts", + "get_fact", + "safe_facts", + "unsafe_facts", +] + + # Deprecated methods -def getFact(filter_enabled=True, only_unsafe=False): - """This method is deprecated. Please use get_fact""" - _warnings.warn("getFact is deprecated. 
Please use get_fact", DeprecationWarning, stacklevel=2) - return get_fact(filter_enabled, only_unsafe) +def getFact(filter_enabled: bool = True, only_unsafe: bool = False) -> str: # noqa: N802 + """This method is deprecated. Please use get_fact.""" + _warnings.warn( + "getFact is deprecated. Please use get_fact", + DeprecationWarning, + stacklevel=2, + ) + return get_fact(filter_enabled, only_unsafe) # noqa: DOC201 diff --git a/randfacts/__main__.py b/randfacts/__main__.py index 5e670ad..4fca42c 100644 --- a/randfacts/__main__.py +++ b/randfacts/__main__.py @@ -1,2 +1,3 @@ -from .randfacts import _cli_entrypoint -_cli_entrypoint() \ No newline at end of file +from .randfacts import _cli_entrypoint # pyright: ignore[reportPrivateUsage] + +_cli_entrypoint() diff --git a/randfacts/__version__.py b/randfacts/__version__.py deleted file mode 100644 index a1c072d..0000000 --- a/randfacts/__version__.py +++ /dev/null @@ -1,8 +0,0 @@ -__title__ = "randfacts" -__description__ = "Package to generate random facts" -__url__ = "https://github.com/TabulateJarl8/randfacts" -__version__ = "0.21.0" -__author__ = "Tabulate" -__author_email__ = "tabulatejarl8@gmail.com" -__license__ = "MIT" -__copyright__ = "Copyright 2020-2023 Connor Sample" diff --git a/randfacts/randfacts.py b/randfacts/randfacts.py index ff23538..f59dfe8 100644 --- a/randfacts/randfacts.py +++ b/randfacts/randfacts.py @@ -1,22 +1,22 @@ -from random import choice -import os +"""Contains the core functionality of randfacts.""" + import argparse +import contextlib +import importlib.metadata +from pathlib import Path +from random import choice + +dir_path = Path(__file__).resolve().parent -dir_path = os.path.dirname(os.path.realpath(__file__)) +__version__ = "" +with contextlib.suppress(Exception): + __version__: str = importlib.metadata.version("randfacts") -with open(os.path.join(dir_path, 'safe.txt'), encoding='utf-8') as f: - safe_facts = [ - fact.rstrip('\r\n ') - for fact in f.readlines() - if fact.rstrip('\r\n 
') != '' - ] +with (dir_path / "safe.txt").open(encoding="utf-8") as f: + safe_facts = [fact.rstrip("\r\n ") for fact in f if fact.rstrip("\r\n ")] -with open(os.path.join(dir_path, 'unsafe.txt'), encoding='utf-8') as f: - unsafe_facts = [ - fact.rstrip('\r\n ') - for fact in f.readlines() - if fact.rstrip('\r\n ') != '' - ] +with (dir_path / "unsafe.txt").open(encoding="utf-8") as f: + unsafe_facts = [fact.rstrip("\r\n ") for fact in f if fact.rstrip("\r\n ")] all_facts = safe_facts + unsafe_facts @@ -27,20 +27,19 @@ def get_fact(filter_enabled: bool = True, only_unsafe: bool = False) -> str: Parameters ---------- filter_enabled : bool - The `filter_enabled` parameter determines if the function will filter - out potentially inappropriate facts. Defaults to True. + The `filter_enabled` parameter determines if the function will filter + out potentially inappropriate facts. Defaults to True. only_unsafe : bool - The `only_unsafe` parameter determines if the function will only give - unsafe (NSFW) facts. Takes precedence over the `filter_enabled` argument. + The `only_unsafe` parameter determines if the function will only give + unsafe (NSFW) facts. Takes precedence over the `filter_enabled` argument. - Returns + Returns: ------ str - A random fact. + A random fact. 
""" - if only_unsafe: return choice(unsafe_facts) if filter_enabled is False: @@ -48,37 +47,46 @@ def get_fact(filter_enabled: bool = True, only_unsafe: bool = False) -> str: return choice(safe_facts) -def _cli_entrypoint(): +def _cli_entrypoint() -> None: """Entrypoint for execution via command-line.""" - parser = argparse.ArgumentParser( - description='Generate random facts from the command-line' + description="Generate random facts from the command-line", + ) + + parser.add_argument( + "-V", + "--version", + action="store_true", + help="Print the package version and exit", ) group = parser.add_mutually_exclusive_group() group.add_argument( - '-m', - '--mixed', - action='store_true', - help='Include safe and unsafe facts' + "-m", + "--mixed", + action="store_true", + help="Include safe and unsafe facts", ) group.add_argument( - '-u', - '--unsafe', - action='store_true', - help='Only include unsafe facts' + "-u", + "--unsafe", + action="store_true", + help="Only include unsafe facts", ) args = parser.parse_args() - if args.mixed: - print(get_fact(False)) - elif args.unsafe: + if args.version: # pyright: ignore[reportAny] + print(__version__) + return + if args.mixed: # pyright: ignore[reportAny] + print(get_fact(filter_enabled=False)) + elif args.unsafe: # pyright: ignore[reportAny] print(get_fact(only_unsafe=True)) else: print(get_fact()) -if __name__ == '__main__': +if __name__ == "__main__": _cli_entrypoint() diff --git a/randfacts/safe.txt b/randfacts/safe.txt index 72940dd..65ea424 100644 --- a/randfacts/safe.txt +++ b/randfacts/safe.txt @@ -378,7 +378,7 @@ Whales can suffer from sunburns In 2015, a U.S. journalist was sentenced to 5 years in jail for posting a link on the web South Koreans drink twice as much alcohol as Russians A newborn baby has about one cup of blood in his body. -4%of women in the U.S. are pregnant right now +4% of women in the U.S. 
are pregnant right now The world's longest hangover lasted 4 weeks after a Scotsman consumed 60 pints of beer. In 1962, John F Kennedy secretly installed a taping system in the White House. Men with shaved heads are perceived as an inch taller and 13% stronger than men with hair. @@ -1269,7 +1269,7 @@ Having an orgasm at least 3 times a week cuts in half the likelihood of coronary 30 million people in China live on less than US$1 per day, as of 2019. Brazil is defined as a "federal republic" composed of the Federal District, 26 states, and 5,570 municipalities. People with autism are less likely to catch yawns. The more severe their condition, the less common the behavior gets. -Mexico has68 official languages. +Mexico has 68 official languages. Girls who complete secondary school are 6 times less likely to become child brides La Paz, Bolivia, was the first South American city to get an electricity supply. It was powered by llama dung Penguins have an organ near the eye that filters salt from the water out of their system @@ -4397,7 +4397,6 @@ To escape the grip of a crocodile's jaws, push your thumbs into its eyeballs โ€“ Reindeer like to eat bananas. More people are killed annually by donkeys than airplane crashes. Because of the rotation of the earth, an object can be thrown farther if it is thrown west. -The average person spends 6 months of their life sitting at red lights. More Monopoly money is printed in a year, than real money throughout the world. Caesar salad has nothing to do with any of the Caesars. It was first concocted in a bar in Tijuana, Mexico, in the 1920โ€ฒs. Seattle's Fremont Bridge rises up and down more than any drawbridge in the world. @@ -5077,7 +5076,6 @@ Many insects can carry 50 times their own body weight. This would be like an adu There are over a million described species of insects. Some people estimate there are actually between 15 and 30 million species. 
Most insects are beneficial to people because they eat other insects, pollinate crops, are food for other animals, make products we use (like honey and silk) or have medical uses. Butterflies and insects have their skeletons on the outside of their bodies, called the exoskeleton. This protects the insect and keeps water inside their bodies so they don't dry out. -Elephants are the only mammals that cannot jump. 11% of the world is left-handed. A healthy (non-colorblind) human eye can distinguish between 500 shades of gray. Lizards can self-amputate their tails for protection. It grows back after a few months. @@ -5758,7 +5756,6 @@ Up to 20% of power outages in the U.S are due to squirrels. The Mayo Clinic made glow in the dark cats while trying to find a cure for AIDS. The Antarctic glaciers are made up of 3% penguin urine. The happiest prisoner on death row had an IQ of 46. -Violin bows are made from horsehair. IKEA is an acronym. Stephen Hawking held a reception for time travelers in 2009. A Norwegian Island made dying illegal. @@ -6212,7 +6209,6 @@ The largest known volcano in the solar system is Olympus Mons, located on Mars. On Mars, sunsets appear blue due to the way light is captured in the atmosphere. Because there is no atmosphere, wind, or water to erode them, astronaut footprints on the moon will likely remain there for hundreds of millions of years. One spacesuit for a NASA astronaut costs $12 million to make. -Uranus is the only planet to spin on its side. The diameter of Pluto is smaller than the horizontal length of the United States. The Kรกrmรกn line, the invisible boundary that officially separates Earth from outer space, is located 62 miles above sea level. So you're only about 62 miles from space right now! It's impossible to burp in space. The lack of gravity in space prevents air in your stomach from separating and rising up from food you've eaten. @@ -6502,7 +6498,6 @@ If Barbie were life-size, her measurements would be 39-23-33. 
She would stand 7 On average, people fear spiders more than they do death. Thirty-five percent of the people who use personal ads for dating are already married. In Tokyo you can buy a toupee for your dog. -A dime has 118 ridges around the edge. The world's oldest wooden wheel has been around for more than 5,000 years Dead skin cells are a main ingredient in household dust Sudan has more pyramids than any country in the world @@ -6714,7 +6709,6 @@ NFL refs also get Super Bowl rings. President Hubert Hoover invented a game called "Hooverball" which was a cross between tennis and volleyball and was played with a medicine ball. Only one city has won three major championships in one year. In 1935, the Detroit Lions won the Super Bowl, the Tigers won the world series, and the Red Wings won the Stanley Cup. More than 100 baseballs are used during a typical professional baseball game. -You can't hum while plugging your nose. Tomatoes have more genes than humans. We're one to two centimeters taller in the morning than at night. One quarter of all our bones are in our feet. @@ -6761,7 +6755,7 @@ Turns out, a dog's paw print is just as unique as a human's. Good news for dog d A camel can drink up to 40 gallons of water in one go. That's seriously impressive! Don't bring your crystal ball to Maryland! Fortune telling is illegal in the state. Speaking of ferrets, did you know they used to be used to protect grain stores from rodents? -The technical term for a fear of long words is ""hippopotomonstrosesquippedaliophobia." No way you can self-diagnose yourself with that one! +The technical term for a fear of long words is "hippopotomonstrosesquippedaliophobia." No way you can self-diagnose yourself with that one! The White House has 35 bathrooms. So every bathroom break can be different for POTUS! Greyhounds can run up to 45 mph. So don't challenge one to a race! Hiking naked is illegal in Switzerland. Though we must say, it doesn't seem like a good idea in general! 
@@ -7020,7 +7014,6 @@ Rubber bands last longer when refrigerated. Since 1896, the beginning of the modern Olympics, only Greece and Australia have participated in every Games. The average person has over 1,460 dreams a year. The band Duran Duran got their name from an astronaut in the 1968 Jane Fonda movie Barbarella. -The Earth weighs around 6,588,000,000,000,000,000,000,000 tons. The first toilet ever seen on television was on "Leave It To Beaver." The international telephone dialing code for Antarctica is 672. The name "Jeep" came from the abbreviation used in the army for the "general purpose" vehicle, G.P. @@ -7164,7 +7157,6 @@ Only animal besides a human that can get sunburn: a pig. Proportional to their weight, men are stronger than horses. It is believed that Shakespeare was 46 around the time that the King James Version of the Bible was written. In Psalms 46, the 46th word from the first word is "shake," and the 46th word from the last word is "spear." The parachute was invented by Leonardo da Vinci in 1515. -"Canada" is a Native American word meaning "big village." The symbol on the "pound" key (#) is called an octothorpe. A full 7% of the entire Irish barley crop goes to the production of Guinness beer. Tigers have striped skin, not just striped fur. @@ -7349,4 +7341,4 @@ The shortest commercial flight in the world is in Scotland. Dolphins have names for one another. The blob of toothpaste on a toothbrush has a name - a nurdle. One part of Istanbul is in Europe and the other is in Asia. -There are more than 1,000 types of bananas growing in the world. \ No newline at end of file +There are more than 1,000 types of bananas growing in the world. diff --git a/randfacts/unsafe.txt b/randfacts/unsafe.txt index 57de532..9699a7c 100644 --- a/randfacts/unsafe.txt +++ b/randfacts/unsafe.txt @@ -112,4 +112,4 @@ During the medieval times (circa 1400s), France had impotency trials. Which allo Dildos have been around for centuries. 
With the oldest potential dildo being 28,000 years old. Vibrators were created in the 19th century to reduce "hysteria" in women. Famous gangster, Al Capone, had undiagnosed syphillis until he went to prison. -According to a British law passed in 1845, attempting to commit suicide was a capital offense. Offenders could be hanged for trying. \ No newline at end of file +According to a British law passed in 1845, attempting to commit suicide was a capital offense. Offenders could be hanged for trying. diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 224a779..0000000 --- a/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[metadata] -description-file = README.md \ No newline at end of file diff --git a/setup.py b/setup.py deleted file mode 100644 index 1aaf2eb..0000000 --- a/setup.py +++ /dev/null @@ -1,43 +0,0 @@ -import pathlib -import setuptools - -here = pathlib.Path(__file__).parent.resolve() - -with open(here / "README.md", "r") as fh: - long_description = fh.read() - -about = {} -with open(here / "randfacts/__version__.py", "r") as f: - exec(f.read(), about) - -packages = ['randfacts'] - -setuptools.setup( - name=about["__title__"], - version=about["__version__"], - author=about["__author__"], - author_email=about["__author_email__"], - description=about["__description__"], - long_description=long_description, - long_description_content_type="text/markdown", - url=about["__url__"], - packages=packages, - package_dir={'randfacts': 'randfacts'}, - classifiers=[ - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Natural Language :: English", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - "Development Status :: 5 - 
Production/Stable", - "Intended Audience :: Developers" - ], - python_requires='>=3.6', - include_package_data=True -) diff --git a/tests/checkduplicates.py b/tests/checkduplicates.py deleted file mode 100644 index 83c0737..0000000 --- a/tests/checkduplicates.py +++ /dev/null @@ -1,107 +0,0 @@ -from math import factorial -import itertools -import argparse -import pathlib -import sys - -from rapidfuzz import fuzz -from tqdm import tqdm - - -def partial_match(x_fact, y_fact, x_index, y_index): - if x_index == y_index: - # dont compare same facts - return None - - # compare facts - ratio = fuzz.token_sort_ratio(x_fact[0], y_fact[0]) - if ratio > 80: - # facts are most likely a match - return (x_fact, y_fact), (x_index, y_index), ratio - - # facts are most likely not a match, return none - return None - - -def number_of_combinations(number_of_items, choose_amount): - # calculate binomial coefficient - return factorial(number_of_items) / (factorial(choose_amount) * factorial(number_of_items - choose_amount)) - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument('--fix-duplicates', action='store_true', help='Remove duplicate facts') - args = parser.parse_args() - - # Get directory containing setup.py - parent = pathlib.Path(__file__).parents[1] - - # read safe.txt and unsafe.txt into lists - with open(parent / 'randfacts/safe.txt') as f: - safe = [(line.rstrip(), 'safe') for line in f.readlines()] - - with open(parent / 'randfacts/unsafe.txt') as f: - unsafe = [(line.rstrip(), 'unsafe') for line in f.readlines()] - - # Generate all possible pairs of the facts from safe.txt and unsafe.txt - # combined - print('Generating combinations...') - combinations = itertools.combinations(enumerate(safe + unsafe), 2) - - matches = [] - print() - # Iterate through all the combinations - with tqdm(total=int(number_of_combinations(len(safe + unsafe), 2))) as pbar: - for item in combinations: - - # Check if the two facts as similar enough to be flagged - match = 
partial_match(item[0][1], item[1][1], item[0][0], item[1][0]) - if match is not None: - # facts are similar enough, flag them - matches.append(match) - - # Update progress bar by 1 - pbar.update(1) - print() - - if matches: # there were flagged facts - if not args.fix_duplicates: # don't fix duplicate facts, just print them - print('\n'.join([str(match) for match in matches])) - print() - print('Number of similar facts: ' + str(len(matches))) - sys.exit(2) - else: - # iterate through matches and generate a list of indexes to remove - print('Generating list of indexes to remove...') - indexes_to_remove = [] - for match in matches: - print(match) - # keep unsafe facts over safe facts - if match[0][0][1] == 'unsafe': - indexes_to_remove.append(match[1][1]) - elif match[0][1][1] == 'unsafe': - indexes_to_remove.append(match[1][0]) - else: - indexes_to_remove.append(match[1][0]) - - # remove all indexes from combinations - print('Removing duplicates from facts...') - facts = safe + unsafe - for index in sorted(list(set(indexes_to_remove)), reverse=True): - # sort the list of indexes in reverse so that we don't have - # issues with the max index getting smaller as we delete things - del facts[index] - - # divide up the facts into their corresponding list - safe = [fact for fact, correct_list in facts if correct_list == 'safe'] - unsafe = [fact for fact, correct_list in facts if correct_list == 'unsafe'] - - # write the fixed facts back to the files - with open(parent / 'randfacts/safe.txt', 'w') as f: - f.write('\n'.join(safe)) - with open(parent / 'randfacts/unsafe.txt', 'w') as f: - f.write('\n'.join(unsafe)) - - -if __name__ == '__main__': - main() diff --git a/tests/checkduplicates/Cargo.lock b/tests/checkduplicates/Cargo.lock new file mode 100644 index 0000000..f4703fe --- /dev/null +++ b/tests/checkduplicates/Cargo.lock @@ -0,0 +1,449 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "anstream" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" + +[[package]] +name = "anstyle-parse" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "checkduplicates" +version = "0.1.0" +dependencies = [ + "clap", + "indicatif", + "num-integer", + "rayon", +] + 
+[[package]] +name = "clap" +version = "4.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e578d6ec4194633722ccf9544794b71b1385c3c027efe0c55db226fc880865c" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4df4df40ec50c46000231c914968278b1eb05098cf8f1b3a518a95030e71d1c7" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_lex" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + +[[package]] +name = "console" +version = "0.15.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" +dependencies = [ + "encode_unicode", + "lazy_static", + "libc", + "unicode-width 0.1.11", + "windows-sys", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" + +[[package]] +name = "either" +version = "1.9.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" + +[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" + +[[package]] +name = "indicatif" +version = "0.17.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbf675b85ed934d3c67b5c5469701eec7db22689d0a2139d856e0925fa28b281" +dependencies = [ + "console", + "number_prefix", + "portable-atomic", + "rayon", + "unicode-width 0.2.0", + "web-time", +] + +[[package]] +name = "js-sys" +version = "0.3.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.152" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +dependencies = [ + "autocfg", +] + +[[package]] +name = 
"number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + +[[package]] +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "portable-atomic" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" + +[[package]] +name = "proc-macro2" +version = "1.0.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "syn" +version = "2.0.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" +dependencies = [ + "proc-macro2", + 
"quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" + +[[package]] +name = "unicode-width" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" + +[[package]] +name = "unicode-width" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" + +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + +[[package]] +name = "wasm-bindgen" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" +dependencies = [ + "cfg-if", + "once_cell", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" +dependencies = [ + "proc-macro2", + "quote", + "syn", + 
"wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" 
+ +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" diff --git a/tests/checkduplicates/Cargo.toml b/tests/checkduplicates/Cargo.toml new file mode 100644 index 0000000..6d75d7e --- /dev/null +++ b/tests/checkduplicates/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "checkduplicates" +version = "0.1.0" +edition = "2021" +authors = ["Connor Sample"] +description = "randfacts check duplicates test" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +clap = { version = "4.4.18", features = ["cargo"] } +indicatif = { version = "0.17.9", features = ["rayon"] } +num-integer = "0.1.46" +rayon = "1.10.0" + +[profile.release] +codegen-units = 1 +lto = true +opt-level = 3 +split-debuginfo = 'off' +incremental = false +panic = "abort" diff --git a/tests/checkduplicates/src/main.rs b/tests/checkduplicates/src/main.rs new file mode 100644 index 0000000..a7eef4b --- /dev/null +++ b/tests/checkduplicates/src/main.rs @@ -0,0 +1,233 @@ +//! A test for finding and managing duplicate facts across files. +//! 
/// Scores how similar two strings are.
///
/// Despite the name, no tokenising or sorting happens here. Both inputs are
/// reduced to their lowercase ASCII alphanumeric characters, the edit distance
/// between the two reduced sequences is computed, and that distance is
/// normalised by the combined length of the reduced sequences.
///
/// # Arguments
/// * `str1` - First string to compare
/// * `str2` - Second string to compare
///
/// # Returns
/// * `0.0` when one raw string is less than half the byte length of the other,
///   or when neither string contains any ASCII alphanumeric character.
/// * Otherwise `(1 - distance / (len1 + len2)) * 100 - 5`, where `len1` and
///   `len2` are the lengths of the reduced sequences. The `- 5` offset is
///   there to reduce false positives, so identical inputs score `95.0`.
#[inline(always)]
fn token_sort_ratio(str1: &str, str2: &str) -> f64 {
    let len1 = str1.len();
    let len2 = str2.len();

    // Early exit for obviously different strings: if one is less than half as
    // long as the other they are most likely different enough. This may lead
    // to missed matches, but it led to a ~23.33% performance improvement.
    // Integer form of `len1 / len2 < 0.5 || len2 / len1 < 0.5`; it needs no
    // float division and has no divide-by-zero special cases.
    if len1.saturating_mul(2) < len2 || len2.saturating_mul(2) < len1 {
        return 0.0;
    }

    // Keep only ASCII alphanumerics, lowercased. The capacity is an upper
    // bound, so the vector never reallocates.
    let normalise = |raw: &str| -> Vec<char> {
        let mut kept = Vec::with_capacity(raw.len());
        kept.extend(
            raw.chars()
                .filter(char::is_ascii_alphanumeric)
                .map(|c| c.to_ascii_lowercase()),
        );
        kept
    };
    let chars1 = normalise(str1);
    let chars2 = normalise(str2);

    let maximum = chars1.len() + chars2.len();
    if maximum == 0 {
        // Nothing comparable in either string.
        return 0.0;
    }

    let dist = wagner_fischer_2row(&chars1, &chars2) as f64;

    // Convert distance to a similarity ratio and subtract 5 to reduce false
    // positives.
    (1.0 - (dist / maximum as f64)) * 100.0 - 5.0
}

/// Computes the Levenshtein edit distance between two character sequences
/// with the Wagner-Fischer algorithm, keeping only two rows of the dynamic
/// programming matrix in memory.
///
/// The rows are sized by the *shorter* sequence, so memory use is
/// proportional to `min(s1.len(), s2.len())`. The distance is symmetric, so
/// swapping the roles of the inputs does not change the result.
///
/// # Arguments
/// * `s1` - First sequence of characters
/// * `s2` - Second sequence of characters
///
/// # Returns
/// The minimum number of single-character insertions, deletions and
/// substitutions needed to transform one sequence into the other.
#[inline(always)]
fn wagner_fischer_2row(s1: &[char], s2: &[char]) -> usize {
    // `short` indexes the row, `long` drives the outer loop.
    let (short, long) = if s1.len() < s2.len() {
        (s1, s2)
    } else {
        (s2, s1)
    };

    // Turning an empty sequence into `long` takes one insertion per char.
    // This also covers the case where both sequences are empty.
    if short.is_empty() {
        return long.len();
    }

    let width = short.len();

    // Row 0: distance from the empty prefix of `long` to each prefix of `short`.
    let mut prev_row: Vec<usize> = (0..=width).collect();
    let mut curr_row: Vec<usize> = vec![0; width + 1];

    for (i, long_char) in long.iter().enumerate() {
        // Distance from `long[..=i]` to the empty prefix of `short`.
        curr_row[0] = i + 1;

        for (j, short_char) in short.iter().enumerate() {
            curr_row[j + 1] = if long_char == short_char {
                // Characters match: no edit needed.
                prev_row[j]
            } else {
                // Cheapest of substitute, delete, insert.
                1 + prev_row[j].min(prev_row[j + 1]).min(curr_row[j])
            };
        }

        // The finished row becomes the previous row for the next iteration.
        std::mem::swap(&mut prev_row, &mut curr_row);
    }

    prev_row[width]
}
Returns matches above the similarity threshold +/// +/// # Returns +/// A vector of DuplicateFactMatch containing similar fact pairs and their similarity scores +fn find_duplicate_facts() -> Vec { + // read safe.txt and unsafe.txt into lists + let mut all_facts = util::load_fact_list("safe.txt", FactClass::Safe); + + let mut unsafe_contents = util::load_fact_list("unsafe.txt", FactClass::Unsafe); + + all_facts.append(&mut unsafe_contents); + + // Calculate total number of possible combinations for progress bar + let total_facts = all_facts.len() as u64; + let total_combinations = num_integer::binomial(total_facts as u64, 2); + + // Initialize progress bar with custom style + let pb = ProgressBar::new(total_combinations); + pb.set_style( + ProgressStyle::default_bar() + .template( + "{percent}% |{wide_bar}| {pos}/{len} [{elapsed_precise}<{eta_precise} {per_sec}]", + ) + .unwrap(), + ); + + // Generate all possible indices combinations + let indices: Vec<_> = (0..all_facts.len()) + .flat_map(|i| ((i + 1)..all_facts.len()).map(move |j| (i, j))) + .collect(); + + // Process combinations in parallel + indices + .into_par_iter() + .progress_with(pb) + .filter_map(|(i, j)| { + let facts = &all_facts; + let fact1 = &facts[i]; + let fact2 = &facts[j]; + + let ratio = token_sort_ratio(&fact1.fact, &fact2.fact); + if ratio > SIMILARITY_THRESHOLD { + Some((fact1.clone(), fact2.clone(), ratio)) + } else { + None + } + }) + .collect() +} + +fn main() { + let args = command!() + .arg( + Arg::new("fix_duplicates") + .long("fix-duplicates") + .action(ArgAction::SetTrue) + .help("Remove duplicate facts"), + ) + .get_matches(); + + let matches = find_duplicate_facts(); + + if !matches.is_empty() { + if !args.get_flag("fix_duplicates") { + println!("{:#?}", matches); + println!("\nNumber of similar facts: {}", matches.len()); + std::process::exit(1); + } + + // Fix mode: Remove duplicates + println!("Generating list of indicies to remove..."); + let mut indicies_to_remove = vec![]; + 
+ // Determine which facts to remove, prioritizing keeping unsafe facts + for fact_match in matches { + println!("{:#?}", fact_match); + + // keep unsafe facts over safe facts + if fact_match.0.class == FactClass::Unsafe { + indicies_to_remove.push((fact_match.0.line_number, fact_match.0.class)); + } else { + // first fact isn't unsafe so we don't need to prioritize it + indicies_to_remove.push((fact_match.1.line_number, fact_match.1.class)); + } + } + + // Load current facts + let mut safe_facts = util::load_fact_list("safe.txt", FactClass::Safe); + let mut unsafe_facts = util::load_fact_list("unsafe.txt", FactClass::Unsafe); + + // sort removal indicies in reverse to maintain correct line numbers + indicies_to_remove.sort_unstable_by(|a, b| b.0.partial_cmp(&a.0).unwrap()); + + // Remove duplicates from respective files + for (index, class) in indicies_to_remove { + match class { + FactClass::Safe => safe_facts.remove(index), + FactClass::Unsafe => unsafe_facts.remove(index), + }; + } + + // Write updated facts back to files + util::write_facts_to_file("safe.txt", &safe_facts); + util::write_facts_to_file("unsafe.txt", &unsafe_facts); + } +} diff --git a/tests/checkduplicates/src/structures.rs b/tests/checkduplicates/src/structures.rs new file mode 100644 index 0000000..2d19c40 --- /dev/null +++ b/tests/checkduplicates/src/structures.rs @@ -0,0 +1,43 @@ +use std::{fmt, sync::Arc}; + +/// Type used for when a fact match is found +pub type DuplicateFactMatch = (Fact, Fact, f64); +/// Wagner-Fishcer similarity threshold +pub const SIMILARITY_THRESHOLD: f64 = 82.5; + +/// The classification of a Fact, safe or unsafe +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum FactClass { + Safe, + Unsafe, +} + +/// Struct holding information about a fact in a fact file +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Fact { + /// The fact text + pub fact: Arc, + /// The class of the fact (safe or unsafe) + pub class: FactClass, + /// The line number of the fact in it's 
/// Builds the path of `filename` inside the `randfacts/` directory at the top
/// level of the git repository.
///
/// The repository root is obtained by running `git rev-parse --show-toplevel`.
/// The returned path is only constructed; it is not checked for existence.
///
/// # Arguments
///
/// * `filename` - name of the file inside `randfacts/`
///
/// # Panics
///
/// Panics if the `git` process cannot be started, if it exits unsuccessfully
/// (its stderr is included in the panic message), or if its stdout is not
/// valid UTF-8.
fn get_project_path(filename: &str) -> PathBuf {
    // Ask git for the top level of the working tree.
    let git = Command::new("git")
        .args(["rev-parse", "--show-toplevel"])
        .output()
        .expect("failed to execute git process");

    if !git.status.success() {
        panic!("Error: {}", String::from_utf8_lossy(&git.stderr));
    }

    // git terminates the path with a newline; trim it before building on it.
    let toplevel = String::from_utf8(git.stdout).unwrap();

    PathBuf::from(toplevel.trim())
        .join("randfacts")
        .join(filename)
}
"""Fixes common encoding errors that can get into the fact lists after web scraping."""

from pathlib import Path

parent = Path(__file__).resolve().parents[1]

safe_path = parent / "randfacts" / "safe.txt"
unsafe_path = parent / "randfacts" / "unsafe.txt"

# (mis-encoded sequence, replacement) pairs, applied in order.
bad_characters = [
    ("โ€˜", "'"),  # noqa: RUF001
    ("โ€™", "'"),  # noqa: RUF001
    ("โ€œ", '"'),
    ("โ€", '"'),
    ("โ€ฆ", "..."),
    ("โ€”", "-"),
]


def _fix_file(path: Path) -> str:
    """Replace every bad sequence in the file at `path`, rewriting it in place.

    Args:
        path: the fact list to clean up.

    Returns:
        The cleaned text that was written back to the file.
    """
    with path.open("r+", encoding="utf-8") as f:
        text = f.read()

        for bad, good in bad_characters:
            text = text.replace(bad, good)

        f.seek(0)
        f.write(text)
        # The replacements make the content shorter, so without truncating
        # here the tail of the old content would be left behind after the
        # newly written text.
        f.truncate()

    return text


safe = _fix_file(safe_path)
unsafe = _fix_file(unsafe_path)
"""General functionality unit tests.

CLI tests start the interpreter through `sys.executable`, so the subprocess
uses the same Python that is running pytest instead of whatever `python` or
`python3` happens to resolve to on the PATH.
"""

import pathlib
import subprocess
import sys

import pytest

sys.path.insert(1, str(pathlib.Path(__file__).parents[1]))
from randfacts import (
    getFact,
    randfacts,  # local randfacts instead of installed version
)


def _run_module(*args: str) -> int:
    """Run `<current interpreter> -m randfacts <args>` and return its exit code."""
    child = subprocess.Popen(
        [sys.executable, "-m", "randfacts", *args],
        stdout=subprocess.DEVNULL,
    )
    child.communicate()
    return child.returncode


def test_get_fact() -> None:
    """Make sure get_fact works without extra arguments."""
    assert isinstance(randfacts.get_fact(), str), "get_fact() must return a string"


def test_getFact_deprecated() -> None:  # noqa: N802
    """Make sure getFact throws a deprecation warning."""
    with pytest.deprecated_call():
        _ = getFact()


def test_all_facts_list() -> None:
    """Test that all_facts list is present in the module."""
    assert isinstance(randfacts.all_facts, list), "all_facts must be a list"


def test_safe_facts_list() -> None:
    """Test that safe_facts list is present in the module."""
    assert isinstance(randfacts.safe_facts, list), "safe_facts must be a list"


def test_unsafe_facts_list() -> None:
    """Test that unsafe_facts list is present in the module."""
    assert isinstance(randfacts.unsafe_facts, list), "unsafe_facts must be a list"


def test_cli_no_args() -> None:
    """Test that a basic randfacts CLI call will work."""
    assert _run_module() == 0, "`python3 -m randfacts` must return with exit code 0"


def test_cli_script_installed() -> None:
    """Test that the `randfacts` script is installed to the PATH."""
    child = subprocess.Popen(["randfacts"], stdout=subprocess.DEVNULL)
    child.communicate()
    assert child.returncode == 0, "`randfacts` must return with exit code 0"


def test_cli_unsafe_args() -> None:
    """Test that CLI with --unsafe works."""
    assert (
        _run_module("--unsafe") == 0
    ), "`python3 -m randfacts --unsafe` must return with exit code 0"


def test_cli_mixed_args() -> None:
    """Test that CLI with --mixed works."""
    assert (
        _run_module("--mixed") == 0
    ), "`python3 -m randfacts --mixed` must return with exit code 0"


def test_cli_version() -> None:
    """Test that CLI with --version returns the correct version."""
    child = subprocess.Popen(
        [sys.executable, "-m", "randfacts", "--version"],
        stdout=subprocess.PIPE,
        text=True,
    )
    output, _ = child.communicate()
    assert (
        output.strip() == randfacts.__version__
    ), f"`python3 -m randfacts --version` must return {randfacts.__version__}"


def test_main_entrypoint() -> None:
    """Test the main entrypoint in randfacts.py."""
    # Path to the script under test.
    script_path = (
        pathlib.Path(__file__).resolve().parents[1] / "randfacts" / "randfacts.py"
    )

    # Run the script as a subprocess with the current interpreter.
    result = subprocess.run(
        [sys.executable, str(script_path)],
        capture_output=True,
        text=True,
        check=False,
    )

    # Assert the subprocess exits successfully.
    assert result.returncode == 0, f"Script failed with stderr: {result.stderr}"


@pytest.mark.parametrize("bad_char", ["โ€˜", "โ€™", "โ€œ", "โ€", "โ€ฆ", "โ€”"])  # noqa: RUF001
def test_invalid_characters(bad_char: str) -> None:
    """Make sure no invalid characters are present in the fact lists.

    If this test fails, try running `fix_encoding.py`
    """
    for index, fact in enumerate(randfacts.all_facts):
        assert (
            bad_char not in fact
        ), f"Bad character '{bad_char}' found in fact at index {index}"