From 94beb1e678015852a982d737099d3a99148b2fc2 Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Mon, 9 Oct 2023 13:52:29 +0200 Subject: [PATCH] add pad argument --- HISTORY.md | 5 +++++ docs/conf.py | 2 +- setup.py | 4 ++-- src/Levenshtein/__init__.py | 10 +++++++--- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index b53f197..0b7ebb3 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,10 @@ ## Changelog +### v0.23.0 +#### Changed +- added keyword argument `pad` to Hamming distance. This controls whether sequences of different + length should be padded or lead to a `ValueError` + ### v0.22.0 #### Changed - add support for Python 3.12 diff --git a/docs/conf.py b/docs/conf.py index 1710281..4d96f77 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,7 +22,7 @@ author = 'Max Bachmann' # The full version, including alpha/beta/rc tags -release = '0.22.0' +release = '0.23.0' # -- General configuration --------------------------------------------------- diff --git a/setup.py b/setup.py index 992f39b..61b7862 100644 --- a/setup.py +++ b/setup.py @@ -5,10 +5,10 @@ setup( name="Levenshtein", - version="0.22.0", + version="0.23.0", url="https://github.com/maxbachmann/Levenshtein", author="Max Bachmann", - install_requires=["rapidfuzz >= 2.3.0, < 4.0.0"], + install_requires=["rapidfuzz >= 3.1.0, < 4.0.0"], author_email="contact@maxbachmann.de", description="Python extension for computing string edit distances and similarities.", long_description=readme, diff --git a/src/Levenshtein/__init__.py b/src/Levenshtein/__init__.py index 212d467..064c9d0 100644 --- a/src/Levenshtein/__init__.py +++ b/src/Levenshtein/__init__.py @@ -16,7 +16,7 @@ __author__: str = "Max Bachmann" __license__: str = "GPL" -__version__: str = "0.22.0" +__version__: str = "0.23.0" import rapidfuzz.distance.Levenshtein as _Levenshtein import rapidfuzz.distance.Indel as _Indel @@ -166,7 +166,7 @@ def ratio(s1, s2, *, processor=None, score_cutoff=None): ) -def 
hamming(s1, s2, *, processor=None, score_cutoff=None): +def hamming(s1, s2, *, pad=True, processor=None, score_cutoff=None): """ Calculates the Hamming distance between two strings. The hamming distance is defined as the number of positions @@ -179,6 +179,10 @@ def hamming(s1, s2, *, processor=None, score_cutoff=None): First string to compare. s2 : Sequence[Hashable] Second string to compare. + pad : bool, optional + Whether strings should be padded if there is a length difference. + If pad is False and the strings have a different length, + a ValueError is raised instead. Default is True. processor: callable, optional Optional callable that is used to preprocess the strings before comparing them. Default is None, which deactivates this behaviour. @@ -198,7 +202,7 @@ def hamming(s1, s2, *, processor=None, score_cutoff=None): ValueError If s1 and s2 have a different length """ - return _Hamming.distance(s1, s2, processor=processor, score_cutoff=score_cutoff) + return _Hamming.distance(s1, s2, pad=pad, processor=processor, score_cutoff=score_cutoff) def jaro(s1, s2, *, processor=None, score_cutoff=None) -> float: