❇️ Support recently added "should_rename_legacy" in function detect (legacy) (#262)
jawah · Jan 29, 2023 · b250116 · b250116
1 parent 6c5c17d
commit b250116
Show file tree

Hide file tree

Showing 2 changed files with 19 additions and 5 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 
 ## [3.1.0-dev0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...master) (unreleased)
 
+### Added
+- Argument `should_rename_legacy` for the legacy function `detect`; any other unknown keyword arguments are now disregarded with a warning instead of raising an error (PR #261)
+
 ### Removed
 - Support for Python 3.6 (PR #260)
 

diff --git a/charset_normalizer/legacy.py b/charset_normalizer/legacy.py
@@ -1,10 +1,13 @@
-from typing import Dict, Optional, Union
+from typing import Any, Dict, Optional, Union
+from warnings import warn
 
 from .api import from_bytes
 from .constant import CHARDET_CORRESPONDENCE
 
 
-def detect(byte_str: bytes) -> Dict[str, Optional[Union[str, float]]]:
+def detect(
+    byte_str: bytes, should_rename_legacy: bool = False, **kwargs: Any
+) -> Dict[str, Optional[Union[str, float]]]:
     """
     chardet legacy method
     Detect the encoding of the given byte string. It should be mostly backward-compatible.
@@ -13,7 +16,14 @@ def detect(byte_str: bytes) -> Dict[str, Optional[Union[str, float]]]:
     further information. Not planned for removal.
 
     :param byte_str:     The byte sequence to examine.
+    :param should_rename_legacy:  Should we rename legacy encodings
+                                  to their more modern equivalents?
     """
+    if len(kwargs):
+        warn(
+            f"charset-normalizer disregard arguments '{','.join(list(kwargs.keys()))}' in legacy function detect()"
+        )
+
     if not isinstance(byte_str, (bytearray, bytes)):
         raise TypeError(  # pragma: nocover
             "Expected object of type bytes or bytearray, got: "
@@ -34,10 +44,11 @@ def detect(byte_str: bytes) -> Dict[str, Optional[Union[str, float]]]:
     if r is not None and encoding == "utf_8" and r.bom:
         encoding += "_sig"
 
+    if should_rename_legacy is False and encoding in CHARDET_CORRESPONDENCE:
+        encoding = CHARDET_CORRESPONDENCE[encoding]
+
     return {
-        "encoding": encoding
-        if encoding not in CHARDET_CORRESPONDENCE
-        else CHARDET_CORRESPONDENCE[encoding],
+        "encoding": encoding,
         "language": language,
         "confidence": confidence,
     }