Make SubFox production-ready with parallel translation and UI controls

2026-03-25 11:24:54 +00:00 · 2026-03-25 11:24:54 +00:00 · 2b1d05f02c
commit 2b1d05f02c
parent c40b8bed2b
6046 changed files with 798327 additions and 0 deletions
--- a/.venv/lib/python3.10/site-packages/charset_normalizer/md.py
+++ b/.venv/lib/python3.10/site-packages/charset_normalizer/md.py
@ -0,0 +1,936 @@
+from __future__ import annotations
+
+import sys
+from functools import lru_cache
+from logging import getLogger
+
+# Pick up ``final`` from ``typing`` when the interpreter is 3.8 or newer;
+# otherwise try ``typing_extensions`` and, if that is missing too, define a
+# no-op stand-in so that ``@final`` below still works as a decorator.
+if sys.version_info >= (3, 8):
+    from typing import final
+else:
+    try:
+        from typing_extensions import final
+    except ImportError:
+
+        def final(cls):  # type: ignore[misc,no-untyped-def]
+            """Fallback decorator: hand back *cls* unchanged."""
+            return cls
+
+
+from .constant import (
+    COMMON_CJK_CHARACTERS,
+    COMMON_SAFE_ASCII_CHARACTERS,
+    TRACE,
+    UNICODE_SECONDARY_RANGE_KEYWORD,
+    _ACCENTUATED,
+    _ARABIC,
+    _ARABIC_ISOLATED_FORM,
+    _CJK,
+    _HANGUL,
+    _HIRAGANA,
+    _KATAKANA,
+    _LATIN,
+    _THAI,
+)
+from .utils import (
+    _character_flags,
+    is_emoticon,
+    is_punctuation,
+    is_separator,
+    is_symbol,
+    remove_accent,
+    unicode_range,
+)
+
+# Combined bitmask for CJK/Hangul/Katakana/Hiragana/Thai glyph detection.
+# CharInfo.update() ANDs a character's flags with it to derive ``is_glyph``.
+_GLYPH_MASK: int = _CJK | _HANGUL | _KATAKANA | _HIRAGANA | _THAI
+
+
+@final
+class CharInfo:
+    """Pre-computed character properties shared across all detectors.
+
+    Instantiated once and reused via :meth:`update` on every character
+    in the hot loop so that redundant calls to str methods
+    (``isalpha``, ``isupper``, …) and cached utility functions
+    (``_character_flags``, ``is_punctuation``, …) are avoided when
+    several plugins need the same information.
+
+    Every attribute is overwritten by each :meth:`update` call, so a
+    single instance never carries state from one character to the next.
+    """
+
+    __slots__ = (
+        "character",
+        "printable",
+        "alpha",
+        "upper",
+        "lower",
+        "space",
+        "digit",
+        "is_ascii",
+        "case_variable",
+        "flags",
+        "accentuated",
+        "latin",
+        "is_cjk",
+        "is_arabic",
+        "is_glyph",
+        "punct",
+        "sym",
+    )
+
+    def __init__(self) -> None:
+        # Neutral defaults; real values only exist after the first update().
+        self.character: str = ""
+        self.printable: bool = False
+        self.alpha: bool = False
+        self.upper: bool = False
+        self.lower: bool = False
+        self.space: bool = False
+        self.digit: bool = False
+        self.is_ascii: bool = False
+        self.case_variable: bool = False
+        self.flags: int = 0
+        self.accentuated: bool = False
+        self.latin: bool = False
+        self.is_cjk: bool = False
+        self.is_arabic: bool = False
+        self.is_glyph: bool = False
+        self.punct: bool = False
+        self.sym: bool = False
+
+    def update(self, character: str) -> None:
+        """Update all properties for *character* (called once per character).
+
+        :param character: the single character to classify.
+        """
+        self.character = character
+
+        # ASCII fast-path: for characters with ord < 128, we can skip
+        # _character_flags() entirely and derive most properties from ord.
+        o: int = ord(character)
+        if o < 128:
+            self.is_ascii = True
+            self.accentuated = False
+            self.is_cjk = False
+            self.is_arabic = False
+            self.is_glyph = False
+            # ASCII alpha: a-z (97-122) or A-Z (65-90)
+            if 65 <= o <= 90:
+                # Uppercase ASCII letter
+                self.alpha = True
+                self.upper = True
+                self.lower = False
+                self.space = False
+                self.digit = False
+                self.printable = True
+                self.case_variable = True
+                self.flags = _LATIN
+                self.latin = True
+                self.punct = False
+                self.sym = False
+            elif 97 <= o <= 122:
+                # Lowercase ASCII letter
+                self.alpha = True
+                self.upper = False
+                self.lower = True
+                self.space = False
+                self.digit = False
+                self.printable = True
+                self.case_variable = True
+                self.flags = _LATIN
+                self.latin = True
+                self.punct = False
+                self.sym = False
+            elif 48 <= o <= 57:
+                # ASCII digit 0-9
+                self.alpha = False
+                self.upper = False
+                self.lower = False
+                self.space = False
+                self.digit = True
+                self.printable = True
+                self.case_variable = False
+                self.flags = 0
+                self.latin = False
+                self.punct = False
+                self.sym = False
+            elif o == 32 or (9 <= o <= 13) or (28 <= o <= 31):
+                # Space, tab, newline, etc. The 28-31 range (FS, GS, RS, US)
+                # is included because str.isspace() is true for it as well;
+                # leaving it out would make this fast path disagree with the
+                # non-ASCII path, which relies on str.isspace().
+                self.alpha = False
+                self.upper = False
+                self.lower = False
+                self.space = True
+                self.digit = False
+                # Of all ASCII whitespace only the plain space is printable.
+                self.printable = o == 32
+                self.case_variable = False
+                self.flags = 0
+                self.latin = False
+                self.punct = False
+                self.sym = False
+            else:
+                # Other ASCII (punctuation, symbols, control chars)
+                self.printable = character.isprintable()
+                self.alpha = False
+                self.upper = False
+                self.lower = False
+                self.space = False
+                self.digit = False
+                self.case_variable = False
+                self.flags = 0
+                self.latin = False
+                # Non-printable characters are never looked up as punctuation
+                # or symbol.
+                self.punct = is_punctuation(character) if self.printable else False
+                self.sym = is_symbol(character) if self.printable else False
+        else:
+            # Non-ASCII path
+            self.is_ascii = False
+            self.printable = character.isprintable()
+            self.alpha = character.isalpha()
+            self.upper = character.isupper()
+            self.lower = character.islower()
+            self.space = character.isspace()
+            self.digit = character.isdigit()
+            self.case_variable = self.lower != self.upper
+
+            # Flag-based classification (single unicodedata.name() call, lru-cached)
+            # Only alphabetic characters are looked up; everything else gets 0.
+            flags: int
+            if self.alpha:
+                flags = _character_flags(character)
+            else:
+                flags = 0
+            self.flags = flags
+            self.accentuated = bool(flags & _ACCENTUATED)
+            self.latin = bool(flags & _LATIN)
+            self.is_cjk = bool(flags & _CJK)
+            self.is_arabic = bool(flags & _ARABIC)
+            self.is_glyph = bool(flags & _GLYPH_MASK)
+
+            # Eagerly compute punct and sym (avoids property dispatch overhead
+            # on 300K+ accesses in the hot loop).
+            self.punct = is_punctuation(character) if self.printable else False
+            self.sym = is_symbol(character) if self.printable else False
+
+
+class MessDetectorPlugin:
+    """
+    Base abstract class used for mess detection plugins.
+    All detectors MUST extend it and implement the methods below; each
+    default implementation only raises NotImplementedError.
+    """
+
+    # No instance attributes here; subclasses declare their own slots.
+    __slots__ = ()
+
+    def feed_info(self, character: str, info: CharInfo) -> None:
+        """
+        The main routine to be executed upon character.
+        Insert the logic in which the text would be considered chaotic.
+
+        :param character: the character being inspected.
+        :param info: pre-computed properties of that same character.
+        """
+        raise NotImplementedError  # Defensive:
+
+    def reset(self) -> None:  # Defensive:
+        """
+        Permit to reset the plugin to the initial state.
+        """
+        raise NotImplementedError
+
+    @property
+    def ratio(self) -> float:
+        """
+        Compute the chaos ratio based on what your feed_info() has seen.
+        Must NOT be lower than 0.; No restriction gt 0.
+        """
+        raise NotImplementedError  # Defensive:
+
+
+@final
+class TooManySymbolOrPunctuationPlugin(MessDetectorPlugin):
+    """Measure how much of the fed text is punctuation or symbols.
+
+    A punctuation character adds 1 and a non-digit, non-emoticon symbol
+    adds 2 to the tally. A character equal to the one fed just before it,
+    or listed in ``COMMON_SAFE_ASCII_CHARACTERS``, is never tallied.
+    """
+
+    __slots__ = (
+        "_punctuation_count",
+        "_symbol_count",
+        "_character_count",
+        "_last_printable_char",
+        "_frenzy_symbol_in_word",
+    )
+
+    def __init__(self) -> None:
+        self._punctuation_count: int = 0
+        self._symbol_count: int = 0
+        self._character_count: int = 0
+
+        # Previous character fed; used to skip immediate repetitions.
+        self._last_printable_char: str | None = None
+        # Not read anywhere in this class; kept because it is a declared slot.
+        self._frenzy_symbol_in_word: bool = False
+
+    def feed_info(self, character: str, info: CharInfo) -> None:
+        """Tally *character* using its pre-computed properties in *info*."""
+        self._character_count += 1
+
+        if (
+            character != self._last_printable_char
+            and character not in COMMON_SAFE_ASCII_CHARACTERS
+        ):
+            if info.punct:
+                self._punctuation_count += 1
+            elif not info.digit and info.sym and not is_emoticon(character):
+                # Symbols weigh twice as much as punctuation.
+                self._symbol_count += 2
+
+        self._last_printable_char = character
+
+    def reset(self) -> None:  # Abstract
+        """Restore every field to the value set by ``__init__``."""
+        self._punctuation_count = 0
+        self._character_count = 0
+        self._symbol_count = 0
+        # Forget the previous character too, otherwise the first character fed
+        # after a reset could be skipped as a "repetition" of stale input.
+        self._last_printable_char = None
+        self._frenzy_symbol_in_word = False
+
+    @property
+    def ratio(self) -> float:
+        """Return the tally divided by the character count, or 0.0 below 0.3."""
+        if self._character_count == 0:
+            return 0.0
+
+        ratio_of_punctuation: float = (
+            self._punctuation_count + self._symbol_count
+        ) / self._character_count
+
+        return ratio_of_punctuation if ratio_of_punctuation >= 0.3 else 0.0
+
+
+@final
+class TooManyAccentuatedPlugin(MessDetectorPlugin):
+    """Report text in which too many of the fed characters are accentuated."""
+
+    __slots__ = ("_character_count", "_accentuated_count")
+
+    def __init__(self) -> None:
+        self._character_count: int = 0
+        self._accentuated_count: int = 0
+
+    def feed_info(self, character: str, info: CharInfo) -> None:
+        """Count *character* and record whether *info* marks it accentuated."""
+        self._character_count += 1
+        self._accentuated_count += 1 if info.accentuated else 0
+
+    def reset(self) -> None:  # Abstract
+        """Zero both counters."""
+        self._character_count = self._accentuated_count = 0
+
+    @property
+    def ratio(self) -> float:
+        """Share of accentuated characters; 0.0 under 8 characters or under 0.35."""
+        total: int = self._character_count
+        if total < 8:
+            return 0.0
+
+        accentuated_share: float = self._accentuated_count / total
+        if accentuated_share >= 0.35:
+            return accentuated_share
+        return 0.0
+
+
+@final
+class UnprintablePlugin(MessDetectorPlugin):
+    """Count characters that are neither whitespace nor printable.
+
+    ``"\\x1a"`` and ``"\\ufeff"`` are exempt. Each hit is weighted by 8 in
+    :attr:`ratio`.
+    """
+
+    __slots__ = ("_unprintable_count", "_character_count")
+
+    def __init__(self) -> None:
+        self._unprintable_count: int = 0
+        self._character_count: int = 0
+
+    def feed_info(self, character: str, info: CharInfo) -> None:
+        """Count *character*, flagging it when *info* says it is unprintable."""
+        if (
+            not info.space
+            and not info.printable
+            and character != "\x1a"
+            and character != "\ufeff"
+        ):
+            self._unprintable_count += 1
+        self._character_count += 1
+
+    def reset(self) -> None:  # Abstract
+        """Zero both counters."""
+        self._unprintable_count = 0
+        # The denominator must be cleared as well, otherwise the ratio after a
+        # reset would still be diluted by characters seen before it.
+        self._character_count = 0
+
+    @property
+    def ratio(self) -> float:
+        """Return ``unprintable * 8 / total``, or 0.0 when nothing was fed."""
+        if self._character_count == 0:  # Defensive:
+            return 0.0
+
+        return (self._unprintable_count * 8) / self._character_count
+
+
+@final
+class SuspiciousDuplicateAccentPlugin(MessDetectorPlugin):
+    """Spot pairs of consecutive accentuated characters that look suspicious.
+
+    A pair scores once when both characters are uppercase and once more
+    when both reduce to the same base character via ``remove_accent``.
+    """
+
+    __slots__ = (
+        "_successive_count",
+        "_character_count",
+        "_last_latin_character",
+        "_last_was_accentuated",
+    )
+
+    def __init__(self) -> None:
+        self._successive_count: int = 0
+        self._character_count: int = 0
+
+        self._last_latin_character: str | None = None
+        self._last_was_accentuated: bool = False
+
+    def feed_info(self, character: str, info: CharInfo) -> None:
+        """Compare *character* with the previous one fed, then remember it."""
+        self._character_count += 1
+
+        previous: str | None = self._last_latin_character
+        if previous is not None and info.accentuated and self._last_was_accentuated:
+            # The two checks are independent: one pair may score twice.
+            if info.upper and previous.isupper():
+                self._successive_count += 1
+            if remove_accent(character) == remove_accent(previous):
+                self._successive_count += 1
+
+        self._last_latin_character = character
+        self._last_was_accentuated = info.accentuated
+
+    def reset(self) -> None:  # Abstract
+        """Restore every field to the value set by ``__init__``."""
+        self._successive_count = self._character_count = 0
+        self._last_latin_character = None
+        self._last_was_accentuated = False
+
+    @property
+    def ratio(self) -> float:
+        """Return twice the score divided by the character count."""
+        total: int = self._character_count
+        if total == 0:
+            return 0.0
+
+        return (self._successive_count * 2) / total
+
+
+@final
+class SuspiciousRange(MessDetectorPlugin):
+    """Count adjacent characters whose Unicode ranges look odd side by side.
+
+    Whitespace, punctuation and ``COMMON_SAFE_ASCII_CHARACTERS`` break the
+    chain: the character that follows one of them is never compared.
+    """
+
+    __slots__ = (
+        "_suspicious_successive_range_count",
+        "_character_count",
+        "_last_printable_seen",
+        "_last_printable_range",
+    )
+
+    def __init__(self) -> None:
+        self._suspicious_successive_range_count: int = 0
+        self._character_count: int = 0
+        self._last_printable_seen: str | None = None
+        self._last_printable_range: str | None = None
+
+    def feed_info(self, character: str, info: CharInfo) -> None:
+        """Compare the range of *character* with the previous one kept."""
+        self._character_count += 1
+
+        if info.space or info.punct or character in COMMON_SAFE_ASCII_CHARACTERS:
+            # Separator: drop the remembered character and its range.
+            self._last_printable_seen = self._last_printable_range = None
+            return
+
+        current_range: str | None = unicode_range(character)
+
+        # Only compare when a previous character is being remembered.
+        if (
+            self._last_printable_seen is not None
+            and is_suspiciously_successive_range(
+                self._last_printable_range, current_range
+            )
+        ):
+            self._suspicious_successive_range_count += 1
+
+        self._last_printable_seen = character
+        self._last_printable_range = current_range
+
+    def reset(self) -> None:  # Abstract
+        """Restore every field to the value set by ``__init__``."""
+        self._character_count = self._suspicious_successive_range_count = 0
+        self._last_printable_seen = self._last_printable_range = None
+
+    @property
+    def ratio(self) -> float:
+        """Return twice the hit count over the total; 0.0 up to 13 characters."""
+        total: int = self._character_count
+        if total <= 13:
+            return 0.0
+
+        return (self._suspicious_successive_range_count * 2) / total
+
+
+@final
+class SuperWeirdWordPlugin(MessDetectorPlugin):
+    """Split the fed text into words and measure how many look abnormal.
+
+    Alphabetic characters accumulate into a word buffer (tracked through
+    counters only); whitespace, punctuation or a separator closes the word
+    and applies the checks in :meth:`feed_info`.
+    """
+
+    __slots__ = (
+        "_word_count",
+        "_bad_word_count",
+        "_foreign_long_count",
+        "_is_current_word_bad",
+        "_foreign_long_watch",
+        "_character_count",
+        "_bad_character_count",
+        "_buffer_length",
+        "_buffer_last_char",
+        "_buffer_last_char_accentuated",
+        "_buffer_accent_count",
+        "_buffer_glyph_count",
+        "_buffer_upper_count",
+    )
+
+    def __init__(self) -> None:
+        # Totals over all closed words.
+        self._word_count: int = 0
+        self._bad_word_count: int = 0
+        self._foreign_long_count: int = 0
+
+        # Verdict flags for the word currently being buffered.
+        self._is_current_word_bad: bool = False
+        self._foreign_long_watch: bool = False
+
+        # Characters in closed words, overall and in bad words only.
+        self._character_count: int = 0
+        self._bad_character_count: int = 0
+
+        # Statistics of the word currently being buffered.
+        self._buffer_length: int = 0
+        self._buffer_last_char: str | None = None
+        self._buffer_last_char_accentuated: bool = False
+        self._buffer_accent_count: int = 0
+        self._buffer_glyph_count: int = 0
+        self._buffer_upper_count: int = 0
+
+    def feed_info(self, character: str, info: CharInfo) -> None:
+        """Optimized feed using pre-computed character info."""
+        if info.alpha:
+            # Alphabetic character: extend the current word.
+            self._buffer_length += 1
+            self._buffer_last_char = character
+
+            if info.upper:
+                self._buffer_upper_count += 1
+
+            self._buffer_last_char_accentuated = info.accentuated
+
+            if info.accentuated:
+                self._buffer_accent_count += 1
+            # Arm the long-word watch on the first character that is either
+            # non-Latin or accentuated, provided it is not a glyph.
+            if (
+                not self._foreign_long_watch
+                and (not info.latin or info.accentuated)
+                and not info.is_glyph
+            ):
+                self._foreign_long_watch = True
+            if info.is_glyph:
+                self._buffer_glyph_count += 1
+            return
+        # Non-alphabetic character with no word in progress: nothing to do.
+        if not self._buffer_length:
+            return
+        if info.space or info.punct or is_separator(character):
+            # Word boundary: evaluate the buffered word, then clear the buffer.
+            self._word_count += 1
+            buffer_length: int = self._buffer_length
+
+            self._character_count += buffer_length
+
+            if buffer_length >= 4:
+                # At least half of the word is accentuated.
+                if self._buffer_accent_count / buffer_length >= 0.5:
+                    self._is_current_word_bad = True
+                # Word ends on an accentuated uppercase letter without being
+                # fully uppercase.
+                elif (
+                    self._buffer_last_char_accentuated
+                    and self._buffer_last_char.isupper()  # type: ignore[union-attr]
+                    and self._buffer_upper_count != buffer_length
+                ):
+                    self._foreign_long_count += 1
+                    self._is_current_word_bad = True
+                # Exactly one glyph character inside the word.
+                elif self._buffer_glyph_count == 1:
+                    self._is_current_word_bad = True
+                    self._foreign_long_count += 1
+            if buffer_length >= 24 and self._foreign_long_watch:
+                # Very long watched word: tolerated only when it has some
+                # uppercase letters making up at most 30% of it.
+                probable_camel_cased: bool = (
+                    self._buffer_upper_count > 0
+                    and self._buffer_upper_count / buffer_length <= 0.3
+                )
+
+                if not probable_camel_cased:
+                    self._foreign_long_count += 1
+                    self._is_current_word_bad = True
+
+            if self._is_current_word_bad:
+                self._bad_word_count += 1
+                self._bad_character_count += buffer_length
+                self._is_current_word_bad = False
+
+            # Start afresh for the next word.
+            self._foreign_long_watch = False
+            self._buffer_length = 0
+            self._buffer_last_char = None
+            self._buffer_last_char_accentuated = False
+            self._buffer_accent_count = 0
+            self._buffer_glyph_count = 0
+            self._buffer_upper_count = 0
+        elif (
+            character not in {"<", ">", "-", "=", "~", "|", "_"}
+            and not info.digit
+            and info.sym
+        ):
+            # A symbol (other than the tolerated ones above) inside a word
+            # marks that word as bad and counts towards its length.
+            self._is_current_word_bad = True
+            self._buffer_length += 1
+            self._buffer_last_char = character
+            self._buffer_last_char_accentuated = False
+
+    def reset(self) -> None:  # Abstract
+        """Restore every field to the value set by ``__init__``."""
+        self._buffer_length = 0
+        self._buffer_last_char = None
+        self._buffer_last_char_accentuated = False
+        self._is_current_word_bad = False
+        self._foreign_long_watch = False
+        self._bad_word_count = 0
+        self._word_count = 0
+        self._character_count = 0
+        self._bad_character_count = 0
+        self._foreign_long_count = 0
+        self._buffer_accent_count = 0
+        self._buffer_glyph_count = 0
+        self._buffer_upper_count = 0
+
+    @property
+    def ratio(self) -> float:
+        """Return bad-word characters over all word characters.
+
+        Yields 0.0 while at most 10 words were closed and no foreign-long
+        hit was recorded.
+        """
+        if self._word_count <= 10 and self._foreign_long_count == 0:
+            return 0.0
+
+        # Past the guard at least one word was closed, and every closed word
+        # adds its non-zero length to _character_count.
+        return self._bad_character_count / self._character_count
+
+
+@final
+class CjkUncommonPlugin(MessDetectorPlugin):
+    """
+    Spot CJK text that is most likely meaningless because it relies on
+    characters missing from ``COMMON_CJK_CHARACTERS``.
+    """
+
+    __slots__ = ("_character_count", "_uncommon_count")
+
+    def __init__(self) -> None:
+        self._character_count: int = 0
+        self._uncommon_count: int = 0
+
+    def feed_info(self, character: str, info: CharInfo) -> None:
+        """Count *character* and record whether it is an uncommon one."""
+        self._character_count += 1
+        self._uncommon_count += 0 if character in COMMON_CJK_CHARACTERS else 1
+
+    def reset(self) -> None:  # Abstract
+        """Zero both counters."""
+        self._character_count = self._uncommon_count = 0
+
+    @property
+    def ratio(self) -> float:
+        """A tenth of the uncommon share when above 0.5, otherwise 0.0."""
+        total: int = self._character_count
+        if total < 8:
+            return 0.0
+
+        uncommon_share: float = self._uncommon_count / total
+
+        # Widespread use of uncommon characters almost certainly means
+        # garbage; a moderate amount may simply be e.g. traditional chinese.
+        if uncommon_share > 0.5:
+            return uncommon_share / 10
+        return 0.0
+
+
+@final
+class ArchaicUpperLowerPlugin(MessDetectorPlugin):
+    """Count erratic upper/lower case alternation inside chunks of letters.
+
+    A chunk is a run of cased alphabetic characters; any other character
+    ends a non-empty chunk. Only chunks containing a non-ASCII character
+    contribute to the final count.
+    """
+
+    __slots__ = (
+        "_buf",
+        "_character_count_since_last_sep",
+        "_successive_upper_lower_count",
+        "_successive_upper_lower_count_final",
+        "_character_count",
+        "_last_alpha_seen",
+        "_last_alpha_seen_upper",
+        "_last_alpha_seen_lower",
+        "_current_ascii_only",
+    )
+
+    def __init__(self) -> None:
+        # True right after a single case flip; a second flip in a row scores.
+        self._buf: bool = False
+
+        self._character_count_since_last_sep: int = 0
+
+        # Score of the current chunk, and the total of accepted chunks.
+        self._successive_upper_lower_count: int = 0
+        self._successive_upper_lower_count_final: int = 0
+
+        self._character_count: int = 0
+
+        # Previous character of the chunk and its cached case properties.
+        self._last_alpha_seen: str | None = None
+        self._last_alpha_seen_upper: bool = False
+        self._last_alpha_seen_lower: bool = False
+        # Stays True until the current chunk sees a non-ASCII character.
+        self._current_ascii_only: bool = True
+
+    def feed_info(self, character: str, info: CharInfo) -> None:
+        """Optimized feed using pre-computed character info."""
+        is_concerned: bool = info.alpha and info.case_variable
+        chunk_sep: bool = not is_concerned
+
+        if chunk_sep and self._character_count_since_last_sep > 0:
+            # End of a non-empty chunk. Its score is kept only when the chunk
+            # is at most 64 characters long, the terminating character is not
+            # a digit, and the chunk was not purely ASCII.
+            if (
+                self._character_count_since_last_sep <= 64
+                and not info.digit
+                and not self._current_ascii_only
+            ):
+                self._successive_upper_lower_count_final += (
+                    self._successive_upper_lower_count
+                )
+
+            # Reset the per-chunk state; the separator itself is still counted.
+            self._successive_upper_lower_count = 0
+            self._character_count_since_last_sep = 0
+            self._last_alpha_seen = None
+            self._buf = False
+            self._character_count += 1
+            self._current_ascii_only = True
+
+            return
+
+        # NOTE: a non-cased character arriving while the chunk is empty is not
+        # caught by the branch above and is processed below like any other.
+        if self._current_ascii_only and not info.is_ascii:
+            self._current_ascii_only = False
+
+        if self._last_alpha_seen is not None:
+            if (info.upper and self._last_alpha_seen_lower) or (
+                info.lower and self._last_alpha_seen_upper
+            ):
+                # Case flipped relative to the previous character: the first
+                # flip only arms _buf, the next consecutive one adds 2.
+                if self._buf:
+                    self._successive_upper_lower_count += 2
+                    self._buf = False
+                else:
+                    self._buf = True
+            else:
+                self._buf = False
+
+        self._character_count += 1
+        self._character_count_since_last_sep += 1
+        self._last_alpha_seen = character
+        self._last_alpha_seen_upper = info.upper
+        self._last_alpha_seen_lower = info.lower
+
+    def reset(self) -> None:  # Abstract
+        """Restore every field to the value set by ``__init__``."""
+        self._character_count = 0
+        self._character_count_since_last_sep = 0
+        self._successive_upper_lower_count = 0
+        self._successive_upper_lower_count_final = 0
+        self._last_alpha_seen = None
+        self._last_alpha_seen_upper = False
+        self._last_alpha_seen_lower = False
+        self._buf = False
+        self._current_ascii_only = True
+
+    @property
+    def ratio(self) -> float:
+        """Return the accepted score divided by the number of characters fed."""
+        if self._character_count == 0:  # Defensive:
+            return 0.0
+
+        return self._successive_upper_lower_count_final / self._character_count
+
+
+@final
+class ArabicIsolatedFormPlugin(MessDetectorPlugin):
+    """Measure the share of characters flagged ``_ARABIC_ISOLATED_FORM``."""
+
+    __slots__ = ("_character_count", "_isolated_form_count")
+
+    def __init__(self) -> None:
+        self._character_count: int = 0
+        self._isolated_form_count: int = 0
+
+    def reset(self) -> None:  # Abstract
+        """Zero both counters."""
+        self._character_count = self._isolated_form_count = 0
+
+    def feed_info(self, character: str, info: CharInfo) -> None:
+        """Count *character* and record whether its flags mark an isolated form."""
+        self._character_count += 1
+        self._isolated_form_count += 1 if info.flags & _ARABIC_ISOLATED_FORM else 0
+
+    @property
+    def ratio(self) -> float:
+        """Share of isolated forms, or 0.0 when fewer than 8 characters were fed."""
+        total: int = self._character_count
+        return self._isolated_form_count / total if total >= 8 else 0.0
+
+
+@lru_cache(maxsize=1024)
+def is_suspiciously_successive_range(
+    unicode_range_a: str | None, unicode_range_b: str | None
+) -> bool:
+    """
+    Determine if two Unicode range seen next to each other can be considered as suspicious.
+
+    :param unicode_range_a: name of the first range, or None when unknown.
+    :param unicode_range_b: name of the second range, or None when unknown.
+    :return: True when the pairing is suspicious, False when one of the
+        exemptions below applies. An unknown (None) range is always suspicious.
+    """
+    if unicode_range_a is None or unicode_range_b is None:
+        return True
+
+    if unicode_range_a == unicode_range_b:
+        return False
+
+    if "Latin" in unicode_range_a and "Latin" in unicode_range_b:
+        return False
+
+    if "Emoticons" in unicode_range_a or "Emoticons" in unicode_range_b:
+        return False
+
+    # Latin characters can be accompanied with a combining diacritical mark
+    # eg. Vietnamese.
+    if ("Latin" in unicode_range_a or "Latin" in unicode_range_b) and (
+        "Combining" in unicode_range_a or "Combining" in unicode_range_b
+    ):
+        return False
+
+    # Two ranges sharing a primary keyword (anything not listed as a
+    # secondary keyword) are considered related.
+    keywords_range_b = unicode_range_b.split(" ")
+
+    for el in unicode_range_a.split(" "):
+        if el in UNICODE_SECONDARY_RANGE_KEYWORD:
+            continue
+        if el in keywords_range_b:
+            return False
+
+    # Facts reused by several of the exemptions below.
+    japanese_ranges = ("Hiragana", "Katakana")
+    range_a_jp_chars: bool = unicode_range_a in japanese_ranges
+    range_b_jp_chars: bool = unicode_range_b in japanese_ranges
+    involves_cjk: bool = "CJK" in unicode_range_a or "CJK" in unicode_range_b
+    involves_basic_latin: bool = (
+        unicode_range_a == "Basic Latin" or unicode_range_b == "Basic Latin"
+    )
+
+    # Japanese Exception
+    if (range_a_jp_chars or range_b_jp_chars) and involves_cjk:
+        return False
+    if range_a_jp_chars and range_b_jp_chars:
+        return False
+
+    if "Hangul" in unicode_range_a or "Hangul" in unicode_range_b:
+        if involves_cjk:
+            return False
+        if involves_basic_latin:
+            return False
+
+    # Chinese/Japanese use dedicated range for punctuation and/or separators.
+    # (The Hiragana/Katakana pair has already returned above, so only the CJK
+    # case can reach this point.)
+    if involves_cjk:
+        if "Punctuation" in unicode_range_a or "Punctuation" in unicode_range_b:
+            return False
+        if "Forms" in unicode_range_a or "Forms" in unicode_range_b:
+            return False
+        if involves_basic_latin:
+            return False
+
+    return True
+
+
+@lru_cache(maxsize=2048)
+def mess_ratio(
+    decoded_sequence: str, maximum_threshold: float = 0.2, debug: bool = False
+) -> float:
+    """
+    Compute a mess ratio given a decoded bytes sequence. The maximum threshold does stop the computation earlier.
+
+    The sequence is consumed block by block; after each block the detector
+    ratios are added up and the scan stops once the total reaches
+    *maximum_threshold*. The returned value is rounded to 3 decimals.
+    """
+
+    seq_len: int = len(decoded_sequence)
+
+    # Number of characters consumed between two intermediate evaluations.
+    step: int = 32 if seq_len < 511 else (64 if seq_len < 1024 else 128)
+
+    # One named local per detector: the hot loop below calls each of them
+    # directly instead of iterating over a list and asking every detector
+    # whether it is eligible for the character.
+    d_sp: TooManySymbolOrPunctuationPlugin = TooManySymbolOrPunctuationPlugin()
+    d_ta: TooManyAccentuatedPlugin = TooManyAccentuatedPlugin()
+    d_up: UnprintablePlugin = UnprintablePlugin()
+    d_sda: SuspiciousDuplicateAccentPlugin = SuspiciousDuplicateAccentPlugin()
+    d_sr: SuspiciousRange = SuspiciousRange()
+    d_sw: SuperWeirdWordPlugin = SuperWeirdWordPlugin()
+    d_cu: CjkUncommonPlugin = CjkUncommonPlugin()
+    d_au: ArchaicUpperLowerPlugin = ArchaicUpperLowerPlugin()
+    d_ai: ArabicIsolatedFormPlugin = ArabicIsolatedFormPlugin()
+
+    # Bound methods cached as locals for the hot loop.
+    d_sp_feed = d_sp.feed_info
+    d_ta_feed = d_ta.feed_info
+    d_up_feed = d_up.feed_info
+    d_sda_feed = d_sda.feed_info
+    d_sr_feed = d_sr.feed_info
+    d_sw_feed = d_sw.feed_info
+    d_cu_feed = d_cu.feed_info
+    d_au_feed = d_au.feed_info
+    d_ai_feed = d_ai.feed_info
+
+    # A single CharInfo is recycled for every character (no allocation).
+    info: CharInfo = CharInfo()
+    info_update = info.update
+
+    def combined_ratio() -> float:
+        # Plain left-to-right additions, always in this detector order.
+        return (
+            d_sp.ratio
+            + d_ta.ratio
+            + d_up.ratio
+            + d_sda.ratio
+            + d_sr.ratio
+            + d_sw.ratio
+            + d_cu.ratio
+            + d_au.ratio
+            + d_ai.ratio
+        )
+
+    mean_mess_ratio: float
+
+    for block_start in range(0, seq_len, step):
+        block: str = decoded_sequence[block_start : block_start + step]
+
+        for character in block:
+            # Classify the character once; every detector reads the result.
+            info_update(character)
+
+            # Fed unconditionally.
+            d_up_feed(character, info)
+            d_sw_feed(character, info)
+            d_au_feed(character, info)
+
+            # Fed for printable characters only.
+            if info.printable:
+                d_sp_feed(character, info)
+                d_sr_feed(character, info)
+
+            # The remaining detectors only look at alphabetic characters.
+            if not info.alpha:
+                continue
+
+            d_ta_feed(character, info)
+            if info.latin:
+                d_sda_feed(character, info)
+            if info.is_cjk:
+                d_cu_feed(character, info)
+            if info.is_arabic:
+                d_ai_feed(character, info)
+
+        mean_mess_ratio = combined_ratio()
+
+        if mean_mess_ratio >= maximum_threshold:
+            break
+    else:
+        # The whole sequence was read without hitting the threshold: feed a
+        # trailing newline so SuperWeirdWordPlugin closes its last word.
+        info_update("\n")
+        d_sw_feed("\n", info)
+        d_au_feed("\n", info)
+        d_up_feed("\n", info)
+
+        mean_mess_ratio = combined_ratio()
+
+    if debug:  # Defensive:
+        logger = getLogger("charset_normalizer")
+
+        logger.log(
+            TRACE,
+            "Mess-detector extended-analysis start. "
+            f"intermediary_mean_mess_ratio_calc={step} mean_mess_ratio={mean_mess_ratio} "
+            f"maximum_threshold={maximum_threshold}",
+        )
+
+        if seq_len > 16:
+            logger.log(TRACE, f"Starting with: {decoded_sequence[:16]}")
+            logger.log(TRACE, f"Ending with: {decoded_sequence[-16::]}")
+
+        for dt in (d_sp, d_ta, d_up, d_sda, d_sr, d_sw, d_cu, d_au, d_ai):
+            logger.log(TRACE, f"{dt.__class__}: {dt.ratio}")
+
+    return round(mean_mess_ratio, 3)