# Source code for openrig.naming.types

"""Type definitions for the naming system.

Defines all types used internally by the naming package:

**Config types** (used by ``rules.py``):
    - ``RuleConfig``: raw deserialized configuration for a single token rule.
    - ``GlobalRules``: global constraints applied to every fully-built name.

**Token value types** (used by ``manager.py``):
    - ``TokenValue``: accepted raw input for a token before normalization.
    - ``TokenData``: canonical post-normalization form (always ``str`` values).

**Rule protocol** (used by ``manager.py``):
    - ``RuleValidator``: ``Protocol`` that every rule type must satisfy.

**Concrete rule implementations** (used by ``rules.py``, ``manager.py``):
    - ``RegexRule``: validates a token value against a regular expression.
    - ``ListRule``: validates a token value against a fixed set of allowed values.
    - ``CallableRule``: delegates validation to an arbitrary callable.
    - ``ConcreteRule``: union of all concrete rule types.
"""

import re
from dataclasses import dataclass, field
from enum import Enum
from typing import Protocol, runtime_checkable

# ---------------------------------------------------------------------------
# Config types
# ---------------------------------------------------------------------------


[docs] @dataclass class RuleConfig: """Raw deserialized configuration for a single token rule. Intermediate representation populated from ``naming/config.json``. ``rules.py`` converts each instance into a ``ConcreteRule``. Attributes: type: Rule type: ``"regex"``, ``"list"``, ``"from_enums"``, or ``"callable"``. value: Primary payload — pattern string, list of values, or dotted import path, depending on ``type``. sources: Enum class names to aggregate. Only for ``"from_enums"``. module: Dotted module path where ``sources`` are defined. Only for ``"from_enums"``. Defaults to ``"openrig.constants"``. """ type: str value: str | list[str] | None = None sources: list[str] | None = None module: str | None = None
@dataclass
class GlobalRules:
    """Global constraints applied to every fully-built name.

    Attributes:
        max_length: Maximum allowed character length for any built name.
        forbidden_patterns: Substrings that must not appear in any built name.
        separator_rule: Optional ``RuleConfig`` constraining valid separators.
    """

    max_length: int
    # Use the plain ``list`` constructor as the factory. Calling the
    # subscripted alias ``list[str]()`` happens to work at runtime, but it is
    # non-idiomatic and rejected by type checkers; the element type belongs in
    # the annotation only.
    forbidden_patterns: list[str] = field(default_factory=list)
    separator_rule: RuleConfig | None = None
# --------------------------------------------------------------------------- # Token value types # --------------------------------------------------------------------------- TokenValue = str | Enum TokenData = dict[str, str] # --------------------------------------------------------------------------- # Rule Protocol # ---------------------------------------------------------------------------
@runtime_checkable
class RuleValidator(Protocol):
    """Protocol for all token-level validation rules.

    Structural interface: any object that implements ``validate`` and
    ``to_regex_pattern`` satisfies this protocol — no inheritance required.
    ``runtime_checkable`` additionally allows ``isinstance`` checks (which
    verify method presence only, not signatures).

    Example:
        >>> class MyRule:
        ...     def validate(self, value: str) -> bool:
        ...         return value.isalpha()
        ...     def to_regex_pattern(self) -> str:
        ...         return r"[a-zA-Z]+"
    """

    def validate(self, value: str) -> bool:
        """Validates a normalized token value.

        Args:
            value: The normalized (post-normalization) string value to check.

        Returns:
            ``True`` if the value is acceptable, ``False`` otherwise.
        """
        ...

    def to_regex_pattern(self) -> str:
        """Returns a regex pattern representing the valid values for this rule.

        Used by the ``Manager`` to build the global matching regex without
        inspecting the rule's internal structure.

        Returns:
            A regex pattern string (without anchors ``^`` / ``$``).
        """
        ...
# --------------------------------------------------------------------------- # Concrete rule implementations # ---------------------------------------------------------------------------
@dataclass(frozen=True)
class RegexRule:
    """A rule that validates a token value against a regular expression.

    Attributes:
        pattern: The regex pattern string. Anchors ``^`` / ``$`` are
            optional; ``validate`` always performs a full match.

    Example:
        >>> rule = RegexRule(pattern=r"^[a-z][a-zA-Z0-9]*$")
        >>> rule.validate("armUpper")
        True
        >>> rule.validate("Arm Upper")
        False
    """

    pattern: str

    def __post_init__(self) -> None:
        """Validates that the pattern compiles correctly on construction.

        Raises:
            ValueError: If ``pattern`` is not a valid regular expression.
        """
        try:
            re.compile(self.pattern)
        except re.error as exc:
            raise ValueError(
                f"RegexRule received an invalid pattern '{self.pattern}': {exc}"
            ) from exc

    def validate(self, value: str) -> bool:
        """Validates the value against the regex pattern.

        Args:
            value: The normalized token value.

        Returns:
            ``True`` if the value matches the full pattern.
        """
        # fullmatch makes the ^/$ anchors in self.pattern redundant but
        # harmless; the ``re`` module caches compiled patterns internally.
        return bool(re.fullmatch(self.pattern, value))

    def to_regex_pattern(self) -> str:
        """Returns the pattern ready for embedding in a larger regex.

        Leading ``^`` / trailing ``$`` anchors are stripped, and top-level
        alternations are wrapped in a non-capturing group so the fragment
        composes safely when concatenated with other fragments (matching the
        grouped output of ``ListRule.to_regex_pattern``).

        Returns:
            The pattern string ready for embedding in a larger regex.

        Example:
            >>> RegexRule(pattern=r"^foo|bar$").to_regex_pattern()
            '(?:foo|bar)'
        """
        pattern = self.pattern
        if pattern.startswith("^"):
            pattern = pattern[1:]
        if pattern.endswith("$"):
            # Strip the "$" only when it is a real anchor: a trailing "$"
            # preceded by an odd number of backslashes (e.g. r"price\$") is an
            # escaped literal dollar sign and must be preserved, otherwise we
            # would return a broken pattern ending in a lone backslash.
            head = pattern[:-1]
            trailing_backslashes = len(head) - len(head.rstrip("\\"))
            if trailing_backslashes % 2 == 0:
                pattern = head
        if "|" in pattern:
            # Group so "a|b" cannot leak its alternation into the caller's
            # surrounding regex. Extra grouping around already-grouped
            # alternations is redundant but semantically identical.
            pattern = f"(?:{pattern})"
        return pattern
@dataclass(frozen=True)
class ListRule:
    """A rule that validates a token value against a fixed set of allowed values.

    Attributes:
        allowed: A frozenset of accepted string values.

    Example:
        >>> rule = ListRule(allowed=frozenset({"l", "r", "c", "m"}))
        >>> rule.validate("l")
        True
        >>> rule.validate("left")
        False
    """

    allowed: frozenset[str]

    def validate(self, value: str) -> bool:
        """Checks membership in the allowed set.

        Args:
            value: The normalized token value.

        Returns:
            ``True`` if ``value`` is in the allowed set.
        """
        return value in self.allowed

    def to_regex_pattern(self) -> str:
        """Returns an alternation pattern of all allowed values.

        Options are ordered longest-first so shorter alternatives cannot
        shadow longer ones in the regex engine, with an alphabetical
        tiebreaker so the output is deterministic. (Relying on frozenset
        iteration order alone would vary between processes because ``str``
        hashing is randomized.)

        Returns:
            A non-capturing alternation group, e.g. ``(?:left|right|l|r)``;
            a catch-all ``[^\\s]+`` if no non-empty values are allowed.
        """
        options = sorted(self.allowed, key=lambda opt: (-len(opt), opt))
        # Escape each value so regex metacharacters match literally; drop
        # empty strings, which would produce a degenerate alternation.
        escaped = [re.escape(opt) for opt in options if opt]
        if not escaped:
            return r"[^\s]+"
        return f"(?:{'|'.join(escaped)})"
@dataclass(frozen=True)
class CallableRule:
    """A rule that delegates validation to an arbitrary callable.

    The wrapped callable must take a single ``str`` argument and return
    ``bool``. A callable is opaque to the regex engine, so
    ``to_regex_pattern`` can only offer a catch-all pattern; real validation
    has to go through ``validate``.

    Attributes:
        func: The validation callable. Typed as ``object`` to avoid
            hashability issues; narrowed at call site.
        name: A human-readable label used in error messages and ``__repr__``.

    Example:
        >>> rule = CallableRule(func=str.isalpha, name="alpha_only")
        >>> rule.validate("arm")
        True
        >>> rule.validate("arm1")
        False
    """

    func: object
    name: str

    def validate(self, value: str) -> bool:
        """Calls the wrapped function with the value.

        Args:
            value: The normalized token value.

        Returns:
            ``True`` if the function returns a truthy result.

        Raises:
            TypeError: If the stored ``func`` is not callable.
        """
        checker = self.func
        # Narrow the ``object``-typed attribute before invoking it.
        if not callable(checker):
            raise TypeError(
                f"CallableRule '{self.name}' holds a non-callable: {self.func!r}"
            )
        return bool(checker(value))

    def to_regex_pattern(self) -> str:
        r"""Returns a permissive catch-all pattern.

        Returns:
            A catch-all pattern ``[^\\s]+``.
        """
        return r"[^\s]+"
# Union of every concrete rule implementation in this module — the type that
# ``rules.py`` produces from ``RuleConfig`` entries and that ``manager.py``
# consumes wherever a token-level rule is required.
ConcreteRule = RegexRule | ListRule | CallableRule