"""Type definitions for the naming system.
Defines all types used internally by the naming package:
**Config types** (used by ``rules.py``):
- ``RuleConfig``: raw deserialized configuration for a single token rule.
- ``GlobalRules``: global constraints applied to every fully-built name.
**Token value types** (used by ``manager.py``):
- ``TokenValue``: accepted raw input for a token before normalization.
- ``TokenData``: canonical post-normalization form (always ``str`` values).
**Rule protocol** (used by ``manager.py``):
- ``RuleValidator``: ``Protocol`` that every rule type must satisfy.
**Concrete rule implementations** (used by ``rules.py``, ``manager.py``):
- ``RegexRule``: validates a token value against a regular expression.
- ``ListRule``: validates a token value against a fixed set of allowed values.
- ``CallableRule``: delegates validation to an arbitrary callable.
- ``ConcreteRule``: union of all concrete rule types.
"""
import re
from dataclasses import dataclass, field
from enum import Enum
from typing import Protocol, runtime_checkable
# ---------------------------------------------------------------------------
# Config types
# ---------------------------------------------------------------------------
[docs]
@dataclass
class RuleConfig:
"""Raw deserialized configuration for a single token rule.
Intermediate representation populated from ``naming/config.json``.
``rules.py`` converts each instance into a ``ConcreteRule``.
Attributes:
type: Rule type: ``"regex"``, ``"list"``, ``"from_enums"``,
or ``"callable"``.
value: Primary payload — pattern string, list of values, or
dotted import path, depending on ``type``.
sources: Enum class names to aggregate. Only for ``"from_enums"``.
module: Dotted module path where ``sources`` are defined.
Only for ``"from_enums"``. Defaults to ``"openrig.constants"``.
"""
type: str
value: str | list[str] | None = None
sources: list[str] | None = None
module: str | None = None
@dataclass
class GlobalRules:
    """Global constraints applied to every fully-built name.

    Attributes:
        max_length: Maximum allowed character length for any built name.
        forbidden_patterns: Substrings that must not appear in any built name.
        separator_rule: Optional ``RuleConfig`` constraining valid separators.
    """

    max_length: int
    # Plain ``list`` as the factory (the idiomatic form); the element type
    # is already expressed by the annotation.
    forbidden_patterns: list[str] = field(default_factory=list)
    separator_rule: RuleConfig | None = None
# ---------------------------------------------------------------------------
# Token value types
# ---------------------------------------------------------------------------
# Raw token input accepted before normalization: a plain string or an Enum
# member (presumably normalized to the member's string value downstream —
# TODO confirm against manager.py).
TokenValue = str | Enum
# Canonical post-normalization form: mapping of token name -> string value.
TokenData = dict[str, str]
# ---------------------------------------------------------------------------
# Rule Protocol
# ---------------------------------------------------------------------------
@runtime_checkable
class RuleValidator(Protocol):
    """Protocol for all token-level validation rules.

    Any object that implements ``validate`` and ``to_regex_pattern``
    satisfies this protocol.

    Example:
        >>> class MyRule:
        ...     def validate(self, value: str) -> bool:
        ...         return value.isalpha()
        ...     def to_regex_pattern(self) -> str:
        ...         return r"[a-zA-Z]+"
    """

    def validate(self, value: str) -> bool:
        """Validates a normalized token value.

        Args:
            value: The normalized (post-normalization) string value to check.

        Returns:
            ``True`` if the value is acceptable, ``False`` otherwise.
        """
        ...

    def to_regex_pattern(self) -> str:
        """Returns a regex pattern representing the valid values for this rule.

        Used by the ``Manager`` to build the global matching regex without
        inspecting the rule's internal structure.

        Returns:
            A regex pattern string (without anchors ``^`` / ``$``).
        """
        ...
# ---------------------------------------------------------------------------
# Concrete rule implementations
# ---------------------------------------------------------------------------
@dataclass(frozen=True)
class RegexRule:
    """A rule that validates a token value against a regular expression.

    Attributes:
        pattern: The regex pattern string. Anchors ``^`` / ``$`` are
            optional; ``validate`` always performs a full match.

    Example:
        >>> rule = RegexRule(pattern=r"^[a-z][a-zA-Z0-9]*$")
        >>> rule.validate("armUpper")
        True
        >>> rule.validate("Arm Upper")
        False
    """

    pattern: str

    def __post_init__(self) -> None:
        """Validates that the pattern compiles correctly on construction.

        Raises:
            ValueError: If ``pattern`` is not a valid regular expression.
        """
        try:
            re.compile(self.pattern)
        except re.error as exc:
            raise ValueError(
                f"RegexRule received an invalid pattern '{self.pattern}': {exc}"
            ) from exc

    def validate(self, value: str) -> bool:
        """Validates the value against the compiled regex pattern.

        Args:
            value: The normalized token value.

        Returns:
            ``True`` if the value matches the full pattern.
        """
        # ``re`` caches compiled patterns internally, so recompilation here
        # is cheap after the __post_init__ compile.
        return bool(re.fullmatch(self.pattern, value))

    def to_regex_pattern(self) -> str:
        """Returns the raw pattern, stripping leading/trailing anchors.

        NOTE(review): only a single leading ``^`` / trailing ``$`` is
        stripped; patterns with top-level alternation (e.g. ``^a|b$``) may
        not embed safely — callers should wrap in a group if needed.

        Returns:
            The pattern string ready for embedding in a larger regex.
        """
        pattern = self.pattern
        if pattern.startswith("^"):
            pattern = pattern[1:]
        if pattern.endswith("$"):
            pattern = pattern[:-1]
        return pattern
@dataclass(frozen=True)
class ListRule:
    """A rule that validates a token value against a fixed set of allowed values.

    Attributes:
        allowed: A frozenset of accepted string values.

    Example:
        >>> rule = ListRule(allowed=frozenset({"l", "r", "c", "m"}))
        >>> rule.validate("l")
        True
        >>> rule.validate("left")
        False
    """

    allowed: frozenset[str]

    def validate(self, value: str) -> bool:
        """Checks membership in the allowed set.

        Args:
            value: The normalized token value.

        Returns:
            ``True`` if ``value`` is in the allowed set.
        """
        return value in self.allowed

    def to_regex_pattern(self) -> str:
        """Returns an alternation pattern of all allowed values, sorted by length.

        Sorting by length descending prevents shorter alternatives from
        shadowing longer ones in the regex engine.

        Returns:
            A non-capturing alternation group, e.g. ``(?:left|right|l|r)``.
        """
        options = sorted(self.allowed, key=len, reverse=True)
        escaped = [re.escape(opt) for opt in options if opt]
        if not escaped:
            # Empty/blank-only set: fall back to a permissive catch-all so the
            # global regex still compiles.
            return r"[^\s]+"
        return f"(?:{'|'.join(escaped)})"
@dataclass(frozen=True)
class CallableRule:
    """A rule that delegates validation to an arbitrary callable.

    The callable must accept a single ``str`` argument and return ``bool``.
    Because callable rules are opaque to the regex engine,
    ``to_regex_pattern`` returns a catch-all pattern; validation must be
    performed separately via ``validate``.

    Attributes:
        func: The validation callable. Typed as ``object`` to avoid
            hashability issues; narrowed at call site.
        name: A human-readable label used in error messages and ``__repr__``.

    Example:
        >>> rule = CallableRule(func=str.isalpha, name="alpha_only")
        >>> rule.validate("arm")
        True
        >>> rule.validate("arm1")
        False
    """

    func: object
    name: str

    def validate(self, value: str) -> bool:
        """Calls the wrapped function with the value.

        Args:
            value: The normalized token value.

        Returns:
            ``True`` if the function returns a truthy result.

        Raises:
            TypeError: If the stored ``func`` is not callable.
        """
        # Narrow ``func`` here rather than in __post_init__ so construction
        # from config never raises; failures surface at validation time.
        if not callable(self.func):
            raise TypeError(
                f"CallableRule '{self.name}' holds a non-callable: {self.func!r}"
            )
        return bool(self.func(value))

    def to_regex_pattern(self) -> str:
        r"""Returns a permissive catch-all pattern.

        Returns:
            A catch-all pattern ``[^\\s]+``.
        """
        return r"[^\s]+"
# Closed union of all concrete rule implementations; each member satisfies
# the ``RuleValidator`` protocol.
ConcreteRule = RegexRule | ListRule | CallableRule