Source code for openrig.naming.manager

"""Module for managing naming conventions in rigging.

This module provides the ``Manager`` class to validate, parse, and build
strings based on configurable naming rules and tokens.

The key design decisions that enable strict typing throughout:

- Token values flowing **in** (raw input from callers) are typed as
  ``TokenValue = Union[str, Enum]``, defined in ``naming.types``.
- Token values flowing **internally** (after normalization) are always
  plain ``str``, represented as ``TokenData = dict[str, str]``.
- Rules are ``ConcreteRule`` instances (``RegexRule``, ``ListRule``,
  ``CallableRule``) that expose a uniform ``validate`` / ``to_regex_pattern``
  interface, eliminating all ``isinstance`` branching over rule types.
- Global rules are stored as a typed ``GlobalRules`` dataclass instead of
  ``dict[str, Any]``, so attribute access is fully type-safe.
- Serialization uses a ``ManagerConfig`` ``TypedDict`` so ``from_dict`` /
  ``to_dict`` round-trips are fully typed without ``Any``.
"""

from __future__ import annotations

import re
from enum import Enum
from collections.abc import Sequence
from typing import TypedDict

from openrig.naming.types import (
    ConcreteRule,
    GlobalRules,
    TokenData,
    TokenValue,
)

from .normalizers import Normalizer

# ---------------------------------------------------------------------------
# Exceptions
# ---------------------------------------------------------------------------


class NamingError(Exception):
    """Base exception for naming-related errors."""


class NamingConfigError(NamingError):
    """Raised when the Manager is configured with invalid parameters."""


class NamingValidationError(NamingError):
    """Raised when a name or token value fails validation."""


# ---------------------------------------------------------------------------
# Serialization contract
# ---------------------------------------------------------------------------


class ManagerConfig(TypedDict, total=False):
    """Typed dictionary representing the serializable state of a ``Manager``.

    Used by ``Manager.from_dict`` and ``Manager.to_dict`` to provide a fully
    typed round-trip without resorting to ``dict[str, Any]``.

    Attributes:
        tokens: Ordered list of token name strings.
        separator: The separator character.
        rules: Mapping of token name to its ``ConcreteRule``.
        normalizers: Mapping of token name to its normalizer callable.
        global_rules: The global naming constraints.
    """

    tokens: list[str]
    separator: str
    rules: dict[str, ConcreteRule]
    normalizers: dict[str, Normalizer]
    global_rules: GlobalRules


# ---------------------------------------------------------------------------
# Manager
# ---------------------------------------------------------------------------


class Manager:
    """Manages naming conventions by validating and operating on strings.

    Builds, parses, and validates names according to a configurable token
    structure. All token values are normalized to plain strings before
    validation, so callers may pass raw ``str`` or ``Enum`` members.

    Attributes:
        tokens: Ordered list of token name strings.
        separator: Character used to join token values.
        rules: Mapping of token name to its ``ConcreteRule``.
        normalizers: Mapping of token name to its normalizer callable.
        global_rules: Global constraints applied to every built name.

    Example:
        >>> from openrig.naming.types import RegexRule, ListRule, GlobalRules
        >>> manager = Manager(
        ...     tokens=["descriptor", "side", "usage"],
        ...     separator="_",
        ...     rules={
        ...         "descriptor": RegexRule(pattern=r"^[a-z][a-zA-Z0-9]*$"),
        ...         "side": ListRule(allowed=frozenset({"l", "r", "c"})),
        ...     },
        ...     global_rules=GlobalRules(max_length=80),
        ... )
        >>> manager.build_name(descriptor="arm", side="l", usage="jnt")
        'arm_l_jnt'
    """

    tokens: list[str]
    separator: str
    rules: dict[str, ConcreteRule]
    normalizers: dict[str, Normalizer]
    global_rules: GlobalRules

    def __init__(
        self,
        tokens: Sequence[str] | None = None,
        separator: str | None = None,
        rules: dict[str, ConcreteRule] | None = None,
        normalizers: dict[str, Normalizer] | None = None,
        global_rules: GlobalRules | None = None,
    ) -> None:
        """Initializes the naming Manager.

        Args:
            tokens: Ordered sequence of token name strings.
            separator: Character used to join token values. Must not be empty.
            rules: Mapping of token name to its ``ConcreteRule``. Tokens
                without a rule accept any value.
            normalizers: Mapping of token name to its normalizer callable.
                Normalizers are applied before validation.
            global_rules: Global constraints (max length, forbidden patterns).
                Defaults to an unconstrained ``GlobalRules`` instance.

        Raises:
            NamingConfigError: If ``separator`` is ``None`` or empty.
        """
        self.tokens = list(tokens or [])
        if not separator:
            raise NamingConfigError("Separator must be a non-empty string.")
        self.separator = separator
        self.rules = dict(rules or {})
        self.normalizers = dict(normalizers or {})
        self.global_rules = global_rules or GlobalRules(max_length=0)
        self._regex_cache: dict[tuple[bool, bool, bool], str] = {}

    def __repr__(self) -> str:
        """Returns a developer-friendly string representation."""
        return (
            f"<{self.__class__.__name__} "
            f"tokens={self.tokens} separator={self.separator!r}>"
        )

    # -----------------------------------------------------------------------
    # Serialization
    # -----------------------------------------------------------------------

    @classmethod
    def from_dict(cls, data: ManagerConfig) -> Manager:
        """Creates a ``Manager`` instance from a ``ManagerConfig`` dictionary.

        Args:
            data: A ``ManagerConfig`` typed dict containing the configuration.

        Returns:
            A new ``Manager`` instance.
        """
        return cls(
            tokens=data.get("tokens"),
            separator=data.get("separator"),
            rules=data.get("rules"),
            normalizers=data.get("normalizers"),
            global_rules=data.get("global_rules"),
        )

    def to_dict(self) -> ManagerConfig:
        """Exports the current configuration as a ``ManagerConfig`` dictionary.

        Returns:
            A ``ManagerConfig`` typed dict representing the current state.
        """
        return ManagerConfig(
            tokens=self.tokens,
            separator=self.separator,
            rules=self.rules,
            normalizers=self.normalizers,
            global_rules=self.global_rules,
        )
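
    # Illustrative round-trip (a sketch; note that ``to_dict`` exports live
    # references, so the clone shares the same rule and normalizer objects
    # as the original):
    #
    #   >>> clone = Manager.from_dict(manager.to_dict())
    #   >>> clone.tokens == manager.tokens
    #   True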

    # -----------------------------------------------------------------------
    # Internal helpers
    # -----------------------------------------------------------------------

    def _join_tokens(self, data: TokenData) -> str:
        """Joins normalized token values into a name string.

        Skips tokens whose value is an empty string.

        Args:
            data: A ``TokenData`` mapping of token name → normalized value.

        Returns:
            The assembled name string.
        """
        parts = [data[token] for token in self.tokens if data.get(token)]
        return self.separator.join(parts)

    def _normalize_token_value(self, token: str, value: TokenValue) -> str:
        """Normalizes a raw token value to a plain string.

        Processing order:

        1. Extract ``.value`` from ``Enum`` members.
        2. Apply the token's registered normalizer (if any).
        3. Convert to ``str`` and strip surrounding whitespace.

        Args:
            token: The token name.
            value: The raw input value (``str`` or ``Enum``).

        Returns:
            The normalized string value. Returns ``""`` for empty inputs.
        """
        # Step 1: unwrap Enum — isinstance narrows the type so Pylance
        # knows ``value.value`` is str after this branch.
        raw: str = str(value.value) if isinstance(value, Enum) else value
        if not raw:
            return ""

        # Step 2: apply registered normalizer
        normalizer = self.normalizers.get(token)
        if normalizer is not None:
            raw = normalizer(raw)

        # Step 3: final string coercion + strip
        return str(raw).strip()
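
    # Illustrative normalization pipeline (a sketch; ``Side`` and the
    # lower-casing normalizer are hypothetical, not part of this module):
    #
    #   >>> class Side(Enum):
    #   ...     LEFT = "L"
    #   >>> manager.normalizers["side"] = str.lower
    #   >>> manager._normalize_token_value("side", Side.LEFT)
    #   'l'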

    # -----------------------------------------------------------------------
    # Validators
    # -----------------------------------------------------------------------

    def is_valid(self, name: str) -> bool:
        """Returns ``True`` if ``name`` is fully valid against all tokens.

        A name is fully valid when it contains a non-empty, rule-conforming
        value for every token defined in the convention. For partial
        validation use ``get_data`` and inspect individual token values.

        Args:
            name: The candidate name string.

        Returns:
            ``True`` if the name is strictly valid, ``False`` otherwise.
        """
        if not name:
            return False
        regex = self.get_matching_regex(full_match=True, strict=True)
        if not re.match(regex, name):
            return False
        # Second pass: callable rules cannot be fully represented as regex,
        # so we validate each extracted token value explicitly.
        try:
            data = self.get_data(name)
            return all(
                self.is_valid_token(token, value)
                for token, value in data.items()
                if value
            )
        except Exception:
            return False
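
    # Illustrative checks (a sketch, reusing the ``manager`` from the class
    # docstring example — strict validation requires every token):
    #
    #   >>> manager.is_valid("arm_l_jnt")
    #   True
    #   >>> manager.is_valid("arm_l")      # "usage" token missing
    #   False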

    def is_valid_token(self, token: str, value: str) -> bool:
        """Returns ``True`` if ``value`` is valid for the given token.

        Checks that the value does not contain the separator, then delegates
        to the token's ``ConcreteRule.validate`` method.

        Args:
            token: The token name.
            value: The normalized (post-normalization) string value.

        Returns:
            ``True`` if valid, ``False`` otherwise.
        """
        if self.separator in value:
            return False
        rule = self.rules.get(token)
        if rule is None:
            return True
        return rule.validate(value)

    # -----------------------------------------------------------------------
    # Getters
    # -----------------------------------------------------------------------

    def get_data(self, name: str) -> TokenData:
        """Extracts token values from a name string using the matching regex.

        Args:
            name: The name string to parse.

        Returns:
            A ``TokenData`` dict mapping every token name to its extracted
            value, or ``""`` for tokens not present in the name.
        """
        data: TokenData = {token: "" for token in self.tokens}
        regex = self.get_matching_regex(capture_groups=True, full_match=True)
        match = re.match(regex, name)
        if match:
            data.update(
                {k: v for k, v in match.groupdict().items() if v is not None}
            )
        return data

    def parse(self, name: str) -> TokenData | str:
        """Attempts to parse a name into its constituent token values.

        Tries in order:

        1. Regex-based extraction (strict structural match).
        2. Split-based extraction (loose positional match).
        3. Returns the original string if both fail.

        Args:
            name: The name string to parse.

        Returns:
            A ``TokenData`` dict if parsing succeeds, or the original ``str``
            if the name does not conform to the convention.
        """
        if not name:
            return name

        # 1. Regex extraction
        data = self.get_data(name)
        if any(data.values()):
            return data

        # 2. Split extraction
        parts = name.split(self.separator)
        if len(parts) <= len(self.tokens):
            temp_data: TokenData = {token: "" for token in self.tokens}
            for i, part in enumerate(parts):
                token = self.tokens[i]
                if self.is_valid_token(token, part):
                    temp_data[token] = part
                else:
                    return name
            return temp_data

        return name
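
    # Illustrative fallbacks (a sketch, reusing the ``manager`` from the
    # class docstring example):
    #
    #   >>> manager.parse("arm_l_jnt")
    #   {'descriptor': 'arm', 'side': 'l', 'usage': 'jnt'}
    #   >>> manager.parse("arm_l_jnt_extra")   # too many parts: returned as-is
    #   'arm_l_jnt_extra'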

    def get_token_value(self, name: str, token_name: str) -> str:
        """Returns the value of a specific token extracted from a name.

        Args:
            name: The name string to extract from.
            token_name: The name of the token to retrieve.

        Returns:
            The extracted token value, or ``""`` if not present.

        Raises:
            NamingValidationError: If ``token_name`` is not defined in this
                manager's token list.
        """
        if token_name not in self.tokens:
            raise NamingValidationError(
                f"Token '{token_name}' is not defined. "
                f"Available tokens: {self.tokens}."
            )
        return self.get_data(name)[token_name]

    def get_errors(self, name: str) -> list[str]:
        """Returns a list of human-readable validation errors for a name.

        Args:
            name: The name string to validate.

        Returns:
            A list of error message strings. Empty if the name is valid.
        """
        errors: list[str] = []
        if not name:
            errors.append("Name must be a non-empty string.")
            return errors

        parts = name.split(self.separator)
        if len(parts) > len(self.tokens):
            errors.append(
                f"Name has too many parts: expected at most {len(self.tokens)}, "
                f"got {len(parts)}."
            )

        for i, part in enumerate(parts):
            if i >= len(self.tokens):
                break
            if not self.is_valid_token(self.tokens[i], part):
                errors.append(
                    f"Invalid value '{part}' for token '{self.tokens[i]}'."
                )

        if not errors and not self.is_valid(name):
            errors.append("Name does not match the required naming pattern.")
        return errors
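
    # Illustrative diagnostics (a sketch, reusing the ``manager`` from the
    # class docstring example; "x" is not in the side ListRule):
    #
    #   >>> manager.get_errors("arm_x_jnt")
    #   ["Invalid value 'x' for token 'side'."]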

    def get_matching_regex(
        self,
        capture_groups: bool = False,
        full_match: bool = True,
        strict: bool = False,
    ) -> str:
        """Generates a regex pattern matching names against this convention.

        Tokens are optional from right to left: for tokens
        ``["descriptor", "side", "usage"]`` the pattern matches ``"arm"``,
        ``"arm_l"``, and ``"arm_l_jnt"``, but never ``"arm__jnt"``.

        The pattern for each token is derived from its ``ConcreteRule`` via
        ``rule.to_regex_pattern()``. Tokens without a rule use a catch-all
        pattern that excludes the separator.

        Results are cached by ``(capture_groups, full_match, strict)``.

        Args:
            capture_groups: If ``True``, wraps each token in a named capture
                group ``(?P<token_name>...)``.
            full_match: If ``True``, anchors the pattern with ``^`` / ``$``.
            strict: If ``True``, all tokens are required (no optional groups).

        Returns:
            The assembled regex pattern string.
        """
        cache_key = (capture_groups, full_match, strict)
        if cache_key in self._regex_cache:
            return self._regex_cache[cache_key]

        if not self.tokens:
            result = "^$" if full_match else ""
            self._regex_cache[cache_key] = result
            return result

        # 1. Build raw pattern per token via ConcreteRule.to_regex_pattern()
        raw_patterns: dict[str, str] = {}
        for token in self.tokens:
            rule = self.rules.get(token)
            if rule is not None:
                raw_patterns[token] = rule.to_regex_pattern()
            else:
                # No rule: match anything except the separator
                if len(self.tokens) == 1:
                    raw_patterns[token] = ".+"
                elif len(self.separator) > 1:
                    raw_patterns[token] = f"(?:(?!{re.escape(self.separator)}).)+"
                else:
                    raw_patterns[token] = f"[^{re.escape(self.separator)}]+"

        # 2. Wrap in optional named capture groups
        token_patterns: dict[str, str] = {}
        for token in self.tokens:
            raw = raw_patterns[token]
            if capture_groups:
                token_patterns[token] = f"(?P<{token}>{raw})"
            else:
                token_patterns[token] = f"(?:{raw})"

        # 3. Assemble final regex
        sep = re.escape(self.separator)
        if strict:
            final_regex = sep.join(token_patterns[t] for t in self.tokens)
        else:
            if len(self.tokens) == 1:
                final_regex = token_patterns[self.tokens[0]]
            else:
                # Nest optional groups from right to left so trailing tokens
                # may be omitted but tokens can never be skipped mid-name.
                final_regex = f"(?:{sep}{token_patterns[self.tokens[-1]]})?"
                for i in range(len(self.tokens) - 2, 0, -1):
                    tok = self.tokens[i]
                    final_regex = f"(?:{sep}{token_patterns[tok]}{final_regex})?"
                final_regex = token_patterns[self.tokens[0]] + final_regex

        result = f"^{final_regex}$" if full_match else final_regex
        self._regex_cache[cache_key] = result
        return result
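
    # Illustrative output for two unruled tokens with separator "_"
    # (a sketch; the exact spelling follows the assembly steps above):
    #
    #   >>> Manager(tokens=["desc", "side"], separator="_").get_matching_regex()
    #   '^(?:[^_]+)(?:_(?:[^_]+))?$'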

    # -----------------------------------------------------------------------
    # Setters
    # -----------------------------------------------------------------------

    def add_rule(self, token: str, rule: ConcreteRule) -> None:
        """Adds or replaces the validation rule for a token.

        Clears the regex cache because the pattern depends on all rules.

        Args:
            token: The token name.
            rule: The ``ConcreteRule`` instance to associate with the token.
        """
        self._regex_cache.clear()
        self.rules[token] = rule

    def remove_rule(self, token: str) -> None:
        """Removes the validation rule for a token, if present.

        Clears the regex cache because the pattern depends on all rules.

        Args:
            token: The token name whose rule should be removed.
        """
        self._regex_cache.clear()
        self.rules.pop(token, None)

    # -----------------------------------------------------------------------
    # Builders
    # -----------------------------------------------------------------------

    def build_name(self, **kwargs: TokenValue) -> str:
        """Builds a name string from token values.

        Normalizes each provided value, validates it against the token's
        rule, joins all non-empty values with the separator, and then checks
        the result against the global rules.

        Args:
            **kwargs: Token values keyed by token name. Values may be ``str``
                or ``Enum`` members; they are normalized before validation.

        Returns:
            The assembled name string.

        Raises:
            NamingValidationError: If unknown tokens are provided, if a
                normalized value fails its rule, or if the assembled name
                violates a global constraint.
        """
        unknown = [key for key in kwargs if key not in self.tokens]
        if unknown:
            raise NamingValidationError(
                f"Unknown tokens: {unknown}. Expected one of: {self.tokens}."
            )

        data: TokenData = {token: "" for token in self.tokens}
        for token in self.tokens:
            raw_value = kwargs.get(token, "")
            normalized = self._normalize_token_value(token, raw_value)
            if normalized and not self.is_valid_token(token, normalized):
                raise NamingValidationError(
                    f"Invalid value '{normalized}' for token '{token}' "
                    f"(separator: '{self.separator}')."
                )
            data[token] = normalized

        final_name = self._join_tokens(data)
        self._validate_global_rules(final_name)
        return final_name
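
    # Illustrative Enum input (a sketch; ``Side`` is hypothetical — any
    # ``Enum`` whose ``.value`` satisfies the token's rule works):
    #
    #   >>> class Side(Enum):
    #   ...     LEFT = "l"
    #   >>> manager.build_name(descriptor="arm", side=Side.LEFT, usage="jnt")
    #   'arm_l_jnt'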

    def update_name(self, name: str, **kwargs: TokenValue) -> str:
        """Updates specific token values in an existing name.

        Parses ``name`` into its token values, applies ``kwargs`` as
        overrides, then rebuilds the name. If ``name`` cannot be parsed
        against the full convention, it is treated as a bare descriptor
        (first token) provided it passes the first token's rule.

        Args:
            name: The existing name string to update.
            **kwargs: Token values to override. Same rules as ``build_name``.

        Returns:
            The updated name string.

        Raises:
            NamingValidationError: If ``name`` cannot be interpreted and is
                not empty, or if the updated values are invalid.
        """
        data: TokenData = self.get_data(name)
        is_parsed = any(data.values())

        if not is_parsed:
            if not name:
                # Empty name: build from kwargs only
                return self.build_name(**kwargs)
            first_token = self.tokens[0]
            if self.is_valid_token(first_token, name):
                data[first_token] = name
            else:
                raise NamingValidationError(
                    f"Could not parse '{name}' against tokens {self.tokens}, "
                    f"and it is not a valid value for the first token "
                    f"'{first_token}'."
                )

        # Merge overrides: normalize kwargs to str before updating TokenData
        for token, raw_value in kwargs.items():
            data[token] = self._normalize_token_value(token, raw_value)

        return self.build_name(**data)
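
    # Illustrative override (a sketch, reusing the ``manager`` from the
    # class docstring example):
    #
    #   >>> manager.update_name("arm_l_jnt", side="r")
    #   'arm_r_jnt'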

    def resolve_name(
        self,
        value: str | TokenData | Sequence[TokenValue],
        tokens: list[str] | None = None,
        rules: dict[str, ConcreteRule] | None = None,
        normalizers: dict[str, Normalizer] | None = None,
    ) -> str:
        """Resolves a flexible input into a final name string.

        Provides a unified entry point for callers that may supply names as
        dicts, sequences, or plain strings. When ``tokens``, ``rules``, or
        ``normalizers`` overrides are provided, a temporary ``Manager`` is
        created with those overrides applied on top of the current config.

        Supported ``value`` types:

        - ``dict`` (``TokenData``): passed directly to ``build_name``.
        - ``list`` / ``tuple`` (``Sequence[TokenValue]``): values are mapped
          positionally to tokens, then passed to ``build_name``.
        - ``str``: returned as-is (no building or validation).

        Args:
            value: The input to resolve.
            tokens: Optional token list override.
            rules: Optional rules override.
            normalizers: Optional normalizers override.

        Returns:
            The resolved name string.

        Raises:
            NamingValidationError: If a sequence input has more items than
                there are tokens.
        """
        if tokens is not None or rules is not None or normalizers is not None:
            temp = Manager(
                tokens=tokens if tokens is not None else self.tokens,
                separator=self.separator,
                rules=rules if rules is not None else self.rules,
                normalizers=(
                    normalizers if normalizers is not None else self.normalizers
                ),
                global_rules=self.global_rules,
            )
            return temp.resolve_name(value)

        if isinstance(value, dict):
            return self.build_name(**value)

        if isinstance(value, (list, tuple)):
            if len(value) > len(self.tokens):
                raise NamingValidationError(
                    f"Input sequence has {len(value)} items but only "
                    f"{len(self.tokens)} tokens are defined."
                )
            token_data: TokenData = {
                token: self._normalize_token_value(token, val)
                for token, val in zip(self.tokens, value)
            }
            return self.build_name(**token_data)

        # Plain string: return as-is
        return str(value)
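
    # Illustrative inputs (a sketch, reusing the ``manager`` from the class
    # docstring example — all three forms resolve through the same method):
    #
    #   >>> manager.resolve_name({"descriptor": "arm", "side": "l"})
    #   'arm_l'
    #   >>> manager.resolve_name(["arm", "l", "jnt"])
    #   'arm_l_jnt'
    #   >>> manager.resolve_name("already_built")
    #   'already_built'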

    # -----------------------------------------------------------------------
    # Private validation helpers
    # -----------------------------------------------------------------------

    def _validate_global_rules(self, name: str) -> None:
        """Validates a fully assembled name against the global rules.

        Args:
            name: The assembled name string.

        Raises:
            NamingValidationError: If ``name`` exceeds the maximum length or
                contains a forbidden pattern.
        """
        max_length = self.global_rules.max_length
        if max_length and len(name) > max_length:
            raise NamingValidationError(
                f"Name '{name}' exceeds the maximum length of {max_length} "
                f"(got {len(name)})."
            )
        for pattern in self.global_rules.forbidden_patterns:
            if pattern in name:
                raise NamingValidationError(
                    f"Name '{name}' contains the forbidden pattern '{pattern}'."
                )