Source code for openrig.naming.rules

"""Configuration rules for the naming convention.

Responsible for loading ``naming/config.json``, building the rule set, and
validating the assembled convention at import time, so any configuration
error is caught immediately rather than at the first call to ``build_name``.

Exported constants (consumed by ``__init__.py`` and ``Manager``):
    - ``TOKENS``: ordered list of token names.
    - ``SEPARATOR``: the separator character.
    - ``TOKEN_RULES``: mapping of token name → ``ConcreteRule``.
    - ``NORMALIZERS``: mapping of token name → normalizer callable.
    - ``GLOBAL_RULES``: the validated ``GlobalRules`` instance.
"""

import importlib
import inspect
import json
import re
from enum import Enum
from pathlib import Path
from typing import Callable, Mapping, TypeGuard

from openrig.naming.types import (
    CallableRule,
    ConcreteRule,
    GlobalRules,
    ListRule,
    RegexRule,
    RuleConfig,
)

Normalizer = Callable[[object], str]


# ---------------------------------------------------------------------------
# Exceptions
# ---------------------------------------------------------------------------


class RuleBuilderError(Exception):
    """Signals that the naming convention could not be assembled.

    Raised for any configuration problem detected while building the
    rule set: a missing or malformed ``config.json``, an unknown rule
    type, or a provider that fails to construct its rule.
    """
# ---------------------------------------------------------------------------
# JSON helpers
# ---------------------------------------------------------------------------


def _is_list(value: object) -> TypeGuard[list[object]]:
    """Narrows ``value`` to ``list[object]`` for the type checker."""
    return isinstance(value, list)


def _is_dict(value: object) -> TypeGuard[dict[object, object]]:
    """Narrows ``value`` to ``dict[object, object]`` for the type checker."""
    return isinstance(value, dict)


def _require_str(value: object, path: str) -> str:
    """Returns ``value`` if it is a str, else raises ``RuleBuilderError``.

    ``path`` is the human-readable JSON location reported in the error.
    """
    if not isinstance(value, str):
        raise RuleBuilderError(
            f"Expected a string at '{path}', got {type(value).__name__!r}: {value!r}."
        )
    return value


def _require_list_of_str(value: object, path: str) -> list[str]:
    """Returns ``value`` as ``list[str]``, validating each element.

    Raises ``RuleBuilderError`` if ``value`` is not a list or any element
    is not a string; the offending index is included in the error path.
    """
    if not _is_list(value):
        raise RuleBuilderError(
            f"Expected a list at '{path}', got {type(value).__name__!r}: {value!r}."
        )
    result: list[str] = []
    for i, item in enumerate(value):
        result.append(_require_str(item, f"{path}[{i}]"))
    return result


def _require_dict(value: object, path: str) -> dict[str, object]:
    """Returns ``value`` as a dict with validated string keys.

    Raises ``RuleBuilderError`` if ``value`` is not a dict or any key is
    not a string. Values are passed through unchecked.
    """
    if not _is_dict(value):
        raise RuleBuilderError(
            f"Expected a dict at '{path}', got {type(value).__name__!r}: {value!r}."
        )
    result: dict[str, object] = {}
    for k, v in value.items():
        result[_require_str(k, f"{path}[key]")] = v
    return result


# ---------------------------------------------------------------------------
# Config parsers
# ---------------------------------------------------------------------------


def _parse_rule_config(data: dict[str, object], path: str) -> RuleConfig:
    """Parses one per-token rule entry into a ``RuleConfig``.

    Requires a string ``type``; ``value`` may be absent, a string, or a
    list of strings; ``sources`` and ``module`` are optional.
    """
    rule_type = _require_str(data.get("type"), f"{path}.type")
    raw_value = data.get("value")
    value: str | list[str] | None
    if raw_value is None:
        value = None
    elif isinstance(raw_value, str):
        value = raw_value
    elif _is_list(raw_value):
        value = _require_list_of_str(raw_value, f"{path}.value")
    else:
        raise RuleBuilderError(
            f"Expected a string or list at '{path}.value', "
            f"got {type(raw_value).__name__!r}."
        )
    raw_sources = data.get("sources")
    sources: list[str] | None = None
    if raw_sources is not None:
        sources = _require_list_of_str(raw_sources, f"{path}.sources")
    raw_module = data.get("module")
    module: str | None = None
    if raw_module is not None:
        module = _require_str(raw_module, f"{path}.module")
    return RuleConfig(type=rule_type, value=value, sources=sources, module=module)


def _parse_global_rules(data: dict[str, object], path: str) -> GlobalRules:
    """Parses the ``__global__`` entry into a ``GlobalRules`` dataclass.

    ``max_length`` is required and must be an int; ``forbidden_patterns``
    defaults to an empty list; ``separator_rule`` is optional and, when
    present, is parsed like any other rule entry.
    """
    raw_max = data.get("max_length")
    # isinstance(True, int) is True in Python, so bools would pass here too;
    # config.json has no booleans in this slot in practice.
    if not isinstance(raw_max, int):
        raise RuleBuilderError(
            f"Expected an int at '{path}.max_length', got {type(raw_max).__name__!r}."
        )
    forbidden = _require_list_of_str(
        data.get("forbidden_patterns", []), f"{path}.forbidden_patterns"
    )
    separator_rule: RuleConfig | None = None
    raw_sep_rule = data.get("separator_rule")
    if raw_sep_rule is not None:
        sep_dict = _require_dict(raw_sep_rule, f"{path}.separator_rule")
        separator_rule = _parse_rule_config(sep_dict, f"{path}.separator_rule")
    return GlobalRules(
        max_length=raw_max,
        forbidden_patterns=forbidden,
        separator_rule=separator_rule,
    )


def _parse_naming_rules(
    raw_rules: dict[str, object], path: str
) -> Mapping[str, RuleConfig | GlobalRules]:
    """Parses the ``rules`` section of the configuration.

    The reserved key ``__global__`` becomes a ``GlobalRules``; every other
    key is a token name mapped to its ``RuleConfig``.
    """
    parsed: dict[str, RuleConfig | GlobalRules] = {}
    for key, raw_value in raw_rules.items():
        entry_path = f"{path}.{key}"
        entry = _require_dict(raw_value, entry_path)
        if key == "__global__":
            parsed[key] = _parse_global_rules(entry, entry_path)
        else:
            parsed[key] = _parse_rule_config(entry, entry_path)
    return parsed


# ---------------------------------------------------------------------------
# Config loader
# ---------------------------------------------------------------------------


def _load_config() -> tuple[
    str,
    tuple[str, ...],
    Mapping[str, RuleConfig | GlobalRules],
    dict[str, str],
]:
    """Reads and parses ``naming/config.json``.

    Returns:
        A tuple of ``(separator, tokens, rules, normalizers)``.

    Raises:
        RuleBuilderError: If the file is missing, malformed, or
            structurally invalid.
    """
    # The config file is expected to live next to this module.
    config_path = Path(__file__).parent / "config.json"
    if not config_path.exists():
        raise RuleBuilderError(
            f"Configuration file not found: '{config_path}'. "
            "Ensure 'config.json' exists in the openrig/naming package directory."
        )
    try:
        with open(config_path, "r", encoding="utf-8") as fh:
            raw: object = json.load(fh)
    except json.JSONDecodeError as exc:
        raise RuleBuilderError(
            f"Failed to parse '{config_path}' as JSON: {exc}"
        ) from exc
    except OSError as exc:
        raise RuleBuilderError(f"Could not read '{config_path}': {exc}") from exc
    # Validate the overall shape, then each section in turn.
    section = _require_dict(raw, "<naming/config.json>")
    separator = _require_str(section.get("separator"), "separator")
    tokens: tuple[str, ...] = tuple(
        _require_list_of_str(section.get("tokens", []), "tokens")
    )
    raw_rules = _require_dict(section.get("rules", {}), "rules")
    rules = _parse_naming_rules(raw_rules, "rules")
    raw_normalizers = _require_dict(section.get("normalizers", {}), "normalizers")
    # Normalizers are declared by function name; resolution to callables
    # happens later in build_normalizers().
    normalizers: dict[str, str] = {
        k: _require_str(v, f"normalizers.{k}") for k, v in raw_normalizers.items()
    }
    return separator, tokens, rules, normalizers


# ---------------------------------------------------------------------------
# Providers — each converts a RuleConfig into a ConcreteRule
# ---------------------------------------------------------------------------


def _provider_regex(config: RuleConfig) -> RegexRule:
    """Converts a ``"regex"`` RuleConfig into a ``RegexRule``.

    Args:
        config: The raw rule configuration from ``naming/config.json``.

    Returns:
        A ``RegexRule`` instance wrapping the pattern.

    Raises:
        RuleBuilderError: If ``config.value`` is not a string, or if the
            pattern is not a valid regular expression.
    """
    if not isinstance(config.value, str):
        raise RuleBuilderError(
            f"Regex rule requires a string 'value', "
            f"got: {type(config.value).__name__!r}."
        )
    try:
        # RegexRule validates the pattern itself and raises ValueError
        # for an invalid regex; re-wrap as a RuleBuilderError.
        return RegexRule(pattern=config.value)
    except ValueError as exc:
        raise RuleBuilderError(str(exc)) from exc


def _provider_list(config: RuleConfig) -> ListRule:
    """Converts a ``"list"`` RuleConfig into a ``ListRule``.

    Args:
        config: The raw rule configuration from ``naming/config.json``.

    Returns:
        A ``ListRule`` instance with the allowed values as a ``frozenset``.

    Raises:
        RuleBuilderError: If ``config.value`` is not a list.
    """
    if not isinstance(config.value, list):
        raise RuleBuilderError(
            f"List rule requires a list 'value', got: {type(config.value).__name__!r}."
        )
    return ListRule(allowed=frozenset(config.value))


def _provider_from_enums(config: RuleConfig) -> ListRule:
    """Aggregates enum member values into a ``ListRule``.

    Loads the module specified by ``config.module`` (defaults to
    ``"openrig.constants"``) via ``importlib`` and collects the string
    values of all enum classes listed in ``config.sources``.

    Args:
        config: The raw rule configuration. ``config.sources`` must be a
            non-empty list of enum class names present in the target module.

    Returns:
        A ``ListRule`` whose allowed set is the union of all member values.

    Raises:
        RuleBuilderError: If ``config.sources`` is missing or empty, if the
            module cannot be imported, or if a source name is not a valid
            ``Enum`` subclass in that module.
    """
    if not isinstance(config.sources, list) or not config.sources:
        raise RuleBuilderError(
            "'sources' for a 'from_enums' rule must be a non-empty list of enum names."
        )
    module_path = config.module or "openrig.constants"
    try:
        module = importlib.import_module(module_path)
    except ImportError as exc:
        raise RuleBuilderError(
            f"Could not import module '{module_path}' for 'from_enums' rule: {exc}"
        ) from exc
    all_values: frozenset[str] = frozenset()
    for name in config.sources:
        enum_cls = getattr(module, name, None)
        if (
            enum_cls is None
            or not inspect.isclass(enum_cls)
            or not issubclass(enum_cls, Enum)
        ):
            raise RuleBuilderError(
                f"'{name}' is not a valid Enum subclass in '{module_path}'."
            )
        # Member values are coerced to str so mixed-type enums still
        # produce a uniform allowed set.
        all_values = all_values | frozenset(str(member.value) for member in enum_cls)
    return ListRule(allowed=all_values)


def _provider_callable(config: RuleConfig) -> CallableRule:
    """Imports a callable by dotted path and wraps it in a ``CallableRule``.

    Args:
        config: The raw rule configuration. ``config.value`` must be a
            dotted import path string (e.g. ``"mypackage.module.my_func"``).

    Returns:
        A ``CallableRule`` wrapping the imported function.

    Raises:
        RuleBuilderError: If ``config.value`` is not a string, if the module
            cannot be imported, or if the resolved attribute is not callable.
    """
    if not isinstance(config.value, str):
        raise RuleBuilderError(
            f"Callable rule requires a dotted-path string 'value', "
            f"got: {type(config.value).__name__!r}."
        )
    import_path = config.value
    try:
        # rsplit with no '.' raises ValueError — i.e. a bare name was given.
        module_path, func_name = import_path.rsplit(".", 1)
    except ValueError as exc:
        raise RuleBuilderError(
            f"Callable rule 'value' must be a dotted path (e.g. 'pkg.mod.func'), "
            f"got: {import_path!r}."
        ) from exc
    try:
        module = importlib.import_module(module_path)
    except ImportError as exc:
        raise RuleBuilderError(
            f"Could not import module '{module_path}' for callable rule: {exc}"
        ) from exc
    func = getattr(module, func_name, None)
    if func is None:
        raise RuleBuilderError(
            f"Module '{module_path}' has no attribute '{func_name}'."
        )
    if not callable(func):
        raise RuleBuilderError(
            f"'{import_path}' resolves to a non-callable object: {func!r}."
        )
    return CallableRule(func=func, name=import_path)


# ---------------------------------------------------------------------------
# Provider dispatch table
# ---------------------------------------------------------------------------

# Maps the 'type' field of a RuleConfig to the provider that builds its
# ConcreteRule. build_rules() dispatches through this table.
_PROVIDER_MAP: dict[str, Callable[[RuleConfig], ConcreteRule]] = {
    "regex": _provider_regex,
    "list": _provider_list,
    "from_enums": _provider_from_enums,
    "callable": _provider_callable,
}


# ---------------------------------------------------------------------------
# Public builders
# ---------------------------------------------------------------------------
def build_rules(
    rules_config: Mapping[str, RuleConfig | GlobalRules],
) -> tuple[dict[str, ConcreteRule], GlobalRules]:
    """Builds the token rule mapping and global rules from parsed configuration.

    Args:
        rules_config: The parsed rules mapping from ``_load_config()``.

    Returns:
        A tuple of ``(token_rules, global_rules)`` where ``token_rules``
        maps each token name to its ``ConcreteRule`` and ``global_rules``
        is the validated ``GlobalRules`` dataclass.

    Raises:
        RuleBuilderError: If ``__global__`` is missing or invalid, if an
            unknown rule type is encountered, or if a provider fails.
    """
    # The reserved '__global__' entry is mandatory and must already have
    # been parsed into a GlobalRules instance.
    maybe_global = rules_config.get("__global__")
    if not isinstance(maybe_global, GlobalRules):
        raise RuleBuilderError(
            "Configuration is missing or invalid for '__global__' in 'rules'. "
            "Expected a GlobalRules instance."
        )

    assembled: dict[str, ConcreteRule] = {}
    for token_name, entry in rules_config.items():
        if token_name == "__global__":
            continue  # already handled above
        if not isinstance(entry, RuleConfig):
            raise RuleBuilderError(
                f"Expected a RuleConfig for token '{token_name}', "
                f"got: {type(entry).__name__!r}."
            )
        # Dispatch on the declared rule type through the provider table.
        provider = _PROVIDER_MAP.get(entry.type)
        if provider is None:
            raise RuleBuilderError(
                f"Unknown rule type '{entry.type}' for token '{token_name}'. "
                f"Valid types are: {sorted(_PROVIDER_MAP.keys())!r}."
            )
        assembled[token_name] = provider(entry)

    return assembled, maybe_global
def build_normalizers(normalizers_config: dict[str, str]) -> dict[str, Normalizer]:
    """Resolves normalizer function names into callable normalizers.

    Args:
        normalizers_config: The parsed normalizers mapping from
            ``_load_config()``.

    Returns:
        A mapping of token name → normalizer callable.

    Raises:
        RuleBuilderError: If a declared normalizer name is not found in
            ``NORMALIZER_MAP``.
    """
    # Imported here to avoid a circular import at module load time.
    from openrig.naming.normalizers import NORMALIZER_MAP

    resolved: dict[str, Normalizer] = {}
    for token_name, declared_name in normalizers_config.items():
        func = NORMALIZER_MAP.get(declared_name)
        if func is None:
            raise RuleBuilderError(
                f"Normalizer '{declared_name}' for token '{token_name}' not found "
                f"in NORMALIZER_MAP. Available: {sorted(NORMALIZER_MAP.keys())!r}."
            )
        resolved[token_name] = func
    return resolved
def validate_convention(
    tokens: list[str],
    separator: str,
    token_rules: Mapping[str, ConcreteRule],
    global_rules: GlobalRules,
) -> None:
    """Validates the structural integrity of the assembled naming convention.

    Args:
        tokens: The ordered list of token names.
        separator: The separator character.
        token_rules: The assembled token rule mapping.
        global_rules: The global rules dataclass.

    Raises:
        RuleBuilderError: If any structural constraint is violated.
    """
    if not tokens:
        raise RuleBuilderError("'tokens' cannot be an empty list.")

    if len(set(tokens)) != len(tokens):
        repeated = {name for name in tokens if tokens.count(name) > 1}
        raise RuleBuilderError(
            f"'tokens' contains duplicate entries: {sorted(repeated)!r}."
        )

    if not separator:
        raise RuleBuilderError("'separator' cannot be empty.")

    # If a regex rule governs the separator, the separator itself must
    # satisfy it; other rule types are not checked here.
    sep_rule = global_rules.separator_rule
    if sep_rule is not None and sep_rule.type == "regex":
        if not isinstance(sep_rule.value, str):
            raise RuleBuilderError(
                "The separator_rule has type 'regex' but its 'value' is not a string."
            )
        if re.fullmatch(sep_rule.value, separator) is None:
            raise RuleBuilderError(
                f"Separator '{separator}' does not satisfy its own rule "
                f"(pattern: '{sep_rule.value}')."
            )

    # Token names become regex named groups later, so each must be a
    # valid Python identifier.
    valid_name = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]*$")
    for name in tokens:
        if valid_name.match(name):
            continue
        raise RuleBuilderError(
            f"Token name '{name}' is not a valid Python identifier. "
            "Token names are used as regex named groups and must match "
            r"'^[a-zA-Z_][a-zA-Z0-9_]*$'."
        )

    # Rule-less tokens are legal but worth a warning: they accept anything.
    unruled = [name for name in tokens if name not in token_rules]
    if unruled:
        import logging

        logging.getLogger(__name__).warning(
            "The following tokens have no validation rule "
            "and will accept any value: %s",
            unruled,
        )
# --------------------------------------------------------------------------- # Module-level assembly — executed once at import time # --------------------------------------------------------------------------- _separator, _tokens, _rules_config, _normalizers_config = _load_config() TOKENS: list[str] = list(_tokens) SEPARATOR: str = _separator TOKEN_RULES, _GLOBAL_RULES_OBJ = build_rules(_rules_config) NORMALIZERS: dict[str, Normalizer] = build_normalizers(_normalizers_config) validate_convention(TOKENS, SEPARATOR, TOKEN_RULES, _GLOBAL_RULES_OBJ) GLOBAL_RULES: GlobalRules = _GLOBAL_RULES_OBJ