"""Configuration rules for the naming convention.
Responsible for loading ``naming/config.json``, building the rule set, and
validating the assembled convention at import time, so any configuration
error is caught immediately rather than at the first call to ``build_name``.
Exported constants (consumed by ``__init__.py`` and ``Manager``):
- ``TOKENS``: ordered list of token names.
- ``SEPARATOR``: the separator character.
- ``TOKEN_RULES``: mapping of token name → ``ConcreteRule``.
- ``NORMALIZERS``: mapping of token name → normalizer callable.
- ``GLOBAL_RULES``: the validated ``GlobalRules`` instance.
"""
import importlib
import inspect
import json
import re
from enum import Enum
from pathlib import Path
from typing import Callable, Mapping, TypeGuard
from openrig.naming.types import (
CallableRule,
ConcreteRule,
GlobalRules,
ListRule,
RegexRule,
RuleConfig,
)
# A normalizer coerces an arbitrary token value into its canonical string form.
Normalizer = Callable[[object], str]
# ---------------------------------------------------------------------------
# Exceptions
# ---------------------------------------------------------------------------
class RuleBuilderError(Exception):
    """Raised when the naming convention cannot be assembled from configuration.

    Every helper in this module raises this single exception type so callers
    can catch one error for any configuration problem.
    """
# ---------------------------------------------------------------------------
# JSON helpers
# ---------------------------------------------------------------------------
def _is_list(value: object) -> TypeGuard[list[object]]:
return isinstance(value, list)
def _is_dict(value: object) -> TypeGuard[dict[object, object]]:
return isinstance(value, dict)
def _require_str(value: object, path: str) -> str:
if not isinstance(value, str):
raise RuleBuilderError(
f"Expected a string at '{path}', got {type(value).__name__!r}: {value!r}."
)
return value
def _require_list_of_str(value: object, path: str) -> list[str]:
if not _is_list(value):
raise RuleBuilderError(
f"Expected a list at '{path}', got {type(value).__name__!r}: {value!r}."
)
result: list[str] = []
for i, item in enumerate(value):
result.append(_require_str(item, f"{path}[{i}]"))
return result
def _require_dict(value: object, path: str) -> dict[str, object]:
if not _is_dict(value):
raise RuleBuilderError(
f"Expected a dict at '{path}', got {type(value).__name__!r}: {value!r}."
)
result: dict[str, object] = {}
for k, v in value.items():
result[_require_str(k, f"{path}[key]")] = v
return result
# ---------------------------------------------------------------------------
# Config parsers
# ---------------------------------------------------------------------------
def _parse_rule_config(data: dict[str, object], path: str) -> RuleConfig:
    """Parses a single token-rule JSON entry into a ``RuleConfig``.

    Args:
        data: The raw JSON object for one rule.
        path: Dotted location in the config file, used in error messages.

    Returns:
        The populated ``RuleConfig``.

    Raises:
        RuleBuilderError: If any field has an unexpected JSON type.
    """
    rule_type = _require_str(data.get("type"), f"{path}.type")

    # 'value' may be absent, a single string, or a list of strings.
    value: str | list[str] | None = None
    raw_value = data.get("value")
    if isinstance(raw_value, str):
        value = raw_value
    elif isinstance(raw_value, list):
        value = _require_list_of_str(raw_value, f"{path}.value")
    elif raw_value is not None:
        raise RuleBuilderError(
            f"Expected a string or list at '{path}.value', "
            f"got {type(raw_value).__name__!r}."
        )

    raw_sources = data.get("sources")
    sources = (
        None
        if raw_sources is None
        else _require_list_of_str(raw_sources, f"{path}.sources")
    )

    raw_module = data.get("module")
    module = (
        None if raw_module is None else _require_str(raw_module, f"{path}.module")
    )

    return RuleConfig(type=rule_type, value=value, sources=sources, module=module)
def _parse_global_rules(data: dict[str, object], path: str) -> GlobalRules:
    """Parses the ``__global__`` JSON entry into a ``GlobalRules`` instance.

    Args:
        data: The raw JSON object for the ``__global__`` entry.
        path: Dotted location in the config file, used in error messages.

    Returns:
        The populated ``GlobalRules``.

    Raises:
        RuleBuilderError: If ``max_length`` is not an int, or any nested
            field has an unexpected JSON type.
    """
    raw_max = data.get("max_length")
    # bool is a subclass of int, so a JSON `true`/`false` would otherwise
    # pass the isinstance(int) check and become a bogus max_length.
    if isinstance(raw_max, bool) or not isinstance(raw_max, int):
        raise RuleBuilderError(
            f"Expected an int at '{path}.max_length', got {type(raw_max).__name__!r}."
        )
    forbidden = _require_list_of_str(
        data.get("forbidden_patterns", []), f"{path}.forbidden_patterns"
    )
    # 'separator_rule' is optional; when present it is parsed like any rule.
    separator_rule: RuleConfig | None = None
    raw_sep_rule = data.get("separator_rule")
    if raw_sep_rule is not None:
        sep_dict = _require_dict(raw_sep_rule, f"{path}.separator_rule")
        separator_rule = _parse_rule_config(sep_dict, f"{path}.separator_rule")
    return GlobalRules(
        max_length=raw_max,
        forbidden_patterns=forbidden,
        separator_rule=separator_rule,
    )
def _parse_naming_rules(
    raw_rules: dict[str, object], path: str
) -> Mapping[str, RuleConfig | GlobalRules]:
    """Parses the ``rules`` section, dispatching ``__global__`` separately.

    Args:
        raw_rules: The raw JSON object for the ``rules`` section.
        path: Dotted location in the config file, used in error messages.

    Returns:
        A mapping of entry name to its parsed rule object.
    """
    parsed_rules: dict[str, RuleConfig | GlobalRules] = {}
    for name, raw_entry in raw_rules.items():
        entry_path = f"{path}.{name}"
        entry = _require_dict(raw_entry, entry_path)
        # '__global__' carries convention-wide settings; everything else
        # is a per-token rule.
        parser = _parse_global_rules if name == "__global__" else _parse_rule_config
        parsed_rules[name] = parser(entry, entry_path)
    return parsed_rules
# ---------------------------------------------------------------------------
# Config loader
# ---------------------------------------------------------------------------
def _load_config() -> tuple[
    str,
    tuple[str, ...],
    Mapping[str, RuleConfig | GlobalRules],
    dict[str, str],
]:
    """Reads and parses ``naming/config.json``.

    Returns:
        A tuple of ``(separator, tokens, rules, normalizers)``.

    Raises:
        RuleBuilderError: If the file is missing, malformed, or structurally invalid.
    """
    config_path = Path(__file__).parent / "config.json"
    if not config_path.exists():
        raise RuleBuilderError(
            f"Configuration file not found: '{config_path}'. "
            "Ensure 'config.json' exists in the openrig/naming package directory."
        )
    try:
        raw: object = json.loads(config_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as exc:
        raise RuleBuilderError(
            f"Failed to parse '{config_path}' as JSON: {exc}"
        ) from exc
    except OSError as exc:
        raise RuleBuilderError(f"Could not read '{config_path}': {exc}") from exc

    section = _require_dict(raw, "<naming/config.json>")
    separator = _require_str(section.get("separator"), "separator")
    tokens: tuple[str, ...] = tuple(
        _require_list_of_str(section.get("tokens", []), "tokens")
    )
    rules = _parse_naming_rules(
        _require_dict(section.get("rules", {}), "rules"), "rules"
    )
    raw_normalizers = _require_dict(section.get("normalizers", {}), "normalizers")
    normalizers: dict[str, str] = {
        token: _require_str(name, f"normalizers.{token}")
        for token, name in raw_normalizers.items()
    }
    return separator, tokens, rules, normalizers
# ---------------------------------------------------------------------------
# Providers — each converts a RuleConfig into a ConcreteRule
# ---------------------------------------------------------------------------
def _provider_regex(config: RuleConfig) -> RegexRule:
    """Converts a ``"regex"`` RuleConfig into a ``RegexRule``.

    Args:
        config: The raw rule configuration from ``naming/config.json``.

    Returns:
        A ``RegexRule`` instance wrapping the pattern.

    Raises:
        RuleBuilderError: If ``config.value`` is not a string, or if the
            pattern is not a valid regular expression.
    """
    pattern = config.value
    if not isinstance(pattern, str):
        raise RuleBuilderError(
            f"Regex rule requires a string 'value', "
            f"got: {type(pattern).__name__!r}."
        )
    try:
        # RegexRule validates the pattern itself and raises ValueError
        # on a bad expression; re-wrap it as a configuration error.
        return RegexRule(pattern=pattern)
    except ValueError as exc:
        raise RuleBuilderError(str(exc)) from exc
def _provider_list(config: RuleConfig) -> ListRule:
    """Converts a ``"list"`` RuleConfig into a ``ListRule``.

    Args:
        config: The raw rule configuration from ``naming/config.json``.

    Returns:
        A ``ListRule`` instance with the allowed values as a ``frozenset``.

    Raises:
        RuleBuilderError: If ``config.value`` is not a list.
    """
    allowed_values = config.value
    if not isinstance(allowed_values, list):
        raise RuleBuilderError(
            f"List rule requires a list 'value', got: {type(allowed_values).__name__!r}."
        )
    return ListRule(allowed=frozenset(allowed_values))
def _provider_from_enums(config: RuleConfig) -> ListRule:
    """Aggregates enum member values into a ``ListRule``.

    Loads the module specified by ``config.module`` (defaults to
    ``"openrig.constants"``) via ``importlib`` and collects the string
    values of all enum classes listed in ``config.sources``.

    Args:
        config: The raw rule configuration. ``config.sources`` must be a
            non-empty list of enum class names present in the target module.

    Returns:
        A ``ListRule`` whose allowed set is the union of all member values.

    Raises:
        RuleBuilderError: If ``config.sources`` is missing or empty, if the
            module cannot be imported, or if a source name is not a valid
            ``Enum`` subclass in that module.
    """
    if not isinstance(config.sources, list) or not config.sources:
        raise RuleBuilderError(
            "'sources' for a 'from_enums' rule must be a non-empty list of enum names."
        )
    module_path = config.module or "openrig.constants"
    try:
        module = importlib.import_module(module_path)
    except ImportError as exc:
        raise RuleBuilderError(
            f"Could not import module '{module_path}' for 'from_enums' rule: {exc}"
        ) from exc
    # Accumulate in a mutable set: rebuilding a frozenset with `|` on each
    # iteration copies the whole set every time for no benefit.
    all_values: set[str] = set()
    for name in config.sources:
        enum_cls = getattr(module, name, None)
        if (
            enum_cls is None
            or not inspect.isclass(enum_cls)
            or not issubclass(enum_cls, Enum)
        ):
            raise RuleBuilderError(
                f"'{name}' is not a valid Enum subclass in '{module_path}'."
            )
        all_values.update(str(member.value) for member in enum_cls)
    return ListRule(allowed=frozenset(all_values))
def _provider_callable(config: RuleConfig) -> CallableRule:
    """Imports a callable by dotted path and wraps it in a ``CallableRule``.

    Args:
        config: The raw rule configuration. ``config.value`` must be a
            dotted import path string (e.g. ``"mypackage.module.my_func"``).

    Returns:
        A ``CallableRule`` wrapping the imported function.

    Raises:
        RuleBuilderError: If ``config.value`` is not a string, if the module
            cannot be imported, or if the resolved attribute is not callable.
    """
    dotted_path = config.value
    if not isinstance(dotted_path, str):
        raise RuleBuilderError(
            f"Callable rule requires a dotted-path string 'value', "
            f"got: {type(dotted_path).__name__!r}."
        )
    # rsplit raises ValueError when there is no '.' to split on.
    try:
        module_path, func_name = dotted_path.rsplit(".", 1)
    except ValueError as exc:
        raise RuleBuilderError(
            f"Callable rule 'value' must be a dotted path (e.g. 'pkg.mod.func'), "
            f"got: {dotted_path!r}."
        ) from exc
    try:
        module = importlib.import_module(module_path)
    except ImportError as exc:
        raise RuleBuilderError(
            f"Could not import module '{module_path}' for callable rule: {exc}"
        ) from exc
    resolved = getattr(module, func_name, None)
    if resolved is None:
        raise RuleBuilderError(
            f"Module '{module_path}' has no attribute '{func_name}'."
        )
    if not callable(resolved):
        raise RuleBuilderError(
            f"'{dotted_path}' resolves to a non-callable object: {resolved!r}."
        )
    return CallableRule(func=resolved, name=dotted_path)
# ---------------------------------------------------------------------------
# Provider dispatch table
# ---------------------------------------------------------------------------
# Maps the JSON ``type`` field of a rule entry to the provider that turns
# its ``RuleConfig`` into a ``ConcreteRule``.  ``build_rules`` dispatches
# through this table and rejects any type not listed here.
_PROVIDER_MAP: dict[str, Callable[[RuleConfig], ConcreteRule]] = {
    "regex": _provider_regex,
    "list": _provider_list,
    "from_enums": _provider_from_enums,
    "callable": _provider_callable,
}
# ---------------------------------------------------------------------------
# Public builders
# ---------------------------------------------------------------------------
def build_rules(
    rules_config: Mapping[str, RuleConfig | GlobalRules],
) -> tuple[dict[str, ConcreteRule], GlobalRules]:
    """Builds the token rule mapping and global rules from parsed configuration.

    Args:
        rules_config: The parsed rules mapping from ``_load_config()``.

    Returns:
        A tuple of ``(token_rules, global_rules)`` where ``token_rules`` maps
        each token name to its ``ConcreteRule`` and ``global_rules`` is the
        validated ``GlobalRules`` dataclass.

    Raises:
        RuleBuilderError: If ``__global__`` is missing or invalid, if an
            unknown rule type is encountered, or if a provider fails.
    """
    global_rules = rules_config.get("__global__")
    if not isinstance(global_rules, GlobalRules):
        raise RuleBuilderError(
            "Configuration is missing or invalid for '__global__' in 'rules'. "
            "Expected a GlobalRules instance."
        )
    token_rules: dict[str, ConcreteRule] = {}
    for token, config in rules_config.items():
        if token == "__global__":  # already consumed above
            continue
        if not isinstance(config, RuleConfig):
            raise RuleBuilderError(
                f"Expected a RuleConfig for token '{token}', "
                f"got: {type(config).__name__!r}."
            )
        provider = _PROVIDER_MAP.get(config.type)
        if provider is None:
            raise RuleBuilderError(
                f"Unknown rule type '{config.type}' for token '{token}'. "
                f"Valid types are: {sorted(_PROVIDER_MAP.keys())!r}."
            )
        token_rules[token] = provider(config)
    return token_rules, global_rules
def build_normalizers(normalizers_config: dict[str, str]) -> dict[str, Normalizer]:
    """Resolves normalizer function names into callable normalizers.

    Args:
        normalizers_config: The parsed normalizers mapping from ``_load_config()``.

    Returns:
        A mapping of token name → normalizer callable.

    Raises:
        RuleBuilderError: If a declared normalizer name is not found in
            ``NORMALIZER_MAP``.
    """
    # NOTE(review): imported locally — presumably to avoid a circular
    # import with openrig.naming.normalizers; confirm before moving.
    from openrig.naming.normalizers import NORMALIZER_MAP

    final_normalizers: dict[str, Normalizer] = {}
    for token, func_name in normalizers_config.items():
        normalizer_func = NORMALIZER_MAP.get(func_name)
        if normalizer_func is None:
            raise RuleBuilderError(
                f"Normalizer '{func_name}' for token '{token}' not found "
                f"in NORMALIZER_MAP. Available: {sorted(NORMALIZER_MAP.keys())!r}."
            )
        final_normalizers[token] = normalizer_func
    return final_normalizers
def validate_convention(
    tokens: list[str],
    separator: str,
    token_rules: Mapping[str, ConcreteRule],
    global_rules: GlobalRules,
) -> None:
    """Validates the structural integrity of the assembled naming convention.

    Args:
        tokens: The ordered list of token names.
        separator: The separator character.
        token_rules: The assembled token rule mapping.
        global_rules: The global rules dataclass.

    Raises:
        RuleBuilderError: If any structural constraint is violated.
    """
    if not tokens:
        raise RuleBuilderError("'tokens' cannot be an empty list.")
    if len(set(tokens)) != len(tokens):
        # Report each duplicated name once, in stable (sorted) order.
        duplicates = [t for t in tokens if tokens.count(t) > 1]
        raise RuleBuilderError(
            f"'tokens' contains duplicate entries: {sorted(set(duplicates))!r}."
        )
    if not separator:
        raise RuleBuilderError("'separator' cannot be empty.")
    # The separator must satisfy its own rule when one is configured as a
    # regex; other rule types for the separator are not checked here.
    sep_rule = global_rules.separator_rule
    if sep_rule is not None and sep_rule.type == "regex":
        if not isinstance(sep_rule.value, str):
            raise RuleBuilderError(
                "The separator_rule has type 'regex' but its 'value' is not a string."
            )
        if not re.fullmatch(sep_rule.value, separator):
            raise RuleBuilderError(
                f"Separator '{separator}' does not satisfy its own rule "
                f"(pattern: '{sep_rule.value}')."
            )
    # Token names are used as regex named groups (per the error text below),
    # so each must be a valid ASCII identifier.
    _identifier_re = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]*$")
    for token in tokens:
        if not _identifier_re.match(token):
            raise RuleBuilderError(
                f"Token name '{token}' is not a valid Python identifier. "
                "Token names are used as regex named groups and must match "
                r"'^[a-zA-Z_][a-zA-Z0-9_]*$'."
            )
    # Tokens without a rule are permitted but accept any value; warn so the
    # gap is visible.  Imported locally since this is a rare, cold path.
    rule_less = [t for t in tokens if t not in token_rules]
    if rule_less:
        import logging

        logging.getLogger(__name__).warning(
            "The following tokens have no validation rule "
            "and will accept any value: %s",
            rule_less,
        )
# ---------------------------------------------------------------------------
# Module-level assembly — executed once at import time
# ---------------------------------------------------------------------------
# Load and validate the convention once at import time so configuration
# errors surface immediately (see the module docstring).
_separator, _tokens, _rules_config, _normalizers_config = _load_config()
# Public, stable views of the parsed configuration.
TOKENS: list[str] = list(_tokens)
SEPARATOR: str = _separator
TOKEN_RULES, _GLOBAL_RULES_OBJ = build_rules(_rules_config)
NORMALIZERS: dict[str, Normalizer] = build_normalizers(_normalizers_config)
# Fail fast: raises RuleBuilderError if the assembled convention is invalid.
validate_convention(TOKENS, SEPARATOR, TOKEN_RULES, _GLOBAL_RULES_OBJ)
# Exported only after validation succeeds.
GLOBAL_RULES: GlobalRules = _GLOBAL_RULES_OBJ