Source code for openrig.naming.utils

"""String manipulation utilities for the OpenRig naming system.

Provides validators, getters, converters, and incrementers for building
and transforming name strings used throughout the rigging pipeline.

Categories:
    Validators: ``is_string``, ``is_digit``, ``is_camel_case``, etc.
    Getters: ``get_digits``, ``get_version``, ``get_namespace``, etc.
    Converters: ``to_camel_case``, ``to_snake_case``, ``to_kebab_case``, etc.
    Incrementers: ``increment_digit``, ``increment_character``, etc.
    Decrementers: ``decrement_digit``, ``decrement_character``, etc.
"""

import re
from typing import Iterable

# Compiled Regex Patterns for performance
# Case-style validators: each pattern is anchored to cover the whole string.
# camelCase: lowercase first letter, then optional capitalized words.
_CAMEL_CASE_RE = re.compile(r"^[a-z][a-z0-9]*([A-Z][a-z0-9]*)*$")
# PascalCase: same shape as camelCase, but the first letter is uppercase.
_PASCAL_CASE_RE = re.compile(r"^[A-Z][a-z0-9]*([A-Z][a-z0-9]*)*$")
# snake_case: the first segment is letters only; later segments may hold digits.
_SNAKE_CASE_RE = re.compile(r"^[a-z]+(_[a-z0-9]+)*$")
# kebab-case: same shape as snake_case with '-' as the separator.
_KEBAB_CASE_RE = re.compile(r"^[a-z]+(-[a-z0-9]+)*$")
# Any run of consecutive digits.
_DIGITS_RE = re.compile(r"\d+")
# Captures whatever sits between '[' and ']' (not only digits, despite the name).
_BRACKETS_DIGITS_RE = re.compile(r"\[([^\]]+)\]")
# Runs of non-underscore characters with an underscore directly on both sides.
# Lookarounds keep the underscores out of the match so neighbours can share them.
_UNDERSCORE_DATA_RE = re.compile(r"(?<=_)[^_]+(?=_)")
# Runs of characters that are not ASCII letters or digits.
_NORMALIZE_RE = re.compile(r"[^a-zA-Z0-9]+")
# Runs of word delimiters: underscore, hyphen, whitespace, or dot.
_SPLIT_TEXT_DELIM_RE = re.compile(r"[_\-\s\.]+")
# Word tokens, tried in this order: an uppercase run that precedes a capitalized
# word, a capitalized word, an uppercase run, a lowercase run, a digit run.
_SPLIT_TEXT_WORDS_RE = re.compile(
    r"[A-Z]+(?=[A-Z][a-z])|[A-Z][a-z0-9]+|[A-Z]+|[a-z]+|[0-9]+"
)
# Any single character other than an ASCII letter, a digit, or an underscore.
_CLEAN_TXT_INVALID_RE = re.compile(r"[^a-zA-Z0-9_]")
# Digit run at the end of the string; the digits are captured.
_INCREMENT_DIGIT_RE = re.compile(r"(\d+)$")
# 'v' or 'V' immediately followed by digits; the digits are captured.
_VERSION_RE = re.compile(r"[vV](\d+)")
# Default side-swap table used by ``swap_substrings`` when no mapping is given.
# Center/middle entries map to themselves, so those tokens are left unchanged.
_SIDE_MAPPING = {
    "_L_": "_R_",
    "_R_": "_L_",
    "_l_": "_r_",
    "_r_": "_l_",
    "L_": "R_",
    "R_": "L_",
    "l_": "r_",
    "r_": "l_",
    "_L": "_R",
    "_R": "_L",
    "_l": "_r",
    "_r": "_l",
    "_C_": "_C_",
    "_c_": "_c_",
    "Left": "Right",
    "Right": "Left",
    "_Left": "_Right",
    "_Right": "_Left",
    "Center": "Center",
    "center": "center",
    "Middle": "Middle",
    "middle": "middle",
    "_M_": "_M_",
    "_m_": "_m_",
    "M_": "M_",
    "m_": "m_",
    "_M": "_M",
    "_m": "_m",
    "left": "right",
    "right": "left",
    "_left": "_right",
    "_right": "_left",
}


# VALIDATORS

[docs]
def is_string(text: object) -> bool:
    """Reports whether the given value is a string.

    Args:
        text: Any object to check.

    Returns:
        True when ``text`` is an instance of ``str`` (subclasses included),
        False for every other type.
    """
    return isinstance(text, (str,))




[docs]
def is_digit(value: object) -> bool:
    """Reports whether the given value is numeric.

    Args:
        value: Any object to check.

    Returns:
        True when ``value`` is an ``int`` or a ``float`` instance, False
        otherwise. ``bool`` values also pass because ``bool`` subclasses ``int``.
    """
    return isinstance(value, int) or isinstance(value, float)




[docs]
def is_camel_case(text: str) -> bool:
    """Checks whether a text follows the camelCase convention.

    Args:
        text: Text to check.

    Returns:
        True when the whole text matches the camelCase pattern, False otherwise.
    """
    return _CAMEL_CASE_RE.fullmatch(text) is not None




[docs]
def is_pascal_case(text: str) -> bool:
    """Checks whether a text follows the PascalCase convention.

    Args:
        text: Text to check.

    Returns:
        True when the whole text matches the PascalCase pattern, False otherwise.
    """
    return _PASCAL_CASE_RE.fullmatch(text) is not None




[docs]
def is_snake_case(text: str) -> bool:
    """Checks whether a text follows the snake_case convention.

    Args:
        text: Text to check.

    Returns:
        True when the whole text matches the snake_case pattern, False otherwise.
    """
    return _SNAKE_CASE_RE.fullmatch(text) is not None




[docs]
def is_kebab_case(text: str) -> bool:
    """Checks whether a text follows the kebab-case convention.

    Args:
        text: Text to check.

    Returns:
        True when the whole text matches the kebab-case pattern, False otherwise.
    """
    return _KEBAB_CASE_RE.fullmatch(text) is not None




[docs]
def is_character_in(text: str, character: str = "_") -> bool:
    """Checks whether a character (or substring) occurs in a text.

    Args:
        text: Text to search in.
        character: Character to look for. Defaults to '_'.

    Returns:
        True when ``character`` occurs anywhere in ``text``, False otherwise.
    """
    # Substring membership on ``str`` already yields a bool.
    return character in text



# GETTERS

[docs]
def get_case_style(text: str) -> str:
    """Identifies which naming convention a text follows.

    The conventions are tested in a fixed order and the first match wins, so
    a text that satisfies several of them (e.g. a single lowercase word) is
    reported under the earliest one.

    Args:
        text: The text to classify.

    Returns:
        One of "camelCase", "PascalCase", "snake_case", "kebab-case", or
        "unknown" when no convention matches.
    """
    ordered_checks = (
        (is_camel_case, "camelCase"),
        (is_pascal_case, "PascalCase"),
        (is_snake_case, "snake_case"),
        (is_kebab_case, "kebab-case"),
    )
    for matches_style, label in ordered_checks:
        if matches_style(text):
            return label
    return "unknown"




[docs]
def get_digits(text: str) -> list[str]:
    """Collects every run of consecutive digits found in a text.

    Args:
        text: Text to scan.

    Returns:
        The digit runs as strings, in order of appearance. Empty when the
        text contains no digits.
    """
    return [found.group(0) for found in _DIGITS_RE.finditer(text)]




[docs]
def get_digit_by_index(text: str, index: int = 0) -> str | None:
    """Returns the digit run at a given position among those found in a text.

    Args:
        text: Text to scan.
        index: Zero-based position of the wanted digit run. Defaults to 0.

    Returns:
        The digit run at ``index``, or None when ``index`` is negative or
        beyond the number of digit runs found.
    """
    digit_runs = get_digits(text)
    in_range = 0 <= index < len(digit_runs)
    return digit_runs[index] if in_range else None




[docs]
def get_first_digit(text: str) -> str | None:
    """Returns the first run of digits found in a text.

    Args:
        text: Text to scan.

    Returns:
        The first digit run as a string, or None when the text has no digits.
    """
    return next(iter(get_digits(text)), None)




[docs]
def get_last_digit(text: str) -> str | None:
    """Returns the last run of digits found in a text.

    Args:
        text: Text to scan.

    Returns:
        The last digit run as a string, or None when the text has no digits.
    """
    digit_runs = get_digits(text=text)
    if not digit_runs:
        return None
    return digit_runs[-1]




[docs]
def get_digits_between_brackets(text: str) -> list[str]:
    """Collects the digit runs that appear inside square brackets.

    Each ``[...]`` group is located first, then every digit run inside it is
    extracted, so a group such as ``[1:3]`` contributes two entries.

    Args:
        text: The text to scan.

    Returns:
        The digit runs found between square brackets, in order of appearance.
        Empty when there are none.
    """
    return [
        number
        for bracketed in _BRACKETS_DIGITS_RE.findall(text)
        for number in _DIGITS_RE.findall(bracketed)
    ]




[docs]
def get_data_between_underscores(text: str) -> list[str]:
    """Collects the tokens that are enclosed by underscores on both sides.

    Args:
        text: The text to scan.

    Returns:
        The tokens that have an underscore directly before and after them
        (e.g. 'a_b_c' yields ['b']). Leading and trailing tokens are excluded.
    """
    # The pattern uses lookarounds, so one underscore can close a token and
    # open the next without being consumed.
    return [found.group(0) for found in _UNDERSCORE_DATA_RE.finditer(text)]




[docs]
def get_version(text: str) -> int | None:
    """Reads the first version number from a text (e.g. 'v001' gives 1).

    A version is a 'v' or 'V' immediately followed by digits; only the first
    occurrence is considered.

    Args:
        text: Text to scan.

    Returns:
        The version as an integer, or None when no version marker is present.
    """
    found = _VERSION_RE.search(text)
    return None if found is None else int(found.group(1))




[docs]
def get_namespace(text: str, separator: str = ":") -> str:
    """Returns everything before the last separator in a string.

    Args:
        text: The string to process (e.g. 'namespace:item').
        separator: The separator string. Defaults to ':'.

    Returns:
        The namespace portion (nested namespaces are kept, e.g. 'a:b:item'
        gives 'a:b'), or an empty string when the separator is absent.
    """
    # rpartition yields an empty head when the separator is not found.
    namespace, _sep, _leaf = text.rpartition(separator)
    return namespace



# CONVERTERS

[docs]
def replace_spaces(text: str, replacement: str) -> str:
    """Substitutes every space character in a text.

    Only the plain space character is affected; tabs and newlines are kept.

    Args:
        text: The input text.
        replacement: String that takes the place of each space.

    Returns:
        The text with each space replaced by ``replacement``.
    """
    pieces = text.split(" ")
    return replacement.join(pieces)




[docs]
def normalize_text(text: str) -> str:
    """Replaces each run of non-alphanumeric characters with one space.

    Args:
        text: Text to normalize.

    Returns:
        The text with every run of characters other than ASCII letters and
        digits collapsed into a single space.
    """
    return re.sub(_NORMALIZE_RE, " ", text)




[docs]
def to_upper(text: str) -> str:
    """Returns an uppercase copy of a text.

    Args:
        text: Text to convert.

    Returns:
        The text with all cased characters in uppercase.
    """
    uppercased = text.upper()
    return uppercased




[docs]
def to_lower(text: str) -> str:
    """Returns a lowercase copy of a text.

    Args:
        text: Text to convert.

    Returns:
        The text with all cased characters in lowercase.
    """
    lowercased = text.lower()
    return lowercased




[docs]
def to_camel_case(text: str, remove_digits: bool = False) -> str:
    """Converts a text to camelCase.

    The text is split into words; the first word is lowercased and each
    following word is capitalized (first letter upper, the rest lower).

    Args:
        text: Text to convert to camelCase.
        remove_digits: Remove digits from the text. Defaults to False.

    Returns:
        camelCase string. An empty string when ``text`` contains no words
        (e.g. it is empty or made only of delimiters).
    """
    split_words = split_text(text)
    if not split_words:
        # Nothing to convert; indexing the first word would raise IndexError.
        return ""

    first_word, *other_words = split_words
    camel_case = first_word.lower() + "".join(
        word.capitalize() for word in other_words
    )

    if remove_digits:
        camel_case = strip_digits(camel_case)

    return camel_case




[docs]
def to_pascal_case(text: str, remove_digits: bool = False) -> str:
    """Converts a text to PascalCase.

    The text is split into words and every word is capitalized (first letter
    upper, the rest lower) before the words are concatenated.

    Args:
        text: Text to convert to PascalCase.
        remove_digits: Remove digits from the result. Defaults to False.

    Returns:
        PascalCase string.
    """
    joined = "".join(word.capitalize() for word in split_text(text))
    return strip_digits(joined) if remove_digits else joined




[docs]
def to_snake_case(text: str, remove_digits: bool = False) -> str:
    """Converts a text to snake_case.

    Args:
        text: Text to convert to snake_case.
        remove_digits: Remove digits from the result. Defaults to False.

    Returns:
        snake_case string.
    """
    joined = "_".join(map(str.lower, split_text(text)))
    if not remove_digits:
        return joined

    # Dropping a digit-only word leaves doubled separators behind, so the
    # text is split and joined once more to tidy them up.
    without_digits = strip_digits(joined)
    return "_".join(map(str.lower, split_text(without_digits)))




[docs]
def to_kebab_case(text: str, remove_digits: bool = False) -> str:
    """Converts a text to kebab-case.

    Args:
        text: Text to convert to kebab-case.
        remove_digits: Remove digits from the result. Defaults to False.

    Returns:
        kebab-case string.
    """
    joined = "-".join(map(str.lower, split_text(text)))
    if not remove_digits:
        return joined

    # Dropping a digit-only word leaves doubled separators behind, so the
    # text is split and joined once more to tidy them up.
    without_digits = strip_digits(joined)
    return "-".join(map(str.lower, split_text(without_digits)))




[docs]
def value_to_str(value: float | int) -> str:
    """Converts a numeric value to a filesystem-safe string in ``MxDx`` format.

    The ``M`` prefix indicates a negative value; the decimal point is
    replaced by ``d`` (e.g. ``-1.5`` → ``"M1d5"``, ``2.0`` → ``"2d0"``).

    Args:
        value: The numeric value to encode.

    Returns:
        The encoded string representation.
    """
    sign = ""
    if value < 0:
        sign = "M"
        value = abs(value)  # Convert value to positive

    return f"{sign}{value}".replace(".", "d")




[docs]
def str_to_value(text: str) -> float:
    """Converts a formatted ``MxDx`` string back into a numeric value.

    Reverses the encoding applied by ``value_to_str``: a leading ``M`` marks
    a negative value and every ``d`` stands for the decimal point.

    Args:
        text: Formatted text to convert into a number (e.g. ``"M1d5"``).

    Returns:
        The corresponding value as a float.

    Raises:
        ValueError: If the text, once decoded, is not a valid float literal.
            This includes an empty string.
    """
    # startswith() is safe on an empty string, whereas indexing text[0]
    # would raise IndexError before float() could report the bad input.
    is_negative = text.startswith("M")
    if is_negative:
        text = text[1:]

    magnitude = float(text.replace("d", "."))
    return -magnitude if is_negative else magnitude




[docs]
def strip_digits(text: str) -> str:
    """Deletes every digit from a text.

    Args:
        text: Text to process.

    Returns:
        The text with all digit runs removed; other characters are untouched.
    """
    return re.sub(_DIGITS_RE, "", text)




[docs]
def split_text(text: str) -> list[str]:
    """Breaks a text into its individual words.

    Delimiters (``_``, ``-``, whitespace, ``.``) are turned into spaces first,
    then words are extracted at camelCase / PascalCase boundaries and at
    transitions to digit runs.

    Args:
        text: The text to split.

    Returns:
        The words in order of appearance. Empty when no word is found.
    """
    spaced = re.sub(_SPLIT_TEXT_DELIM_RE, " ", text)
    return [word.group(0) for word in _SPLIT_TEXT_WORDS_RE.finditer(spaced)]




[docs]
def join_tokens(tokens: Iterable[str | None], separator: str = "_") -> str:
    """Joins a sequence of tokens, filtering out None or empty strings.

    Args:
        tokens: A sequence of tokens (strings or None).
        separator: The separator to use. Defaults to '_'.

    Returns:
        The joined string.
    """
    return separator.join(t for t in tokens if t)




[docs]
def capitalize_first(text: str) -> str:
    """Capitalizes the first letter of a text.

    Unlike ``str.capitalize``, the remaining characters are left untouched
    (``'armUpper'`` becomes ``'ArmUpper'``).

    Args:
        text: Text to capitalize.

    Returns:
        Text with the first letter capitalized. An empty text is returned
        as an empty string.
    """
    # Slicing is safe on empty input, whereas text[0] would raise IndexError.
    return text[:1].upper() + text[1:]




[docs]
def remove_prefix(text: str, prefix: str) -> str:
    """Strips a leading prefix from a text when it is present.

    Args:
        text: Text to process.
        prefix: Prefix to strip.

    Returns:
        The text without the prefix, or the text unchanged when it does not
        start with ``prefix``. At most one occurrence is removed.
    """
    if text.startswith(prefix):
        return text[len(prefix):]
    return text




[docs]
def remove_suffix(text: str, suffix: str) -> str:
    """Strips a trailing suffix from a text when it is present.

    Args:
        text: Text to process.
        suffix: Suffix to strip.

    Returns:
        The text without the suffix, or the text unchanged when it does not
        end with ``suffix``. At most one occurrence is removed.
    """
    if text.endswith(suffix):
        # Computing the cut from the front keeps an empty suffix harmless.
        return text[: len(text) - len(suffix)]
    return text




[docs]
def clean_txt(text: str, replace_with: str = "_") -> str:
    """Sanitizes a string by substituting illegal characters.

    Every character other than an ASCII letter, a digit, or an underscore is
    replaced, repeated replacement characters are collapsed into one, and the
    result is prefixed with ``replace_with`` when it would otherwise start
    with a digit. With an empty ``replace_with`` illegal characters are just
    removed and a leading digit is left in place.

    Args:
        text: Text to clean.
        replace_with: Character to replace illegal characters with. Defaults to '_'.

    Returns:
        Cleaned text.
    """
    cleaned = re.sub(_CLEAN_TXT_INVALID_RE, replace_with, text)

    if replace_with:
        repeated = re.compile(re.escape(replace_with) + "+")
        cleaned = repeated.sub(replace_with, cleaned)

    # Slicing keeps the check safe when the cleaned text is empty.
    if cleaned[:1].isdigit():
        cleaned = replace_with + cleaned

    return cleaned




[docs]
def strip_namespace(text: str, separator: str = ":") -> str:
    """Returns the part of a string that follows the last separator.

    Args:
        text: The string to process (e.g. 'namespace:item').
        separator: The separator string. Defaults to ':'.

    Returns:
        The string without its namespace prefix, or the string unchanged when
        the separator is absent.
    """
    # Splitting once from the right leaves the leaf name as the last item,
    # and the whole text when there is nothing to split on.
    return text.rsplit(separator, 1)[-1]




[docs]
def get_base_name(text: str, separator: str = "|") -> str:
    """Returns the last component of a path-like string.

    Args:
        text: The path string (e.g. 'group|arm|hand').
        separator: The path separator. Defaults to '|'.

    Returns:
        The component after the last separator (the leaf node), or the whole
        text when the separator is absent.
    """
    _parents, _sep, leaf = text.rpartition(separator)
    return leaf




[docs]
def truncate(text: str, max_length: int, ellipsis: str = "...") -> str:
    """Truncates a string to a maximum length.

    The result never exceeds ``max_length`` characters. When the limit is
    too small to hold the ellipsis, the ellipsis itself is cut to fit.

    Args:
        text: The text to truncate.
        max_length: The maximum length including the ellipsis.
        ellipsis: The string to append to truncated text. Defaults to "...".

    Returns:
        The text unchanged when it already fits, otherwise its beginning
        followed by the ellipsis. An empty string when ``max_length`` is zero
        or negative and the text does not fit.
    """
    if len(text) <= max_length:
        return text
    if max_length <= 0:
        return ""
    if len(ellipsis) >= max_length:
        # No room for any text: appending the whole ellipsis would overshoot
        # the limit, so only as much of it as fits is kept.
        return ellipsis[:max_length]
    return text[: max_length - len(ellipsis)] + ellipsis




[docs]
def split_name_number(text: str) -> tuple[str, str | None]:
    """Separates a string into a name and its trailing number.

    Args:
        text: The text to split (e.g. 'arm01').

    Returns:
        A ``(name, number)`` tuple where ``number`` is the trailing digit run
        as a string with its padding intact. When the text does not end in
        digits the tuple is ``(text, None)``.
    """
    trailing = _INCREMENT_DIGIT_RE.search(text)
    if trailing is None:
        return text, None
    return text[: trailing.start()], trailing.group(1)




[docs]
def get_unique_name(name: str, existing_names: Iterable[str]) -> str:
    """Derives a name that does not clash with a set of existing names.

    The trailing number of the name is incremented (or a number is appended)
    until the result is no longer among the existing names.

    Args:
        name: The desired name.
        existing_names: Iterable of names that already exist.

    Returns:
        ``name`` itself when it is free, otherwise the first incremented
        variant that is not taken.
    """
    # Materialize once so that each membership test is a set lookup.
    taken = set(existing_names)
    candidate = name
    while candidate in taken:
        candidate = increment_digit(candidate)
    return candidate



# INCREMENTERS

[docs]
def increment_character(text: str) -> str:
    """Increments a letter sequence in a manner similar to Excel column naming.

    The last letter advances by one. A ``Z``/``z`` rolls over to ``A``/``a``
    and carries into the letter before it. When every letter rolls over, one
    extra letter is prepended (``'ZZ'`` -> ``'AAA'``, ``'zz'`` -> ``'aaa'``).
    Rollover is decided per character, so mixed-case input keeps the case of
    each position (``'aZ'`` -> ``'bA'``).

    Args:
        text: A non-empty text containing only letters ('A'-'Z' or 'a'-'z').

    Returns:
        The incremented letter sequence.

    Raises:
        IndexError: If ``text`` is empty.
    """
    chars = list(text)
    # Walk from the last character towards the first, carrying on rollover.
    for position in range(len(chars) - 1, -1, -1):
        current = chars[position]
        if current not in ("Z", "z"):
            chars[position] = chr(ord(current) + 1)
            return "".join(chars)
        # Roll over within the character's own case and keep carrying.
        chars[position] = "A" if current == "Z" else "a"
    # Every position rolled over. chars[0] is now 'A' or 'a' in the case of
    # the original leading letter, which is exactly the letter to prepend.
    return chars[0] + "".join(chars)




[docs]
def increment_digit(text: str, pads: int | None = None) -> str:
    """Adds one to the number at the end of a text.

    Args:
        text: Text to increment.
        pads: Minimum number of digits, zero-padded. If None, the width of
              the existing number is kept, or 2 is used when a number has to
              be appended.

    Returns:
        Text with its trailing number incremented, or with ``1`` (zero-padded)
        appended when the text does not end in digits.
    """
    trailing = _INCREMENT_DIGIT_RE.search(text)

    if trailing is None:
        width = 2 if pads is None else pads
        return text + "1".zfill(width)

    digits = trailing.group(1)
    width = len(digits) if pads is None else pads
    bumped = str(int(digits) + 1).zfill(width)
    return text[: trailing.start()] + bumped




[docs]
def replace_padding(text: str, padding: int = 2) -> str:
    """Re-pads the number at the end of a text.

    Existing leading zeros are dropped before the new zero padding is applied,
    so the number can shrink as well as grow in width.

    Args:
        text: Text to modify.
        padding: Minimum number of digits. Defaults to 2.

    Returns:
        Text with its trailing number re-padded, or the text unchanged when
        it does not end in digits.
    """
    trailing = _INCREMENT_DIGIT_RE.search(text)
    if trailing is None:
        return text

    number = int(trailing.group(1))
    return text[: trailing.start()] + str(number).zfill(padding)




[docs]
def add_suffix(text: str, suffix: str, separator: str = "_") -> str:
    """Appends a suffix to a text, joined by a separator.

    The text is converted to camelCase first. The suffix is converted too,
    unless it is entirely uppercase, in which case it is kept as given.

    Args:
        text: Text to add the suffix to.
        suffix: The suffix to add.
        separator: The separator to use. Defaults to '_'.

    Returns:
        The converted text, the separator, and the suffix, in that order.
    """
    base = to_camel_case(text=text)
    tail = suffix if suffix.isupper() else to_camel_case(text=suffix)
    return separator.join((base, tail))




[docs]
def add_prefix(text: str, prefix: str, separator: str = "_") -> str:
    """Prepends a prefix to a text, joined by a separator.

    The text is converted to camelCase first. The prefix is converted too,
    unless it is entirely uppercase, in which case it is kept as given.

    Args:
        text: Text to add the prefix to.
        prefix: The prefix to add.
        separator: The separator to use. Defaults to '_'.

    Returns:
        The prefix, the separator, and the converted text, in that order.
    """
    base = to_camel_case(text=text)
    head = prefix if prefix.isupper() else to_camel_case(text=prefix)
    return separator.join((head, base))




[docs]
def add_text(text: str, text_to_add: str = "") -> str:
    """Appends a second text, converted to PascalCase, to a text.

    Only the appended part is converted; ``text`` itself is kept as given.

    Args:
        text: The text to add to.
        text_to_add: The text to add. Defaults to ''.

    Returns:
        ``text`` followed by the PascalCase form of ``text_to_add``, or
        ``text`` alone when there is nothing to add.
    """
    addition = to_pascal_case(text=text_to_add) if text_to_add else text_to_add
    return f"{text}{addition}"




[docs]
def swap_substrings(text: str, mapping: dict[str, str] | None = None) -> str:
    """Swaps substrings in a text based on a mapping dictionary.

    Performs a simultaneous replacement of all keys found in the mapping.
    This prevents double-swapping (e.g. L->R then R->L) and ensures robustness.

    Args:
        text: The text to process.
        mapping: Dictionary mapping substrings (e.g., {'_L_': '_R_'}).
                 If None, defaults to a standard side mapping (L/R, Left/Right).

    Returns:
        The text with substrings swapped if matches are found.
    """
    mapping = mapping or _SIDE_MAPPING

    # Create a regex pattern that matches any of the keys
    # Sort keys by length descending to match longest keys first (e.g. 'Left' - 'L')
    pattern = re.compile(
        "|".join(re.escape(k) for k in sorted(mapping, key=len, reverse=True))
    )

    return pattern.sub(lambda m: mapping[m.group(0)], text)



# DECREMENTERS

[docs]
def decrement_character(text: str) -> str:
    """Decrements a letter sequence in a manner similar to Excel column naming.

    The last letter steps back by one. An ``A``/``a`` rolls over to
    ``Z``/``z`` and borrows from the letter before it. When every letter
    rolls over, the sequence gets one letter shorter (``'AAA'`` -> ``'ZZ'``,
    ``'aaa'`` -> ``'zz'``, ``'A'`` -> ``''``). Rollover is decided per
    character, so mixed-case input keeps the case of each position
    (``'bA'`` -> ``'aZ'``).

    Args:
        text: A text containing only letters ('A'-'Z' or 'a'-'z').

    Returns:
        The decremented letter sequence. An empty text is returned as an
        empty string.
    """
    chars = list(text)
    # Walk from the last character towards the first, borrowing on rollover.
    for position in range(len(chars) - 1, -1, -1):
        current = chars[position]
        if current not in ("A", "a"):
            chars[position] = chr(ord(current) - 1)
            return "".join(chars)
        # Roll over within the character's own case and keep borrowing.
        chars[position] = "Z" if current == "A" else "z"
    # Every position rolled over: the sequence loses its leading letter.
    return "".join(chars[1:])




[docs]
def decrement_digit(text: str, remove_if_one: bool = True) -> str:
    """Subtracts one from the number at the end of a text.

    The width of the existing number is kept through zero padding.

    Args:
        text: Text to decrement.
        remove_if_one: If True, the trailing number is removed altogether
                       when its value is 0 or 1. Defaults to True.

    Returns:
        Text with its trailing number decremented or removed. The text is
        returned unchanged when it does not end in digits, or when the number
        is already 0 and ``remove_if_one`` is False.
    """
    trailing = _INCREMENT_DIGIT_RE.search(text)
    if trailing is None:
        return text

    digits = trailing.group(1)
    number = int(digits)
    stem = text[: trailing.start()]

    if remove_if_one and number <= 1:
        return stem
    if number <= 0:
        # Never go below zero.
        return text

    return stem + str(number - 1).zfill(len(digits))