Source code for nv_ingest_api.util.string_processing.yaml
import os
import re
from typing import Optional
# Matches every supported environment-variable placeholder:
#   $VAR, ${VAR}, $VAR|default, and ${VAR|default}
# The leading negative lookbehind means a "$" that directly follows another "$"
# never starts a placeholder, so an escaped form such as "$$VAR" is not matched.
# Named groups:
#   braced / braced_default -- name and optional default of the ${...} form; the
#       default is any non-empty run of characters other than "}".
#   named / named_default   -- name and optional default of the bare $VAR form; the
#       default is a double-quoted or single-quoted string (backslash escapes are
#       allowed inside the quotes) or, failing that, a run of non-whitespace.
# Compiled with re.VERBOSE, so the line breaks inside the pattern are not significant.
_ENV_VAR_PATTERN = re.compile(
r"""(?<!\$)\$(?:
{(?P<braced>\w+)(?:\|(?P<braced_default>[^}]+))?}
|
(?P<named>\w+)(?:\|(?P<named_default>"[^"\\]*(?:\\.[^"\\]*)*"|'[^'\\]*(?:\\.[^'\\]*)*'|\S+))?
)""",
re.VERBOSE,
)
# Characters whose presence anywhere in a substituted value triggers quoting:
#   : [ ] { } # & * ! | > ' " % @ `
# These carry special meaning in YAML and can cause parsing errors when they
# appear unquoted in value positions.
_YAML_SPECIAL_CHARS = re.compile(r'[:\[\]{}#&*!|>\'"%@`]')
def _quote_if_needed(value: str) -> str:
    """
    Wrap ``value`` in double quotes when inserting it bare could break YAML parsing.

    A value is quoted when it ends with ``:`` (which YAML would otherwise read as
    the start of a mapping) or when it contains any character matched by
    ``_YAML_SPECIAL_CHARS``. Inside the quotes, backslashes and double quotes are
    escaped so that the resulting double-quoted scalar decodes back to the
    original text.

    Args:
        value: The raw text about to be inserted into YAML content.

    Returns:
        ``value`` unchanged when it is empty or needs no quoting; otherwise the
        escaped text wrapped in double quotes.
    """
    if not value:
        return value

    if value.endswith(":") or _YAML_SPECIAL_CHARS.search(value):
        # Backslash is the escape character inside a YAML double-quoted scalar, so
        # it has to be doubled -- and doubled *before* the quotes are escaped,
        # otherwise the backslashes added for the quotes would be doubled too.
        escaped = value.replace("\\", "\\\\").replace('"', '\\"')
        return f'"{escaped}"'

    return value
def _replacer(match: re.Match) -> str:
    """
    Build the replacement text for one placeholder matched by ``_ENV_VAR_PATTERN``.

    Resolution order:
      1. The named environment variable, when it is set (quoted if needed).
      2. The default given after ``|``, resolved through
         ``_resolve_default_with_single_fallback``.
      3. An empty string when neither is available.

    Args:
        match: A match object exposing the ``braced``/``named`` and
            ``braced_default``/``named_default`` groups.

    Returns:
        The text that replaces the placeholder.
    """
    var_name = match.group("braced") or match.group("named")
    default_val = match.group("braced_default") or match.group("named_default")

    # The primary environment variable always wins when it is set.
    value = os.environ.get(var_name)
    if value is not None:
        return _quote_if_needed(value)

    # Otherwise fall back to the default (which may itself name another variable).
    resolved_default = _resolve_default_with_single_fallback(default_val)
    if resolved_default is None:
        return ""

    # A default that is already wrapped in matching quotes is passed through
    # untouched. The length check matters: a lone quote character both starts and
    # ends with a quote but is not a quoted string, and emitting it raw would leave
    # an unbalanced quote in the output.
    if (
        len(resolved_default) >= 2
        and resolved_default[0] == resolved_default[-1]
        and resolved_default[0] in ('"', "'")
    ):
        return resolved_default

    return _quote_if_needed(resolved_default)
def _is_var_ref(token: str) -> Optional[str]:
    """
    Return the variable name when ``token`` is exactly ``$VAR`` or ``${VAR}``.

    The name must consist solely of word characters. Any other token -- empty,
    not starting with ``$``, or carrying extra characters -- yields ``None``.
    """
    # Anything that is empty or does not begin with "$" cannot be a reference.
    if not token or not token.startswith("$"):
        return None

    # Strip the "${" ... "}" wrapper when both ends are present, else just the "$".
    if token.startswith("${") and token.endswith("}"):
        candidate = token[2:-1]
    else:
        candidate = token[1:]

    if re.fullmatch(r"\w+", candidate):
        return candidate
    return None
def _resolve_default_with_single_fallback(default_val: Optional[str]) -> Optional[str]:
    """
    Resolve a placeholder's default, allowing it to name one further env var.

    When the default is itself a ``$VAR`` / ``${VAR}`` reference (as in ``$A|$B``
    or ``${A|$B}``), the value of that variable is returned, or ``None`` if it is
    unset. Any other default is returned unchanged, and a missing default stays
    ``None``.
    """
    if default_val is None:
        return None

    fallback_name = _is_var_ref(default_val)
    if fallback_name is None:
        # A plain literal default: hand it back as written.
        return default_val

    # Only one level of indirection is followed; the looked-up value is final.
    return os.environ.get(fallback_name)
[docs]
def substitute_env_vars_in_yaml_content(raw_content: str) -> str:
    """
    Expand environment-variable placeholders inside raw YAML text.

    Every placeholder of the form ``$VAR``, ``${VAR}``, ``$VAR|default`` or
    ``${VAR|default}`` is replaced with the value of the corresponding
    environment variable. A default may itself be a single reference to another
    variable (``$VAR|$OTHER``, ``${VAR|$OTHER}``), which is consulted when the
    first one is unset.
    Quoted defaults are preserved EXACTLY as written (e.g., 'a,b' keeps quotes).

    Args:
        raw_content: The raw string content of a YAML file.

    Returns:
        The YAML string with environment variables substituted.
    """
    substituted = _ENV_VAR_PATTERN.sub(_replacer, raw_content)
    return substituted