nemo_curator.stages.text.experimental.translation.pipeline

View as Markdown

Experimental translation pipeline composition.

Module Contents

Classes

| Name | Description |
| --- | --- |
| TranslationStage | Experimental composite stage for translation and optional quality scoring. |

Data

_VALID_OUTPUT_MODES

API

class nemo_curator.stages.text.experimental.translation.pipeline.TranslationStage(
name: str = 'TranslationStage',
source_lang: str,
target_lang: str,
text_field: str | list[str] = 'text',
output_field: str = 'translated_text',
segmentation_mode: str = 'coarse',
min_segment_chars: int = 0,
client: nemo_curator.models.client.llm_client.AsyncLLMClient | None = None,
model_name: str = '',
generation_config: nemo_curator.models.client.llm_client.GenerationConfig | None = None,
backend_type: str = 'llm',
backend_config: dict = dict(),
enable_faith_eval: bool = False,
faith_threshold: float = 2.5,
faith_model_name: str = '',
filter_enabled: bool = True,
output_mode: str = 'replaced',
merge_scores: bool = False,
reconstruct_messages: bool = False,
messages_field: str = 'messages',
messages_content_field: str = 'content',
skip_translated: bool = False,
translation_column: str = 'translated_text'
)
Dataclass

Bases: CompositeStage[DocumentBatch, DocumentBatch]

Experimental composite stage for translation and optional quality scoring.

backend_config
dict = field(default_factory=dict)
backend_type
str = 'llm'
client
AsyncLLMClient | None = None
enable_faith_eval
bool = False
faith_model_name
str = ''
faith_threshold
float = 2.5
filter_enabled
bool = True
generation_config
GenerationConfig | None = None
merge_scores
bool = False
messages_content_field
str = 'content'
messages_field
str = 'messages'
min_segment_chars
int = 0
model_name
str = ''
name
str = 'TranslationStage'
output_field
str = 'translated_text'
output_mode
str = 'replaced'
reconstruct_messages
bool = False
segmentation_mode
str = 'coarse'
skip_translated
bool = False
source_lang
str
target_lang
str
text_field
str | list[str] = 'text'
translation_column
str = 'translated_text'
nemo_curator.stages.text.experimental.translation.pipeline.TranslationStage.__post_init__() -> None
nemo_curator.stages.text.experimental.translation.pipeline.TranslationStage._build_stages() -> list[nemo_curator.stages.base.ProcessingStage]

Construct the ordered list of sub-stages.

nemo_curator.stages.text.experimental.translation.pipeline.TranslationStage._validate_faith_config() -> None

Validate optional FAITH scoring configuration.

nemo_curator.stages.text.experimental.translation.pipeline.TranslationStage._validate_languages() -> None

Validate source and target language codes.

nemo_curator.stages.text.experimental.translation.pipeline.TranslationStage._validate_output_mode() -> None

Validate requested output mode.

nemo_curator.stages.text.experimental.translation.pipeline.TranslationStage._validate_score_merging() -> None

Validate score-merging options.

nemo_curator.stages.text.experimental.translation.pipeline.TranslationStage._validate_translation_backend() -> None

Validate backend-specific translation requirements.

nemo_curator.stages.text.experimental.translation.pipeline.TranslationStage.decompose() -> list[nemo_curator.stages.base.ProcessingStage]

Return the ordered sub-stages for pipeline execution.

nemo_curator.stages.text.experimental.translation.pipeline._VALID_OUTPUT_MODES = {'replaced', 'raw', 'both'}