nemo_rl.environments.code_environment#

Module Contents#

Classes#

CodeEnvConfig

CodeEnvMetadata

CodeExecutionWorker

Helper class to process individual code execution steps.

CodeEnvironment

Code execution environment that maintains state between steps.

API#

class nemo_rl.environments.code_environment.CodeEnvConfig#

Bases: typing.TypedDict

num_workers: int#

None

terminate_on_evaluation: bool#

None

class nemo_rl.environments.code_environment.CodeEnvMetadata#

Bases: typing.TypedDict

context: Dict[str, Any]#

None

working_dir: str#

None

class nemo_rl.environments.code_environment.CodeExecutionWorker#

Helper class to process individual code execution steps.

Initialization

sanitize(obj: Any) -> Any#
format_result(
result: Any,
code: Optional[str] = None,
lookahead: Optional[str] = None,
) -> str#
execute(
message_batch: str,
metadata_batch: List[nemo_rl.environments.code_environment.CodeEnvMetadata],
) -> Tuple[List[Dict[str, str]], List[bool], List[Any]]#

Execute code in a sandboxed environment.

chdir(dir: str)#

Change to temporary directory for file operations.

safe_open(file: str, *args, **kwargs)#

Safe version of open() that only allows access to temporary directory.

safe_import(name: str, *args, **kwargs)#

Safe version of import that blocks risky modules.

class nemo_rl.environments.code_environment.CodeEnvironment(
cfg: nemo_rl.environments.code_environment.CodeEnvConfig,
)#

Bases: nemo_rl.environments.interfaces.EnvironmentInterface

Code execution environment that maintains state between steps.

Initialization

step(
message_log_batch: List[nemo_rl.data.interfaces.LLMMessageLogType],
metadata_batch: List[nemo_rl.environments.code_environment.CodeEnvMetadata],
) -> nemo_rl.environments.interfaces.EnvironmentReturn#

Process a batch of code execution steps.

shutdown()#
global_post_process_and_metrics(
batch: nemo_rl.distributed.batched_data_dict.BatchedDataDict,
) -> Tuple[nemo_rl.distributed.batched_data_dict.BatchedDataDict, dict]#

Compute metrics for the batch.