nemo_rl.models.generation.vllm.vllm_backend#

Module Contents#

Classes#

API#

class nemo_rl.models.generation.vllm.vllm_backend.VllmInternalWorkerExtension#
init_collective(
    rank_prefix: int,
    ip: str,
    port: int,
    world_size: int,
    train_world_size: int,
) → None#

Initialize the collective communication.

report_device_id() → str#

Retrieve the UUID of the current CUDA device.

get_zmq_address()#

Get the ZMQ address for the current device.

maybe_init_zmq()#

Initialize the ZMQ socket if it doesn’t exist.

prepare_refit_info(state_dict_info: dict[str, Any]) → None#

Prepare state dict metadata for weight refitting and IPC streaming.

Parameters:

state_dict_info (dict) – A dictionary containing the info for refit, e.g. {tensor_name: (shape, dtype)}.

update_weights_via_ipc_zmq() → bool#

Receive and update model weights via ZMQ IPC socket.

Returns:

True if weights were successfully updated.

Return type:

bool

update_weights_from_collective() → bool#

Update the model weights from collective communication.

cleanup() → None#

Shutdown and cleanup resources.

start_gpu_profiling() → None#

Start GPU profiling.

stop_gpu_profiling() → None#

Stop GPU profiling.