nemo_rl.utils.r3_trace#

Module Contents#

Functions#

Data#

API#

nemo_rl.utils.r3_trace._TRACE_ENV#

‘NRL_R3_TRACE’

nemo_rl.utils.r3_trace._TRACE_STEPS_ENV#

‘NRL_R3_TRACE_STEPS’

nemo_rl.utils.r3_trace._TRACE_SAMPLES_ENV#

‘NRL_R3_TRACE_SAMPLES’

nemo_rl.utils.r3_trace._TRACE_DIR_ENV#

‘NRL_R3_TRACE_DIR’

nemo_rl.utils.r3_trace._TRACE_MICROBATCHES_ENV#

‘NRL_R3_TRACE_MICROBATCHES’

nemo_rl.utils.r3_trace._TRACE_VERIFY_FORWARD_ENV#

‘NRL_R3_TRACE_VERIFY_FORWARD’

nemo_rl.utils.r3_trace._DEFAULT_TRACE_DIR#

‘logs/r3_trace’

nemo_rl.utils.r3_trace._DEFAULT_TRACE_STEPS#

1

nemo_rl.utils.r3_trace._DEFAULT_TRACE_SAMPLES#

2

nemo_rl.utils.r3_trace._DEFAULT_TRACE_MICROBATCHES#

2

nemo_rl.utils.r3_trace._write_lock#

‘Lock(…)’

nemo_rl.utils.r3_trace._patch_lock#

‘Lock(…)’

nemo_rl.utils.r3_trace._router_replay_patch_depth#

0

nemo_rl.utils.r3_trace._original_get_replay_topk: Optional[Any]#

None

nemo_rl.utils.r3_trace._event_counts: dict[str, int]#

‘defaultdict(…)’

nemo_rl.utils.r3_trace._context: contextvars.ContextVar[Optional[dict[str, Any]]]#

‘ContextVar(…)’

nemo_rl.utils.r3_trace.r3_trace_enabled() → bool[source]#
nemo_rl.utils.r3_trace.r3_trace_verify_forward_enabled() → bool[source]#
nemo_rl.utils.r3_trace._env_int(name: str, default: int) → int[source]#
nemo_rl.utils.r3_trace._trace_steps() → int[source]#
nemo_rl.utils.r3_trace._trace_samples() → int[source]#
nemo_rl.utils.r3_trace._trace_microbatches() → int[source]#
nemo_rl.utils.r3_trace._next_count(name: str) → int[source]#
nemo_rl.utils.r3_trace._should_trace_step(counter_name: str) → tuple[bool, int][source]#
nemo_rl.utils.r3_trace._current_context() → Optional[dict[str, Any]][source]#
nemo_rl.utils.r3_trace._torch_rank_info() → dict[str, Any][source]#
nemo_rl.utils.r3_trace._trace_path() → pathlib.Path[source]#
nemo_rl.utils.r3_trace._write_record(record: dict[str, Any]) → None[source]#
nemo_rl.utils.r3_trace._tensor_sha256(tensor: Any) → str[source]#
nemo_rl.utils.r3_trace._tensor_preview(
tensor: Any,
limit: int = 16,
) → list[Any][source]#
nemo_rl.utils.r3_trace._tensor_record(
tensor: Any,
*,
preview_limit: int = 16,
) → dict[str, Any][source]#
nemo_rl.utils.r3_trace._shape(
tensor: Optional[Any],
) → Optional[list[int]][source]#
nemo_rl.utils.r3_trace._valid_sample_record(
tensor: Any,
*,
sample_idx: int,
valid_length: int,
preview_limit: int = 16,
) → dict[str, Any][source]#
nemo_rl.utils.r3_trace._length_at(lengths: Any, sample_idx: int) → int[source]#
nemo_rl.utils.r3_trace._tensors_equal(lhs: Any, rhs: Any) → bool[source]#
nemo_rl.utils.r3_trace._expected_with_missing_route_fallback(
expected: Any,
actual: Any,
) → tuple[Any, int][source]#
nemo_rl.utils.r3_trace._router_replay_action_name(action: Any) → str[source]#
nemo_rl.utils.r3_trace._trace_router_replay_topk_use(
*,
replay_instance: Any,
action: Any,
scores: Any,
topk: int,
expected: Optional[Any],
actual: Any,
backward_list_len_before: Optional[int],
backward_list_len_after: Optional[int],
) → None[source]#
nemo_rl.utils.r3_trace._verify_router_replay_forward_context() → collections.abc.Iterator[None][source]#
nemo_rl.utils.r3_trace.trace_rollout_payload(
*,
keys: collections.abc.Sequence[str],
data: Any,
) → None[source]#
nemo_rl.utils.r3_trace.trace_tq_fetch_payload(
*,
stage: str,
keys: collections.abc.Sequence[str],
data: Any,
) → None[source]#
nemo_rl.utils.r3_trace.r3_trace_stage(stage: str) → collections.abc.Iterator[None][source]#
nemo_rl.utils.r3_trace.maybe_r3_trace_stage(stage: str, *, enabled: bool) → Any[source]#
nemo_rl.utils.r3_trace.trace_cp_routed_experts(
*,
routed_experts_cp_sharded: Any,
token_identity_cp_sharded: Optional[Any] = None,
input_ids_cp_sharded: Optional[Any] = None,
cp_token_identity_verified_count: Optional[int] = None,
cp_rank: int,
cp_size: int,
) → None[source]#
nemo_rl.utils.r3_trace.trace_router_replay_assignment(
*,
layer_number: int,
payload_idx: int,
replay_tensor: Any,
) → None[source]#
nemo_rl.utils.r3_trace.trace_router_replay_action(
*,
action: str,
layer_number: Optional[int],
replay_tensor: Optional[Any] = None,
replay_backward_list_len: Optional[int] = None,
) → None[source]#