nat.profiler.callbacks.langchain_callback_handler#

Attributes#

logger

Classes#

LangchainProfilerHandler

Callback handler that tracks NIM token usage and request statistics.

Functions#

_extract_tools_schema

Module Contents#

logger#
_extract_tools_schema(invocation_params: dict) → list#
class LangchainProfilerHandler#

Bases: langchain_core.callbacks.AsyncCallbackHandler, nat.profiler.callbacks.base_callback_class.BaseProfilerCallback

Callback handler that tracks NIM token usage and request statistics.
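
A minimal usage sketch (not part of the module itself): because LangchainProfilerHandler is a standard AsyncCallbackHandler, it can be passed through LangChain's usual callbacks config. The FakeListChatModel stand-in below is an assumption for illustration; a real NIM-backed model would populate the token counters from its reported usage metadata.

```python
import asyncio

from langchain_core.language_models import FakeListChatModel

from nat.profiler.callbacks.langchain_callback_handler import LangchainProfilerHandler


async def main() -> None:
    handler = LangchainProfilerHandler()
    # Stand-in model for illustration only; fake models report no token
    # usage, so the counters stay at 0 here. A NIM-backed chat model
    # would increment them via its usage metadata.
    llm = FakeListChatModel(responses=["Hello!"])
    await llm.ainvoke("Hi there", config={"callbacks": [handler]})
    print(handler.total_tokens, handler.prompt_tokens,
          handler.completion_tokens, handler.successful_requests)


asyncio.run(main())
```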

total_tokens: int = 0#
prompt_tokens: int = 0#
completion_tokens: int = 0#
successful_requests: int = 0#
raise_error = True#

Whether to raise an error if an exception occurs.

run_inline = True#

Whether to run the callback inline.

_lock#
last_call_ts#
step_manager#
_state#
_run_id_to_model_name#
_run_id_to_llm_input#
_run_id_to_tool_input#
_run_id_to_start_time#
property always_verbose: bool#

Whether to call verbose callbacks even if verbose is False.

_extract_token_base_model(
usage_metadata: dict[str, Any],
) → nat.profiler.callbacks.token_usage_base_model.TokenUsageBaseModel#
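
For orientation, a hedged sketch of the usage_metadata shape this helper consumes. LangChain chat models attach a dict of this form to AIMessage.usage_metadata (keys per langchain_core's UsageMetadata convention); how it maps onto TokenUsageBaseModel's fields is an assumption here, not confirmed by this page.

```python
from langchain_core.messages import AIMessage

# Values are illustrative. _extract_token_base_model presumably maps
# input_tokens / output_tokens / total_tokens onto the handler's
# prompt / completion / total counters (assumption).
msg = AIMessage(
    content="hi",
    usage_metadata={"input_tokens": 12, "output_tokens": 34, "total_tokens": 46},
)
print(msg.usage_metadata["total_tokens"])  # 46
```
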
async on_llm_start(serialized: dict[str, Any], prompts: list[str], **kwargs: Any) → None#

Run when the model starts running.

!!! warning

    This method is called for non-chat models (regular LLMs). If you’re implementing a handler for a chat model, you should use on_chat_model_start instead.

Args:

- serialized: The serialized LLM.
- prompts: The prompts.
- run_id: The run ID. This is the ID of the current run.
- parent_run_id: The parent run ID. This is the ID of the parent run.
- tags: The tags.
- metadata: The metadata.
- **kwargs: Additional keyword arguments.

async on_chat_model_start(serialized: dict[str, Any], messages: list[list[langchain_core.messages.BaseMessage]], *, run_id: uuid.UUID, parent_run_id: uuid.UUID | None = None, tags: list[str] | None = None, metadata: dict[str, Any] | None = None, **kwargs: Any) → Any#

Run when a chat model starts running.

!!! warning

    This method is called for chat models. If you’re implementing a handler for a non-chat model, you should use on_llm_start instead.

Args:

- serialized: The serialized chat model.
- messages: The messages.
- run_id: The run ID. This is the ID of the current run.
- parent_run_id: The parent run ID. This is the ID of the parent run.
- tags: The tags.
- metadata: The metadata.
- **kwargs: Additional keyword arguments.
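
A hedged sketch of the dispatch the two warnings above describe: a completion-style LLM routes through on_llm_start, while a chat model routes through on_chat_model_start. The fake models are illustrative stand-ins.

```python
import asyncio

from langchain_core.language_models import FakeListChatModel, FakeListLLM

from nat.profiler.callbacks.langchain_callback_handler import LangchainProfilerHandler


async def main() -> None:
    handler = LangchainProfilerHandler()
    cfg = {"callbacks": [handler]}

    # Completion-style model: dispatched to on_llm_start.
    await FakeListLLM(responses=["a"]).ainvoke("prompt", config=cfg)

    # Chat model: dispatched to on_chat_model_start.
    await FakeListChatModel(responses=["b"]).ainvoke("prompt", config=cfg)


asyncio.run(main())
```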

async on_llm_new_token(token: str, **kwargs: Any) → None#

Collect stats for just the token.
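
A streaming sketch (assumed usage, not from this page): per-token callbacks fire when the model is streamed, so each chunk yielded by astream triggers on_llm_new_token on the handler.

```python
import asyncio

from langchain_core.language_models import FakeListChatModel

from nat.profiler.callbacks.langchain_callback_handler import LangchainProfilerHandler


async def main() -> None:
    handler = LangchainProfilerHandler()
    llm = FakeListChatModel(responses=["streamed reply"])
    # Each streamed chunk triggers on_llm_new_token on the handler.
    async for chunk in llm.astream("Hi", config={"callbacks": [handler]}):
        print(chunk.content, end="", flush=True)


asyncio.run(main())
```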

async on_llm_end(
response: langchain_core.outputs.LLMResult,
**kwargs: Any,
) → None#

Collect token usage.

async on_tool_start(serialized: dict[str, Any], input_str: str, *, run_id: uuid.UUID, parent_run_id: uuid.UUID | None = None, tags: list[str] | None = None, metadata: dict[str, Any] | None = None, inputs: dict[str, Any] | None = None, **kwargs: Any) → Any#

Run when the tool starts running.

Args:

- serialized: The serialized tool.
- input_str: The input string.
- run_id: The run ID. This is the ID of the current run.
- parent_run_id: The parent run ID. This is the ID of the parent run.
- tags: The tags.
- metadata: The metadata.
- inputs: The inputs.
- **kwargs: Additional keyword arguments.

async on_tool_end(
output: Any,
*,
run_id: uuid.UUID,
parent_run_id: uuid.UUID | None = None,
**kwargs: Any,
) → Any#

Run when the tool ends running.

Args:

- output: The output of the tool.
- run_id: The run ID. This is the ID of the current run.
- parent_run_id: The parent run ID. This is the ID of the parent run.
- tags: The tags.
- **kwargs: Additional keyword arguments.
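
Finally, a hedged sketch of the tool-side hooks: invoking a LangChain tool with the handler in its config fires on_tool_start before the tool body runs and on_tool_end after it returns. The add tool is illustrative only.

```python
import asyncio

from langchain_core.tools import tool

from nat.profiler.callbacks.langchain_callback_handler import LangchainProfilerHandler


@tool
def add(a: int, b: int) -> int:
    """Add two integers."""
    return a + b


async def main() -> None:
    handler = LangchainProfilerHandler()
    # on_tool_start fires before the tool body, on_tool_end after it.
    result = await add.ainvoke({"a": 2, "b": 3}, config={"callbacks": [handler]})
    print(result)  # 5


asyncio.run(main())
```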