module nemoguardrails.streaming#
class StreamingHandler#
Streaming async handler.
Implements the LangChain AsyncCallbackHandler, so it can be notified of new tokens. It also implements the AsyncIterator interface, so it can be used directly to stream back the response.
method StreamingHandler.__init__#
__init__(enable_print: bool = False, enable_buffer: bool = False)
property StreamingHandler.ignore_agent#
Whether to ignore agent callbacks.
property StreamingHandler.ignore_chain#
Whether to ignore chain callbacks.
property StreamingHandler.ignore_chat_model#
Whether to ignore chat model callbacks.
property StreamingHandler.ignore_llm#
Whether to ignore LLM callbacks.
property StreamingHandler.ignore_retriever#
Whether to ignore retriever callbacks.
property StreamingHandler.ignore_retry#
Whether to ignore retry callbacks.
method StreamingHandler.disable_buffering#
disable_buffering()
When we disable the buffer, we process the buffer as a chunk.
method StreamingHandler.enable_buffering#
enable_buffering()
method StreamingHandler.on_chat_model_start#
on_chat_model_start(
serialized: Dict[str, Any],
messages: List[List[langchain.schema.messages.BaseMessage]],
run_id: uuid.UUID,
parent_run_id: Optional[uuid.UUID] = None,
tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any
) → Any
method StreamingHandler.on_llm_end#
on_llm_end(
response: langchain.schema.output.LLMResult,
run_id: uuid.UUID,
parent_run_id: Optional[uuid.UUID] = None,
tags: Optional[List[str]] = None,
**kwargs: Any
) → None
Run when LLM ends running.
method StreamingHandler.on_llm_new_token#
on_llm_new_token(
token: str,
chunk: Optional[Union[langchain.schema.output.GenerationChunk, langchain.schema.output.ChatGenerationChunk]] = None,
run_id: uuid.UUID,
parent_run_id: Optional[uuid.UUID] = None,
tags: Optional[List[str]] = None,
**kwargs: Any
) → None
Run on new LLM token. Only available when streaming is enabled.
method StreamingHandler.push_chunk#
push_chunk(
chunk: Optional[Union[str, langchain.schema.output.GenerationChunk, langchain.schema.messages.AIMessageChunk]]
)
Push a new chunk to the stream.
method StreamingHandler.set_pattern#
set_pattern(prefix: Optional[str] = None, suffix: Optional[str] = None)
Sets the pattern that is expected.
If a prefix or a suffix is specified, it will be removed from the output.
method StreamingHandler.set_pipe_to#
set_pipe_to(another_handler)
method StreamingHandler.wait#
wait()
Waits until the stream finishes and returns the full completion.
method StreamingHandler.wait_top_k_nonempty_lines#
wait_top_k_nonempty_lines(k: int)
Waits for top k non-empty lines from the LLM.
When k lines have been received (and line k+1 has been started), it will return them and remove them from the buffer.