services.model_client

Module Contents
Classes

| Class | Description |
| --- | --- |
| AsyncLLMClient | Interface representing a client connecting to an LLM inference server and making requests asynchronously |
| LLMClient | Interface representing a client connecting to an LLM inference server and making requests synchronously |
API
- class services.model_client.AsyncLLMClient
Bases: abc.ABC
Interface representing a client connecting to an LLM inference server and making requests asynchronously
- abstractmethod async query_model(
- *,
- messages: collections.abc.Iterable,
- model: str,
- conversation_formatter: nemo_curator.services.conversation_formatter.ConversationFormatter | None = None,
- max_tokens: int | None = None,
- n: int | None = 1,
- seed: int | None = None,
- stop: str | None | list[str] = None,
- stream: bool = False,
- temperature: float | None = None,
- top_k: int | None = None,
- top_p: float | None = None,
- ) -> list[str]
- abstractmethod async query_reward_model(
- *,
- messages: collections.abc.Iterable,
- model: str,
- conversation_formatter: nemo_curator.services.conversation_formatter.ConversationFormatter | None = None,
- ) -> dict
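For orientation, below is a minimal sketch of satisfying this interface. The `CannedAsyncClient` class, its placeholder responses, and the score dict are illustrative assumptions, not part of the library; a real subclass would forward each call to an inference server.

```python
import asyncio
from collections.abc import Iterable

from nemo_curator.services.model_client import AsyncLLMClient


class CannedAsyncClient(AsyncLLMClient):
    """Hypothetical test double: answers every query with canned text."""

    async def query_model(
        self,
        *,
        messages: Iterable,
        model: str,
        conversation_formatter=None,
        max_tokens=None,
        n=1,
        seed=None,
        stop=None,
        stream=False,
        temperature=None,
        top_k=None,
        top_p=None,
    ) -> list[str]:
        # A real subclass would send the request to an inference server;
        # here we just return `n` copies of a placeholder completion.
        return ["canned response"] * (n or 1)

    async def query_reward_model(
        self,
        *,
        messages: Iterable,
        model: str,
        conversation_formatter=None,
    ) -> dict:
        # A real subclass would return the reward model's attribute scores;
        # this score dict is a placeholder.
        return {"helpfulness": 0.0}


async def main() -> None:
    client = CannedAsyncClient()
    responses = await client.query_model(
        messages=[{"role": "user", "content": "Hello"}],
        model="my-model",
        n=2,
    )
    print(responses)  # ['canned response', 'canned response']


asyncio.run(main())
```

Because both methods are abstract, a subclass must override both before it can be instantiated; overriding only `query_model` leaves the class abstract.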
- class services.model_client.LLMClient
Bases: abc.ABC
Interface representing a client connecting to an LLM inference server and making requests synchronously
- abstractmethod query_model(
- *,
- messages: collections.abc.Iterable,
- model: str,
- conversation_formatter: nemo_curator.services.conversation_formatter.ConversationFormatter | None = None,
- max_tokens: int | None = None,
- n: int | None = 1,
- seed: int | None = None,
- stop: str | None | list[str] = None,
- stream: bool = False,
- temperature: float | None = None,
- top_k: int | None = None,
- top_p: float | None = None,
- ) -> list[str]
- abstractmethod query_reward_model(
- *,
- messages: collections.abc.Iterable,
- model: str,
- conversation_formatter: nemo_curator.services.conversation_formatter.ConversationFormatter | None = None,
- ) -> dict
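As a usage sketch, assuming NeMo Curator's OpenAIClient wrapper as the concrete LLMClient implementation; the base URL, API key, and model name below are placeholders.

```python
from openai import OpenAI

from nemo_curator import OpenAIClient

# OpenAIClient delegates query_model to an OpenAI-compatible endpoint;
# base_url, api_key, and model are placeholder values.
client = OpenAIClient(
    OpenAI(base_url="https://integrate.api.nvidia.com/v1", api_key="<api key>")
)

responses = client.query_model(
    messages=[{"role": "user", "content": "Write a haiku about data curation."}],
    model="mistralai/mixtral-8x7b-instruct-v0.1",
    temperature=0.2,
    top_p=0.7,
    max_tokens=256,
)
print(responses[0])
```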