services.model_client

Module Contents

Classes

AsyncLLMClient
    Interface representing a client that connects to an LLM inference server and makes requests asynchronously.

LLMClient
    Interface representing a client that connects to an LLM inference server and makes requests synchronously.

API

class services.model_client.AsyncLLMClient

Bases: abc.ABC

Interface representing a client that connects to an LLM inference server and makes requests asynchronously.

abstractmethod async query_model(
    *,
    messages: collections.abc.Iterable,
    model: str,
    conversation_formatter: nemo_curator.services.conversation_formatter.ConversationFormatter | None = None,
    max_tokens: int | None = None,
    n: int | None = 1,
    seed: int | None = None,
    stop: str | None | list[str] = None,
    stream: bool = False,
    temperature: float | None = None,
    top_k: int | None = None,
    top_p: float | None = None,
) -> list[str]
abstractmethod async query_reward_model(
    *,
    messages: collections.abc.Iterable,
    model: str,
    conversation_formatter: nemo_curator.services.conversation_formatter.ConversationFormatter | None = None,
) -> dict
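This page does not prescribe a concrete backend, but the signatures map naturally onto an OpenAI-compatible chat endpoint. Below is a minimal sketch of a concrete subclass, assuming the third-party openai package, a local OpenAI-compatible server URL, and the hypothetical class name AsyncOpenAICompatibleClient; none of these are part of this module, and the reward-model method simply returns the raw response payload because reward response shapes are server-specific.

import asyncio
from collections.abc import Iterable

from openai import AsyncOpenAI  # assumed third-party dependency

from nemo_curator.services.model_client import AsyncLLMClient


class AsyncOpenAICompatibleClient(AsyncLLMClient):
    """Hypothetical AsyncLLMClient backed by an OpenAI-compatible server."""

    def __init__(self, base_url: str, api_key: str) -> None:
        self._client = AsyncOpenAI(base_url=base_url, api_key=api_key)

    async def query_model(
        self,
        *,
        messages: Iterable,
        model: str,
        conversation_formatter=None,  # unused: chat endpoints format conversations themselves
        max_tokens: int | None = None,
        n: int | None = 1,
        seed: int | None = None,
        stop: str | None | list[str] = None,
        stream: bool = False,
        temperature: float | None = None,
        top_k: int | None = None,  # the OpenAI chat API has no top_k; ignored in this sketch
        top_p: float | None = None,
    ) -> list[str]:
        response = await self._client.chat.completions.create(
            messages=list(messages),
            model=model,
            max_tokens=max_tokens,
            n=n,
            seed=seed,
            stop=stop,
            stream=False,  # streaming handling omitted from this sketch
            temperature=temperature,
            top_p=top_p,
        )
        return [choice.message.content for choice in response.choices]

    async def query_reward_model(
        self, *, messages: Iterable, model: str, conversation_formatter=None
    ) -> dict:
        # Reward models score conversations rather than generate text; the
        # response shape is server-specific, so return the raw payload.
        response = await self._client.chat.completions.create(
            messages=list(messages), model=model
        )
        return response.model_dump()


async def main() -> None:
    client = AsyncOpenAICompatibleClient("http://localhost:8000/v1", api_key="EMPTY")
    replies = await client.query_model(
        messages=[{"role": "user", "content": "Hello!"}],
        model="my-model",  # placeholder model name
        max_tokens=64,
    )
    print(replies[0])


asyncio.run(main())

Because query_model is keyword-only, every argument after self must be passed by name; this keeps subclasses and callers aligned as parameters are added.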
class services.model_client.LLMClient

Bases: abc.ABC

Interface representing a client that connects to an LLM inference server and makes requests synchronously.

abstractmethod query_model(
    *,
    messages: collections.abc.Iterable,
    model: str,
    conversation_formatter: nemo_curator.services.conversation_formatter.ConversationFormatter | None = None,
    max_tokens: int | None = None,
    n: int | None = 1,
    seed: int | None = None,
    stop: str | None | list[str] = None,
    stream: bool = False,
    temperature: float | None = None,
    top_k: int | None = None,
    top_p: float | None = None,
) -> list[str]
abstractmethod query_reward_model(
    *,
    messages: collections.abc.Iterable,
    model: str,
    conversation_formatter: nemo_curator.services.conversation_formatter.ConversationFormatter | None = None,
) -> dict
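On the synchronous side, callers can accept any concrete LLMClient and remain backend-agnostic. A minimal caller-side sketch, assuming dict-style chat messages and a placeholder model name (both assumptions of this sketch, not requirements stated by this module):

from nemo_curator.services.model_client import LLMClient


def generate_once(client: LLMClient, prompt: str, model: str) -> str:
    """Send a single-turn request and return the first completion."""
    responses = client.query_model(
        messages=[{"role": "user", "content": prompt}],
        model=model,
        max_tokens=256,
        n=1,
        seed=0,           # fixed seed for reproducibility, if the server honors it
        temperature=0.0,  # near-deterministic decoding
    )
    return responses[0]

Note that query_model returns list[str] even when n=1, so callers index into the result; query_reward_model instead returns a dict whose contents depend on the serving backend.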