services.model_client

Module Contents
Classes

| Class | Description |
| --- | --- |
| AsyncLLMClient | Interface representing a client connecting to an LLM inference server and making requests asynchronously |
| LLMClient | Interface representing a client connecting to an LLM inference server and making requests synchronously |
API
- class services.model_client.AsyncLLMClient
Bases: abc.ABC
Interface representing a client connecting to an LLM inference server and making requests asynchronously
- abstractmethod async query_model(
- *,
- messages: collections.abc.Iterable,
- model: str,
- conversation_formatter: nemo_curator.services.conversation_formatter.ConversationFormatter | None = None,
- max_tokens: int | None = None,
- n: int | None = 1,
- seed: int | None = None,
- stop: str | None | list[str] = None,
- stream: bool = False,
- temperature: float | None = None,
- top_k: int | None = None,
- top_p: float | None = None,
- ) -> list[str]
- abstractmethod async query_reward_model(
- *,
- messages: collections.abc.Iterable,
- model: str,
- conversation_formatter: nemo_curator.services.conversation_formatter.ConversationFormatter | None = None,
- ) -> dict
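For orientation, below is a minimal sketch of satisfying this interface. The `CannedAsyncClient` class, its placeholder responses, and the score dict are illustrative assumptions, not part of the library; a real subclass would forward each call to an inference server.

```python
import asyncio
from collections.abc import Iterable

from nemo_curator.services.model_client import AsyncLLMClient


class CannedAsyncClient(AsyncLLMClient):
    """Hypothetical test double: answers every query with canned text."""

    async def query_model(
        self,
        *,
        messages: Iterable,
        model: str,
        conversation_formatter=None,
        max_tokens=None,
        n=1,
        seed=None,
        stop=None,
        stream=False,
        temperature=None,
        top_k=None,
        top_p=None,
    ) -> list[str]:
        # A real subclass would send the request to an inference server;
        # here we just return `n` copies of a placeholder completion.
        return ["canned response"] * (n or 1)

    async def query_reward_model(
        self,
        *,
        messages: Iterable,
        model: str,
        conversation_formatter=None,
    ) -> dict:
        # A real subclass would return the reward model's attribute scores;
        # this score dict is a placeholder.
        return {"helpfulness": 0.0}


async def main() -> None:
    client = CannedAsyncClient()
    responses = await client.query_model(
        messages=[{"role": "user", "content": "Hello"}],
        model="my-model",
        n=2,
    )
    print(responses)  # ['canned response', 'canned response']


asyncio.run(main())
```

Because both methods are abstract, a subclass must override both before it can be instantiated; overriding only `query_model` leaves the class abstract.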
- class services.model_client.LLMClient
Bases: abc.ABC
Interface representing a client connecting to an LLM inference server and making requests synchronously
- abstractmethod query_model(
- *,
- messages: collections.abc.Iterable,
- model: str,
- conversation_formatter: nemo_curator.services.conversation_formatter.ConversationFormatter | None = None,
- max_tokens: int | None = None,
- n: int | None = 1,
- seed: int | None = None,
- stop: str | None | list[str] = None,
- stream: bool = False,
- temperature: float | None = None,
- top_k: int | None = None,
- top_p: float | None = None,
- ) -> list[str]
- abstractmethod query_reward_model(
- *,
- messages: collections.abc.Iterable,
- model: str,
- conversation_formatter: nemo_curator.services.conversation_formatter.ConversationFormatter | None = None,
- ) -> dict
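As a usage sketch, assuming NeMo Curator's OpenAIClient wrapper as the concrete LLMClient implementation; the base URL, API key, and model name below are placeholders.

```python
from openai import OpenAI

from nemo_curator import OpenAIClient

# OpenAIClient delegates query_model to an OpenAI-compatible endpoint;
# base_url, api_key, and model are placeholder values.
client = OpenAIClient(
    OpenAI(base_url="https://integrate.api.nvidia.com/v1", api_key="<api key>")
)

responses = client.query_model(
    messages=[{"role": "user", "content": "Write a haiku about data curation."}],
    model="mistralai/mixtral-8x7b-instruct-v0.1",
    temperature=0.2,
    top_p=0.7,
    max_tokens=256,
)
print(responses[0])
```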