Multi-Turn Chat | NVIDIA NeMo Data Designer

Download Recipe

1 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 # SPDX-License-Identifier: Apache-2.0
3 # /// script
4 # requires-python = ">=3.10"
5 # dependencies = [
6 #     "data-designer",
7 #     "pydantic",
8 # ]
9 # ///
10 """Multi-Turn Chat Generation Recipe
11 
12 Generate synthetic multi-turn conversations between users and AI assistants across
13 different domains (Tech Support, Personal Finances, Educational Guidance). Each
14 conversation varies in length, complexity, and user mood. Includes toxicity
15 evaluation of user messages using an LLM judge.
16 
17 Prerequisites:
18     - OPENAI_API_KEY environment variable for OpenAI provider model aliases (default model alias is "openai-text").
19     - NVIDIA_API_KEY environment variable for NVIDIA provider model aliases.
20 
21 Run:
22     # Basic usage (generates 5 records by default)
23     uv run multi_turn_chat.py
24 
25     # For help message and available options
26     uv run multi_turn_chat.py --help
27 """
28 
29 from pathlib import Path
30 from typing import Literal
31 
32 from pydantic import BaseModel, Field
33 
34 import data_designer.config as dd
35 from data_designer.interface import DataDesigner, DatasetCreationResults
36 
37 
def build_config(model_alias: str) -> dd.DataDesignerConfigBuilder:
    """Assemble the column configuration for the multi-turn chat dataset.

    Columns are registered in this order:

    1. Sampled seed columns: ``domain``, ``topic`` (sub-category of ``domain``),
       ``complexity``, ``conversation_length`` and ``user_mood``.
    2. LLM text columns: ``assistant_system_prompt`` and ``user_task``.
    3. The structured ``conversation`` column, shaped by ``ChatConversation``.
    4. The ``toxicity_evaluation`` judge column, scored with ``toxicity_score``.

    Args:
        model_alias: Model alias handed to every LLM-backed column.

    Returns:
        The config builder with all columns added.
    """

    def category_column(name: str, values: list) -> dd.SamplerColumnConfig:
        # The plain categorical seed columns differ only in their name and values.
        return dd.SamplerColumnConfig(
            name=name,
            sampler_type=dd.SamplerType.CATEGORY,
            params=dd.CategorySamplerParams(values=values),
        )

    # Keys must match the values sampled for the "domain" column.
    topics_by_domain = {
        "Tech Support": [
            "Troubleshooting a Laptop",
            "Setting Up a Home Wi-Fi Network",
            "Installing Software Updates",
        ],
        "Personal Finances": [
            "Budgeting Advice",
            "Understanding Taxes",
            "Investment Strategies",
        ],
        "Educational Guidance": [
            "Choosing a College Major",
            "Effective Studying Techniques",
            "Learning a New Language",
        ],
    }

    # The placeholders refer to columns registered earlier in this function.
    conversation_prompt = (
        "<task>\n{{user_task}}\n</task>\n\n"
        "<system_prompt>{{assistant_system_prompt}}</system_prompt>\n\n"
        "Generate a conversation between a user and an AI assistant with <system_prompt> about <task>.\n"
        "User is asking the assistant for advice and is in a {{user_mood}} mood.\n"
        "The conversation must be {{conversation_length}} messages in length.\n"
        "The conversation must come to a natural end in {{conversation_length}} messages and if the assistant is unable "
        "to solve the user's needs by then, they should offer to continue the conversation later or redirect them to additional resources.\n"
        "The conversation must be realistic and natural, incorporating details from both the user's and assistant's profiles.\n"
        "The AI assistant role has the system prompt defined in <system_prompt>. All of its responses must conform to <system_prompt>:\n"
    )

    builder = dd.DataDesignerConfigBuilder()

    # --- Sampled seed columns ---
    builder.add_column(category_column("domain", ["Tech Support", "Personal Finances", "Educational Guidance"]))
    builder.add_column(
        dd.SamplerColumnConfig(
            name="topic",
            sampler_type=dd.SamplerType.SUBCATEGORY,
            params=dd.SubcategorySamplerParams(category="domain", values=topics_by_domain),
        )
    )
    builder.add_column(category_column("complexity", ["Basic", "Intermediate", "Advanced"]))
    builder.add_column(category_column("conversation_length", [2, 4, 6, 8]))
    builder.add_column(
        category_column("user_mood", ["happy", "silly", "sarcastic", "combative", "disappointed", "toxic"])
    )

    # --- LLM-generated text columns ---
    builder.add_column(
        dd.LLMTextColumnConfig(
            name="assistant_system_prompt",
            prompt=(
                "Write a reasonable system prompt for a helpful AI assistant with expertise in "
                "{{domain}} and {{topic}}. The AI assistant must not engage in harmful behaviors."
            ),
            model_alias=model_alias,
        )
    )
    builder.add_column(
        dd.LLMTextColumnConfig(
            name="user_task",
            prompt="Define a simple task related to {{topic}} of {{complexity}} complexity for the user.",
            model_alias=model_alias,
        )
    )

    # --- Structured conversation and its toxicity judgement ---
    builder.add_column(
        dd.LLMStructuredColumnConfig(
            name="conversation",
            prompt=conversation_prompt,
            output_format=ChatConversation,
            model_alias=model_alias,
        )
    )
    builder.add_column(
        dd.LLMJudgeColumnConfig(
            name="toxicity_evaluation",
            prompt=TOXICITY_PROMPT_TEMPLATE,
            scores=[toxicity_score],
            model_alias=model_alias,
        )
    )

    return builder
150 
151 
def create_dataset(
    config_builder: dd.DataDesignerConfigBuilder,
    num_records: int,
    artifact_path: Path | str | None = None,
) -> DatasetCreationResults:
    """Generate a dataset from ``config_builder`` and return the creation results.

    Args:
        config_builder: Column configuration to generate from.
        num_records: Number of records requested from ``DataDesigner.create``.
        artifact_path: Passed straight to the ``DataDesigner`` constructor; ``None``
            leaves the choice of location to ``DataDesigner``.

    Returns:
        Whatever ``DataDesigner.create`` returns for this run.
    """
    return DataDesigner(artifact_path=artifact_path).create(config_builder, num_records=num_records)
160 
161 
# Rubric used by the "toxicity_evaluation" judge column.
# Note the direction of the scale: 4 means no toxicity and 0 means extreme toxicity,
# so a HIGHER value indicates a CLEANER conversation.
toxicity_score = dd.Score(
    options={
        4: "No toxicity detected; conversation is respectful and appropriate.",
        3: "Mild toxicity; slightly rude or sarcastic but not hateful or harmful.",
        2: "Moderate toxicity; contains disrespectful or harassing language.",
        1: "Severe toxicity; contains overt hate, harassment, or harmful content.",
        0: "Extreme toxicity; contains dangerous, threatening, or abusive content.",
    },
    description="Evaluation of harmful, harassing, or hateful language in conversations",
    name="Toxicity",
)
173 
# Judge prompt for the toxicity evaluation; "{{ conversation }}" is the placeholder for
# the generated conversation column. Each tuple element below is one line of the prompt.
TOXICITY_PROMPT_TEMPLATE = "\n".join(
    (
        "You are an expert in content moderation and digital communication ethics. "
        "You are evaluating the toxicity level in user messages.",
        # The trailing space on the next line is part of the original prompt text.
        "Take a deep breath and carefully analyze the conversation provided. ",
        "Use the rubric to score the level of toxicity in the USER'S messages only.",
        "Consider:",
        "- Focus primarily on harassment, hate, or harmful language",
        "- Evaluate the full context of the conversation",
        "- Be objective and consistent in your assessment",
        "- If any single message exhibits toxicity, it should influence the final score",
        "## CONVERSATION",
        "{{ conversation }}",
    )
)
187 
188 
class Message(BaseModel):
    """A single message turn in the conversation."""

    # NOTE(review): pydantic carries the class docstring and each Field description into the
    # model's JSON schema, so this text presumably reaches the LLM - confirm before rewording.

    # Speaker of the turn; the Literal restricts it to exactly "user" or "assistant".
    role: Literal["user", "assistant"] = Field(..., description="Which role is writing the message.")
    # Text of the turn. Passing `...` as the default marks the field as required.
    content: str = Field(..., description="Message contents.")
194 
195 
# Output schema for the structured "conversation" column (used as `output_format` in build_config).
# NOTE(review): pydantic exposes a model's docstring as its JSON-schema description, so the
# bullet rules in the docstring below presumably act as generation instructions for the LLM.
# Nothing in this class enforces them (no validators) - confirm before rewording or relying on them.
class ChatConversation(BaseModel):
    """A chat conversation between a specific user and an AI assistant.
    * All conversations are initiated by the user role.
    * The assistant role always responds to the user message.
    * Turns alternate between user and assistant roles.
    * The last message is always from the assistant role.
    * Message content can be long or short.
    * All assistant messages are faithful responses and must be answered fully.
    """

    # Turns of the conversation as a list of Message objects; required field.
    conversation: list[Message] = Field(..., description="List of all messages in the conversation.")
207 
208 
if __name__ == "__main__":
    # Script entry point: parse CLI options, build the config, generate the dataset,
    # then print where it was saved and produce the analysis report.
    from argparse import ArgumentParser

    cli = ArgumentParser()
    # (flag, type, default) for each supported command-line option.
    for flag, value_type, fallback in (
        ("--model-alias", str, "openai-text"),
        ("--num-records", int, 5),
        ("--artifact-path", str, None),
    ):
        cli.add_argument(flag, type=value_type, default=fallback)
    options = cli.parse_args()

    run_results = create_dataset(
        build_config(model_alias=options.model_alias),
        num_records=options.num_records,
        artifact_path=options.artifact_path,
    )

    print(f"Dataset saved to: {run_results.artifact_storage.final_dataset_path}")

    # Load the analysis for the run and turn it into a report.
    run_results.load_analysis().to_report()