Product Info QA | NVIDIA NeMo Data Designer

Download Recipe

1 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 # SPDX-License-Identifier: Apache-2.0
3 # /// script
4 # requires-python = ">=3.10"
5 # dependencies = [
6 #     "data-designer",
7 #     "pydantic",
8 # ]
9 # ///
10 """Product Info Q&A Recipe
11 
12 Generate synthetic product Q&A pairs with controlled hallucination for training
13 and evaluating AI assistants. Each record includes a generated product (with name,
14 features, description, price), a user question, and an AI answer. About half of the
15 answers are generated without product context (hallucinated). Includes LLM judge
16 evaluation for completeness and accuracy.
17 
18 Prerequisites:
19     - OPENAI_API_KEY environment variable for OpenAI provider model aliases (default model alias is "openai-text").
20     - NVIDIA_API_KEY environment variable for NVIDIA provider model aliases.
21 
22 Run:
23     # Basic usage (generates 5 records by default)
24     uv run product_info_qa.py
25 
26     # For help message and available options
27     uv run product_info_qa.py --help
28 """
29 
30 import string
31 from pathlib import Path
32 
33 from pydantic import BaseModel, Field
34 
35 import data_designer.config as dd
36 from data_designer.interface import DataDesigner, DatasetCreationResults
37 
38 
def build_config(model_alias: str) -> dd.DataDesignerConfigBuilder:
    """Assemble the column configuration for the product Q&A dataset.

    Columns are added in this order: sampled seed columns (category, price,
    first letter, hallucination flag), the derived dollar price, the
    LLM-generated product info, question and answer, the LLM judge column, and
    two expression columns that pull the judge's scores out into their own
    columns.

    Args:
        model_alias: Model alias used for every LLM-backed column (product
            info, question, answer and judge).

    Returns:
        The populated ``DataDesignerConfigBuilder``.
    """
    config_builder = dd.DataDesignerConfigBuilder()

    # Product category, drawn from a fixed list.
    config_builder.add_column(
        dd.SamplerColumnConfig(
            name="category",
            sampler_type=dd.SamplerType.CATEGORY,
            params=dd.CategorySamplerParams(
                values=[
                    "Electronics",
                    "Clothing",
                    "Home Appliances",
                    "Groceries",
                    "Toiletries",
                    "Sports Equipment",
                    "Toys",
                    "Books",
                    "Pet Supplies",
                    "Tools & Home Improvement",
                    "Beauty",
                    "Health & Wellness",
                    "Outdoor Gear",
                    "Automotive",
                    "Jewelry",
                    "Watches",
                    "Office Supplies",
                    "Gifts",
                    "Arts & Crafts",
                    "Baby & Kids",
                    "Music",
                    "Video Games",
                    "Movies",
                    "Software",
                    "Tech Devices",
                ]
            ),
        )
    )

    # Price is sampled in units of ten dollars and converted to dollars below.
    config_builder.add_column(
        dd.SamplerColumnConfig(
            name="price_tens_of_dollars",
            sampler_type=dd.SamplerType.UNIFORM,
            params=dd.UniformSamplerParams(low=1, high=200),
        )
    )

    # Jinja filters bind more tightly than arithmetic operators, so the whole
    # arithmetic expression is parenthesised; otherwise `round(2)` would be
    # applied to the literal 0.01 only and the price would never be rounded.
    config_builder.add_column(
        dd.ExpressionColumnConfig(
            name="product_price",
            expr="{{ ((price_tens_of_dollars * 10) - 0.01) | round(2) }}",
            dtype="float",
        )
    )

    # Required first letter of the product name (A-Z), referenced by the
    # product_info prompt.
    config_builder.add_column(
        dd.SamplerColumnConfig(
            name="first_letter",
            sampler_type=dd.SamplerType.CATEGORY,
            params=dd.CategorySamplerParams(values=list(string.ascii_uppercase)),
        )
    )

    # Flag that selects which branch of the answer prompt is rendered.
    config_builder.add_column(
        dd.SamplerColumnConfig(
            name="is_hallucination",
            sampler_type=dd.SamplerType.BERNOULLI,
            params=dd.BernoulliSamplerParams(p=0.5),
        )
    )

    # Structured product record. `ProductInfo` is a module-level name that is
    # looked up when this function is called, not when it is defined.
    config_builder.add_column(
        dd.LLMStructuredColumnConfig(
            name="product_info",
            model_alias=model_alias,
            prompt=(
                "Generate a realistic product description for a product in the {{ category }} "
                "category that costs {{ product_price }}.\n"
                "The name of the product MUST start with the letter {{ first_letter }}.\n"
            ),
            output_format=ProductInfo,
        )
    )

    # User question about the generated product.
    config_builder.add_column(
        dd.LLMTextColumnConfig(
            name="question",
            model_alias=model_alias,
            prompt=("Ask a question about the following product:\n\n {{ product_info }}"),
        )
    )

    # Answer: the product context is included only when is_hallucination == 0;
    # when it is 1 the model is told to make up whatever it needs.
    config_builder.add_column(
        dd.LLMTextColumnConfig(
            name="answer",
            model_alias=model_alias,
            prompt=(
                "{%- if is_hallucination == 0 -%}\n"
                "<product_info>\n"
                "{{ product_info }}\n"
                "</product_info>\n"
                "{%- endif -%}\n"
                "User Question: {{ question }}\n"
                "Directly and succinctly answer the user's question.\n"
                "{%- if is_hallucination == 1 -%}\n"
                "Make up whatever information you need to in order to answer the user's request.\n"
                "{%- endif -%}"
            ),
        )
    )

    # Evaluate answer quality. The judge always sees the product info, even
    # for answers that were generated without it. `answer_quality_scores` is a
    # module-level name looked up at call time.
    config_builder.add_column(
        dd.LLMJudgeColumnConfig(
            name="llm_answer_metrics",
            model_alias=model_alias,
            prompt=(
                "<product_info>\n"
                "{{ product_info }}\n"
                "</product_info>\n"
                "User Question: {{question }}\n"
                "AI Assistant Answer: {{ answer }}\n"
                "Judge the AI assistant's response to the user's question about the product described in <product_info>."
            ),
            scores=answer_quality_scores,
        )
    )

    # Surface each judge score as its own column.
    config_builder.add_column(
        dd.ExpressionColumnConfig(
            name="completeness_result",
            expr="{{ llm_answer_metrics.Completeness.score }}",
        )
    )

    config_builder.add_column(
        dd.ExpressionColumnConfig(
            name="accuracy_result",
            expr="{{ llm_answer_metrics.Accuracy.score }}",
        )
    )

    return config_builder
181 
182 
def create_dataset(
    config_builder: dd.DataDesignerConfigBuilder,
    num_records: int,
    artifact_path: Path | str | None = None,
) -> DatasetCreationResults:
    """Generate a dataset from ``config_builder``.

    Args:
        config_builder: Column configuration to generate from.
        num_records: Number of records requested from ``DataDesigner.create``.
        artifact_path: Forwarded to the ``DataDesigner`` constructor as
            ``artifact_path``; ``None`` is passed through unchanged.

    Returns:
        Whatever ``DataDesigner.create`` returns for this configuration.
    """
    designer = DataDesigner(artifact_path=artifact_path)
    return designer.create(config_builder, num_records=num_records)
191 
192 
class ProductInfo(BaseModel):
    # Structured output schema for the LLM-generated product; passed as
    # `output_format` of the `product_info` column in `build_config`.
    # Documented with comments rather than a class docstring because pydantic
    # copies a model's docstring into the generated JSON schema description.

    # Name of the product.
    product_name: str = Field(..., description="A realistic product name for the market.")
    # Between one and three feature strings.
    key_features: list[str] = Field(..., min_length=1, max_length=3, description="Key product features.")
    # Free-text marketing description.
    description: str = Field(
        ...,
        description="A short, engaging description of what the product does, highlighting a unique but believable feature.",
    )
    # `build_config` asks for a product costing (uniform[1, 200] * 10) - 0.01,
    # i.e. 9.99 to 1999.99, so the bounds must cover that whole range. The
    # previous ge=10 / le=1000 rejected roughly half of the requested prices.
    # NOTE(review): pydantic documents `decimal_places` as a Decimal
    # constraint; confirm it has the intended effect on a float field.
    price_usd: float = Field(..., description="The price of the product", ge=9.99, le=2000, decimal_places=2)
201 
202 
# Judge rubric: how fully the answer addresses the user's question.
completeness_score = dd.Score(
    name="Completeness",
    description=(
        "Evaluation of AI assistant's thoroughness in addressing "
        "all aspects of the user's query."
    ),
    options={
        "Complete": (
            "The response thoroughly covers all key points requested in the question, "
            "providing sufficient detail to satisfy the user's information needs."
        ),
        "PartiallyComplete": (
            "The response addresses the core question but omits certain important details "
            "or fails to elaborate on relevant aspects that were requested."
        ),
        "Incomplete": (
            "The response significantly lacks necessary information, missing major components "
            "of what was asked and leaving the query largely unanswered."
        ),
    },
)

# Judge rubric: how well the answer agrees with the product information.
accuracy_score = dd.Score(
    name="Accuracy",
    description=(
        "Evaluation of how factually correct the AI assistant's response is "
        "relative to the product information."
    ),
    options={
        "Accurate": (
            "The information provided aligns perfectly with the product specifications "
            "without introducing any misleading or incorrect details."
        ),
        "PartiallyAccurate": (
            "While some information is correctly stated, the response contains minor factual errors "
            "or potentially misleading statements about the product."
        ),
        "Inaccurate": (
            "The response presents significantly wrong information about the product, "
            "with claims that contradict the actual product details."
        ),
    },
)

# Rubrics handed to the LLM judge column; the score names are also used by
# the `llm_answer_metrics.<Name>.score` expressions in `build_config`.
answer_quality_scores = [completeness_score, accuracy_score]
224 
225 
if __name__ == "__main__":
    from argparse import ArgumentParser

    # Command-line options: (flag, value type, default).
    cli = ArgumentParser()
    for flag, value_type, fallback in (
        ("--model-alias", str, "openai-text"),
        ("--num-records", int, 5),
        ("--artifact-path", str, None),
    ):
        cli.add_argument(flag, type=value_type, default=fallback)
    options = cli.parse_args()

    # Build the column configuration, then run generation.
    builder = build_config(model_alias=options.model_alias)
    dataset = create_dataset(
        builder,
        num_records=options.num_records,
        artifact_path=options.artifact_path,
    )

    print(f"Dataset saved to: {dataset.artifact_storage.final_dataset_path}")

    # Load the analysis for the generated dataset and emit its report.
    dataset.load_analysis().to_report()