| 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. |
| 2 | # SPDX-License-Identifier: Apache-2.0 |
| 3 | # /// script |
| 4 | # requires-python = ">=3.10" |
| 5 | # dependencies = [ |
| 6 | # "data-designer", |
| 7 | # "pydantic", |
| 8 | # ] |
| 9 | # /// |
| 10 | """Product Info Q&A Recipe |
| 11 | |
| 12 | Generate synthetic product Q&A pairs with controlled hallucination for training |
| 13 | and evaluating AI assistants. Each record includes a generated product (with name, |
| 14 | features, description, price), a user question, and an AI answer. Half of the |
| 15 | answers are generated without product context (hallucinated). Includes LLM judge |
| 16 | evaluation for completeness and accuracy. |
| 17 | |
| 18 | Prerequisites: |
| 19 | - OPENAI_API_KEY environment variable for OpenAI provider model aliases (default model alias is "openai-text"). |
| 20 | - NVIDIA_API_KEY environment variable for NVIDIA provider model aliases. |
| 21 | |
| 22 | Run: |
| 23 | # Basic usage (generates 5 records by default) |
| 24 | uv run product_info_qa.py |
| 25 | |
| 26 | # For help message and available options |
| 27 | uv run product_info_qa.py --help |
| 28 | """ |
| 29 | |
| 30 | import string |
| 31 | from pathlib import Path |
| 32 | |
| 33 | from pydantic import BaseModel, Field |
| 34 | |
| 35 | import data_designer.config as dd |
| 36 | from data_designer.interface import DataDesigner, DatasetCreationResults |
| 37 | |
| 38 | |
def build_config(model_alias: str) -> dd.DataDesignerConfigBuilder:
    """Assemble the column configuration for the product Q&A recipe.

    Columns are added in this order: sampled seeds (``category``,
    ``price_tens_of_dollars``, ``first_letter``, ``is_hallucination``), the
    derived ``product_price``, the LLM-generated ``product_info``,
    ``question`` and ``answer``, the judge column ``llm_answer_metrics``, and
    two expression columns that pull the individual judge scores out.

    Args:
        model_alias: Model alias passed to every LLM-backed column.

    Returns:
        The populated ``DataDesignerConfigBuilder``.
    """
    config_builder = dd.DataDesignerConfigBuilder()

    # --- Sampled seed columns -------------------------------------------------
    config_builder.add_column(
        dd.SamplerColumnConfig(
            name="category",
            sampler_type=dd.SamplerType.CATEGORY,
            params=dd.CategorySamplerParams(
                values=[
                    "Electronics",
                    "Clothing",
                    "Home Appliances",
                    "Groceries",
                    "Toiletries",
                    "Sports Equipment",
                    "Toys",
                    "Books",
                    "Pet Supplies",
                    "Tools & Home Improvement",
                    "Beauty",
                    "Health & Wellness",
                    "Outdoor Gear",
                    "Automotive",
                    "Jewelry",
                    "Watches",
                    "Office Supplies",
                    "Gifts",
                    "Arts & Crafts",
                    "Baby & Kids",
                    "Music",
                    "Video Games",
                    "Movies",
                    "Software",
                    "Tech Devices",
                ]
            ),
        )
    )

    config_builder.add_column(
        dd.SamplerColumnConfig(
            name="price_tens_of_dollars",
            sampler_type=dd.SamplerType.UNIFORM,
            params=dd.UniformSamplerParams(low=1, high=200),
        )
    )

    # Jinja2 filters bind tighter than arithmetic operators, so the whole
    # subtraction must be parenthesised; otherwise ``round(2)`` is applied to
    # the literal 0.01 only and the resulting price is never rounded.
    config_builder.add_column(
        dd.ExpressionColumnConfig(
            name="product_price",
            expr="{{ ((price_tens_of_dollars * 10) - 0.01) | round(2) }}",
            dtype="float",
        )
    )

    config_builder.add_column(
        dd.SamplerColumnConfig(
            name="first_letter",
            sampler_type=dd.SamplerType.CATEGORY,
            params=dd.CategorySamplerParams(values=list(string.ascii_uppercase)),
        )
    )

    # Bernoulli flag (p=0.5) that the answer prompt below branches on.
    config_builder.add_column(
        dd.SamplerColumnConfig(
            name="is_hallucination",
            sampler_type=dd.SamplerType.BERNOULLI,
            params=dd.BernoulliSamplerParams(p=0.5),
        )
    )

    # --- LLM-generated columns ------------------------------------------------
    config_builder.add_column(
        dd.LLMStructuredColumnConfig(
            name="product_info",
            model_alias=model_alias,
            prompt=(
                "Generate a realistic product description for a product in the {{ category }} "
                "category that costs {{ product_price }}.\n"
                "The name of the product MUST start with the letter {{ first_letter }}.\n"
            ),
            output_format=ProductInfo,
        )
    )

    config_builder.add_column(
        dd.LLMTextColumnConfig(
            name="question",
            model_alias=model_alias,
            prompt=("Ask a question about the following product:\n\n {{ product_info }}"),
        )
    )

    # The product context is included only when is_hallucination == 0; when it
    # is 1 the model is instead told to invent whatever it needs.
    # NOTE(review): `{%-` / `-%}` trim whitespace on both sides of each tag, so
    # under default Jinja2 whitespace handling the newlines around the tags are
    # stripped (e.g. "...user's question.Make up ...") — confirm the rendered
    # prompt reads as intended.
    config_builder.add_column(
        dd.LLMTextColumnConfig(
            name="answer",
            model_alias=model_alias,
            prompt=(
                "{%- if is_hallucination == 0 -%}\n"
                "<product_info>\n"
                "{{ product_info }}\n"
                "</product_info>\n"
                "{%- endif -%}\n"
                "User Question: {{ question }}\n"
                "Directly and succinctly answer the user's question.\n"
                "{%- if is_hallucination == 1 -%}\n"
                "Make up whatever information you need to in order to answer the user's request.\n"
                "{%- endif -%}"
            ),
        )
    )

    # Evaluate answer quality
    config_builder.add_column(
        dd.LLMJudgeColumnConfig(
            name="llm_answer_metrics",
            model_alias=model_alias,
            prompt=(
                "<product_info>\n"
                "{{ product_info }}\n"
                "</product_info>\n"
                "User Question: {{question }}\n"
                "AI Assistant Answer: {{ answer }}\n"
                "Judge the AI assistant's response to the user's question about the product described in <product_info>."
            ),
            scores=answer_quality_scores,
        )
    )

    # Surface each judge score as its own column.
    config_builder.add_column(
        dd.ExpressionColumnConfig(
            name="completeness_result",
            expr="{{ llm_answer_metrics.Completeness.score }}",
        )
    )

    config_builder.add_column(
        dd.ExpressionColumnConfig(
            name="accuracy_result",
            expr="{{ llm_answer_metrics.Accuracy.score }}",
        )
    )

    return config_builder
| 181 | |
| 182 | |
def create_dataset(
    config_builder: dd.DataDesignerConfigBuilder,
    num_records: int,
    artifact_path: Path | str | None = None,
) -> DatasetCreationResults:
    """Generate a dataset from ``config_builder`` and return the results.

    Args:
        config_builder: Column configuration handed to ``DataDesigner.create``.
        num_records: Number of records requested from ``DataDesigner.create``.
        artifact_path: Forwarded to the ``DataDesigner`` constructor;
            defaults to ``None``.

    Returns:
        The object returned by ``DataDesigner.create``.
    """
    designer = DataDesigner(artifact_path=artifact_path)
    return designer.create(config_builder, num_records=num_records)
| 191 | |
| 192 | |
class ProductInfo(BaseModel):
    # Structured-output schema used as `output_format` for the `product_info`
    # column. Documented with comments rather than a class docstring because
    # Pydantic emits a model docstring as the JSON-schema description, which
    # would alter the schema this model produces.

    product_name: str = Field(..., description="A realistic product name for the market.")
    # Between one and three features per product.
    key_features: list[str] = Field(..., min_length=1, max_length=3, description="Key product features.")
    description: str = Field(
        ...,
        description="A short, engaging description of what the product does, highlighting a unique but believable feature.",
    )
    # The recipe prompts with a price derived from a uniform sample of 1-200
    # "tens of dollars" (x10, minus 0.01), i.e. roughly 9.99 to 1999.99, so the
    # bounds must admit that whole range. `decimal_places` was dropped: it is a
    # Decimal-only constraint and is not valid on a float field.
    price_usd: float = Field(..., description="The price of the product", gt=0, le=2000)
| 201 | |
| 202 | |
# Rubrics passed to the LLM judge column. Each score has a name, a description
# of what is being judged, and a mapping of option label -> meaning.
answer_quality_scores = [
    dd.Score(
        name="Completeness",
        description="Evaluation of AI assistant's thoroughness in addressing all aspects of the user's query.",
        options={
            "Complete": "The response thoroughly covers all key points requested in the question, providing sufficient detail to satisfy the user's information needs.",
            "PartiallyComplete": "The response addresses the core question but omits certain important details or fails to elaborate on relevant aspects that were requested.",
            "Incomplete": "The response significantly lacks necessary information, missing major components of what was asked and leaving the query largely unanswered.",
        },
    ),
    dd.Score(
        name="Accuracy",
        description="Evaluation of how factually correct the AI assistant's response is relative to the product information.",
        options={
            "Accurate": "The information provided aligns perfectly with the product specifications without introducing any misleading or incorrect details.",
            "PartiallyAccurate": "While some information is correctly stated, the response contains minor factual errors or potentially misleading statements about the product.",
            "Inaccurate": "The response presents significantly wrong information about the product, with claims that contradict the actual product details.",
        },
    ),
]

# Keep the individual rubrics addressable by name, in list order.
completeness_score, accuracy_score = answer_quality_scores
| 224 | |
| 225 | |
if __name__ == "__main__":
    import argparse

    # Command-line options as (flag, type, default) triples.
    cli = argparse.ArgumentParser()
    for flag, kind, default in (
        ("--model-alias", str, "openai-text"),
        ("--num-records", int, 5),
        ("--artifact-path", str, None),
    ):
        cli.add_argument(flag, type=kind, default=default)
    options = cli.parse_args()

    # Build the configuration, generate the dataset, then report on it.
    dataset = create_dataset(
        build_config(model_alias=options.model_alias),
        num_records=options.num_records,
        artifact_path=options.artifact_path,
    )

    print(f"Dataset saved to: {dataset.artifact_storage.final_dataset_path}")

    dataset.load_analysis().to_report()