| 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. |
| 2 | # SPDX-License-Identifier: Apache-2.0 |
| 3 | # /// script |
| 4 | # requires-python = ">=3.10" |
| 5 | # dependencies = [ |
| 6 | # "data-designer", |
| 7 | # "mcp", |
| 8 | # ] |
| 9 | # /// |
"""Basic MCP Recipe: Simple Tool Use Example

This recipe demonstrates the minimal MCP tool-calling workflow with Data Designer:

1) Define a simple MCP server with basic tools (get_fact, add_numbers, list_topics)
2) Configure Data Designer to use the MCP tools
3) Generate data that requires tool calls to complete

Prerequisites:
    - OPENAI_API_KEY environment variable for OpenAI provider model aliases.
    - NVIDIA_API_KEY environment variable for NVIDIA provider model aliases (default model alias is "nvidia-text").

Run:
    # Basic usage (generates 2 records by default)
    uv run basic_mcp.py

    # For help message and available options
    uv run basic_mcp.py --help
"""
| 29 | |
| 30 | from __future__ import annotations |
| 31 | |
| 32 | import argparse |
| 33 | import json |
| 34 | import os |
| 35 | import sys |
| 36 | from pathlib import Path |
| 37 | |
| 38 | from mcp.server.fastmcp import FastMCP |
| 39 | |
| 40 | import data_designer.config as dd |
| 41 | from data_designer.interface import DataDesigner |
| 42 | |
# Name given to the FastMCP server and reused as the MCP provider name in main().
MCP_SERVER_NAME: str = "basic-tools"
| 44 | |
| 45 | |
# =============================================================================
# MCP Server Definition
# =============================================================================

# Server instance that the @mcp_server.tool() functions in this file register on.
mcp_server = FastMCP(MCP_SERVER_NAME)
| 51 | |
# In-memory knowledge base backing the get_fact and list_topics tools.
# Keys are lowercase topic names; values are the fact text returned to the caller.
FACTS: dict[str, str] = {
    "python": "Python was created by Guido van Rossum and first released in 1991.",
    "earth": "Earth is the third planet from the Sun and has one natural satellite, the Moon.",
    "water": "Water (H2O) freezes at 0°C (32°F) and boils at 100°C (212°F) at sea level.",
    "light": "The speed of light in a vacuum is approximately 299,792 kilometers per second.",
}
| 59 | |
| 60 | |
@mcp_server.tool()
def get_fact(topic: str) -> str:
    """Get a fact about a topic from the knowledge base.

    The lookup ignores letter case and surrounding whitespace, so " Python "
    resolves to the "python" entry.

    Args:
        topic: The topic to look up (e.g., "python", "earth", "water", "light")

    Returns:
        A JSON string. On success it holds the requested topic (echoed as given)
        and its fact; otherwise it holds an error message and the list of
        available topics.
    """
    # Tool arguments come from a model and may carry stray whitespace or
    # capitalization; normalize before matching against the lowercase keys.
    key = topic.strip().lower()
    fact = FACTS.get(key)
    if fact is None:
        return json.dumps({"error": f"No fact found for topic: {topic}", "available_topics": list(FACTS)})
    return json.dumps({"topic": topic, "fact": fact})
| 75 | |
| 76 | |
@mcp_server.tool()
def add_numbers(a: float, b: float) -> str:
    """Add two numbers together.

    Args:
        a: First number
        b: Second number

    Returns:
        A JSON string holding both operands and their sum under the keys
        "a", "b" and "sum".
    """
    payload = {"a": a, "b": b, "sum": a + b}
    return json.dumps(payload)
| 90 | |
| 91 | |
@mcp_server.tool()
def list_topics() -> str:
    """List all available topics in the knowledge base.

    Returns:
        A JSON string whose "topics" entry lists every key of FACTS.
    """
    topics = list(FACTS)
    return json.dumps({"topics": topics})
| 100 | |
| 101 | |
| 102 | # ============================================================================= |
| 103 | # Data Designer Configuration |
| 104 | # ============================================================================= |
| 105 | |
| 106 | |
def build_config(model_alias: str, provider_name: str) -> dd.DataDesignerConfigBuilder:
    """Build the Data Designer configuration for basic tool use.

    The configuration registers one tool config (alias "basic-tools") and five
    columns, added in this order: ``topic``, ``fact_response``, ``num_a``,
    ``num_b``, ``math_response``.

    Args:
        model_alias: Model alias used by both LLM text columns.
        provider_name: MCP provider name listed in the tool config's providers.

    Returns:
        The populated config builder.
    """
    tool_alias = "basic-tools"
    trace_mode = dd.TraceType.ALL_MESSAGES

    tools = dd.ToolConfig(
        tool_alias=tool_alias,
        providers=[provider_name],
        allow_tools=["get_fact", "add_numbers", "list_topics"],
        max_tool_call_turns=5,
        timeout_sec=30.0,
    )
    builder = dd.DataDesignerConfigBuilder(tool_configs=[tools])

    # Seed column: the topic each record asks about.
    topic_column = dd.SamplerColumnConfig(
        name="topic",
        sampler_type=dd.SamplerType.CATEGORY,
        params=dd.CategorySamplerParams(values=["python", "earth", "water", "light"]),
    )

    # LLM column that is instructed to call the get_fact tool.
    fact_column = dd.LLMTextColumnConfig(
        name="fact_response",
        model_alias=model_alias,
        prompt=(
            "Use the get_fact tool to look up information about '{{ topic }}', "
            "then provide a one-sentence summary of what you learned."
        ),
        system_prompt="You must call the get_fact tool before answering. Only use information from tool results.",
        tool_alias=tool_alias,
        with_trace=trace_mode,
    )

    # Two sampled operands for the arithmetic prompt.
    num_a_column = dd.SamplerColumnConfig(
        name="num_a",
        sampler_type=dd.SamplerType.UNIFORM,
        params=dd.UniformSamplerParams(low=1, high=100),
    )
    num_b_column = dd.SamplerColumnConfig(
        name="num_b",
        sampler_type=dd.SamplerType.UNIFORM,
        params=dd.UniformSamplerParams(low=1, high=100),
    )

    # LLM column that is instructed to call the add_numbers tool.
    math_column = dd.LLMTextColumnConfig(
        name="math_response",
        model_alias=model_alias,
        prompt=(
            "Use the add_numbers tool to calculate {{ num_a }} + {{ num_b }}, "
            "then report the result in a complete sentence."
        ),
        system_prompt="You must call the add_numbers tool to perform the calculation. Report the exact result.",
        tool_alias=tool_alias,
        with_trace=trace_mode,
    )

    # Registration order matches the order the columns are described above.
    for column in (topic_column, fact_column, num_a_column, num_b_column, math_column):
        builder.add_column(column)

    return builder
| 175 | |
| 176 | |
| 177 | # ============================================================================= |
| 178 | # Main Entry Points |
| 179 | # ============================================================================= |
| 180 | |
| 181 | |
def serve() -> None:
    """Start the module-level MCP server.

    Invoked for the ``serve`` subcommand, which is how Data Designer launches
    this script as a subprocess.
    """
    mcp_server.run()
| 185 | |
| 186 | |
def _positive_int(value: str) -> int:
    """Convert an argparse string to an int, rejecting values below 1."""
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None
    if number < 1:
        raise argparse.ArgumentTypeError(f"must be a positive integer, got {number}")
    return number


def parse_args() -> argparse.Namespace:
    """Parse command line arguments.

    Returns:
        A namespace with ``command`` ("serve" or None when no subcommand is
        given), ``model_alias``, ``num_records`` and ``artifact_path``.

    Raises:
        SystemExit: Raised by argparse on invalid input, including a
            ``--num-records`` value that is not a positive integer.
    """
    parser = argparse.ArgumentParser(description="Basic MCP tool use example with Data Designer.")
    subparsers = parser.add_subparsers(dest="command")

    # 'serve' subcommand for running the MCP server
    subparsers.add_parser("serve", help="Run the MCP server (used by Data Designer)")

    # Default command arguments (demo mode)
    parser.add_argument("--model-alias", type=str, default="nvidia-text", help="Model alias to use for generation")
    # Reject zero/negative counts here instead of letting them fail later in the preview call.
    parser.add_argument("--num-records", type=_positive_int, default=2, help="Number of records to generate")
    # For compatibility with Makefile test-run-recipes target (ignored in demo mode)
    parser.add_argument("--artifact-path", type=str, default=None, help=argparse.SUPPRESS)

    return parser.parse_args()
| 202 | |
| 203 | |
def main() -> None:
    """Main entry point for the demo.

    With the ``serve`` subcommand this only runs the MCP server. Otherwise it
    builds the configuration, previews ``--num-records`` records with Data
    Designer, and displays a sample record.

    Raises:
        RuntimeError: If the model alias starts with "nvidia" and
            NVIDIA_API_KEY is unset or blank.
    """
    args = parse_args()

    # Handle 'serve' subcommand
    if args.command == "serve":
        serve()
        return

    # Demo mode: run Data Designer with the MCP server.
    # A blank key (e.g. `export NVIDIA_API_KEY=`) is treated the same as a
    # missing one so the problem is reported here rather than by a later request.
    uses_nvidia_alias = args.model_alias.startswith("nvidia")
    if uses_nvidia_alias and not os.environ.get("NVIDIA_API_KEY", "").strip():
        raise RuntimeError("NVIDIA_API_KEY must be set when using NVIDIA model aliases.")

    # Configure MCP provider to run via stdio transport (local subprocess):
    # this same script is re-invoked with the 'serve' subcommand.
    mcp_provider = dd.LocalStdioMCPProvider(
        name=MCP_SERVER_NAME,
        command=sys.executable,
        args=[str(Path(__file__).resolve()), "serve"],
    )

    config_builder = build_config(
        model_alias=args.model_alias,
        provider_name=MCP_SERVER_NAME,
    )

    data_designer = DataDesigner(mcp_providers=[mcp_provider])
    preview_results = data_designer.preview(config_builder, num_records=args.num_records)

    # Display results
    print("\n" + "=" * 60)
    print("GENERATED DATA")
    print("=" * 60)
    preview_results.display_sample_record()
| 237 | |
| 238 | |
# Script entry point: run main() only when this file is executed directly.
if __name__ == "__main__":
    main()