NeMo Safe Synthesizer REST API Reference#
Complete reference for the NeMo Safe Synthesizer REST API with job management and result access.
Overview#
The NeMo Safe Synthesizer API provides comprehensive functionality for private synthetic data generation through REST endpoints. All operations are performed through the jobs API with detailed configuration options.
Client Initialization#
from nemo_microservices import NeMoMicroservices
# Create the SDK client; base_url is the address of the NeMo Microservices
# endpoint (a local deployment on port 8080 in this example).
client = NeMoMicroservices(base_url="http://localhost:8080")
# Access NeMo Safe Synthesizer through the beta API
# (safe_synthesizer_api is simply a shorthand for client.beta.safe_synthesizer).
safe_synthesizer_api = client.beta.safe_synthesizer
Job Management#
Create Jobs#
jobs.create(**job_request)#
Create a new NeMo Safe Synthesizer job with complete configuration.
Parameters:
- name (str): Job name for identification
- project (str): Project identifier (default: "default")
- spec (dict): Job specification with data source and configuration
Returns: Job object with ID and status information
# Request payload for jobs.create(): "name" and "project" identify the job,
# and "spec" carries the data source plus the pipeline configuration.
job_request = {
    "name": "my-safe-synthesizer-job",
    "project": "default",
    "spec": {
        "data_source": "hf://datasets/default/safe-synthesizer/my-dataset.csv",
        "config": {
            "enable_synthesis": True,
            "enable_replace_pii": True,
            # PII replacement: one step that rewrites detected entities.
            "replace_pii": {
                "globals": {"locales": ["en_US"]},
                "steps": [
                    {
                        "rows": {
                            "update": [
                                {
                                    "entity": ["email", "phone_number"],
                                    "value": "column.entity | fake",
                                },
                            ],
                        },
                    },
                ],
            },
            "generation": {"num_records": 5000, "temperature": 0.8},
            "privacy": {"privacy_hyperparams": {"dp": True, "epsilon": 6.0}},
            "evaluation": {"mia_enabled": True, "aia_enabled": True},
        },
    },
}
# Submit the job: the top-level keys of job_request are passed as keyword
# arguments, and the returned job object carries the ID and status.
job = client.beta.safe_synthesizer.jobs.create(**job_request)
Monitor Jobs#
jobs.retrieve(job_id) and jobs.get_status(job_id)#
Get current job status and complete job information.
Parameters:
- job_id (str): Job identifier
Returns: Job object with status and metadata
# Get complete job information (recommended)
# `job` is rebound to the freshly retrieved copy, so later reads of job.status
# reflect the state at the time of this call.
job = client.beta.safe_synthesizer.jobs.retrieve(job.id)
print(f"Status: {job.status}")
print(f"Created: {job.created_at}")
# Or get status only
# get_status() takes the same job ID; its return value is printed as-is.
status = client.beta.safe_synthesizer.jobs.get_status(job.id)
print(f"Status: {status}")
Job Monitoring Pattern#
Monitor job completion with polling:
import time
def wait_for_job_completion(job_id, poll_interval=30, timeout=None):
    """Poll a Safe Synthesizer job until it reaches a terminal status.

    Uses the module-level ``client`` to retrieve the job on every iteration
    and prints the observed status each time.

    Args:
        job_id: Identifier of the job to watch.
        poll_interval: Seconds to sleep between two status checks.
        timeout: Optional upper bound, in seconds, on the total wait.
            ``None`` (the default) waits indefinitely.

    Returns:
        The job object from the last ``jobs.retrieve`` call, whose status is
        one of "completed", "error" or "cancelled".

    Raises:
        TimeoutError: If ``timeout`` elapses before the job finishes.
    """
    terminal_statuses = ("completed", "error", "cancelled")
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = None if timeout is None else time.monotonic() + timeout
    while True:
        current_job = client.beta.safe_synthesizer.jobs.retrieve(job_id)
        status = current_job.status
        print(f"Job {job_id}: {status}")
        if status in terminal_statuses:
            return current_job
        if deadline is None:
            time.sleep(poll_interval)
            continue
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            raise TimeoutError(
                f"Job {job_id} still {status!r} after {timeout} seconds"
            )
        # Never sleep past the deadline.
        time.sleep(min(poll_interval, remaining))
# Use monitoring function
# NOTE: despite its name, final_status holds the whole job object returned by
# wait_for_job_completion(); read final_status.status for the status string.
final_status = wait_for_job_completion(job.id)
Result Access#
List Results#
jobs.results.list(job_id)#
List all available results for a completed job.
Parameters:
- job_id (str): Job identifier
Returns: List of result metadata objects
# List the result artifacts of the job and print the metadata of each one.
results = client.beta.safe_synthesizer.jobs.results.list(job.id)
for result in results:
    print(f"Result ID: {result.id}")
    print(f"Format: {result.format}")
    print(f"Canonical: {result.canonical}")
    print(f"Size: {result.size_bytes} bytes")
Download Results#
jobs.results.download(result_id, job_id)#
Download specific result artifacts.
Parameters:
- result_id (str): Result identifier
- job_id (str): Job identifier
Returns: File content as bytes or string
def _save_result(path, content):
    """Write a downloaded result to *path*, whether it is bytes or text.

    The download call is documented as returning bytes or a string, so the
    file mode is chosen from the actual type instead of assuming text.
    """
    if isinstance(content, (bytes, bytearray)):
        with open(path, "wb") as f:
            f.write(content)
    else:
        # newline="" writes the text verbatim (no line-ending translation).
        with open(path, "w", encoding="utf-8", newline="") as f:
            f.write(content)


# Download synthetic data (CSV format)
for result in results:
    if result.canonical and result.format == "csv":
        synthetic_data_csv = client.beta.safe_synthesizer.jobs.results.download(
            result.id,
            job_id=job.id
        )
        # Save to file or process as needed
        _save_result("synthetic_data.csv", synthetic_data_csv)
        print("Downloaded synthetic dataset")
        break  # only the first canonical CSV result is needed

# Download evaluation report (HTML format)
for result in results:
    if result.format == "html" and "report" in result.id:
        evaluation_report = client.beta.safe_synthesizer.jobs.results.download(
            result.id,
            job_id=job.id
        )
        _save_result("evaluation_report.html", evaluation_report)
        print("Downloaded evaluation report")
        break  # stop at the first matching report
Configuration Reference#
Job Configuration Structure#
Complete job configuration includes all pipeline components:
# Full job configuration, one top-level key per pipeline component.
complete_config = {
    # PII detection and replacement
    "replace_pii": {
        "globals": {
            "locales": ["en_US"],
            "seed": 42,
            "ner": {
                "ner_threshold": 0.8,
                "enable_gliner": True,
                "entities": ["name", "email", "phone_number"],
            },
        },
        "steps": [
            {
                "rows": {
                    "update": [
                        {"name": "full_name", "value": "fake.name()"},
                        {"name": "email", "value": "fake.email()"},
                        {
                            "entity": ["phone_number"],
                            "value": "fake.phone_number()",
                        },
                    ],
                },
            },
        ],
    },
    # Model training
    "training": {
        "pretrained_model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "num_input_records_to_sample": "auto",
    },
    # Synthetic data generation
    "generation": {
        "num_records": 5000,
        "temperature": 0.8,
        "top_p": 0.95,
        "use_structured_generation": True,
    },
    # Privacy hyperparameters
    "privacy": {
        "privacy_hyperparams": {"dp": True, "epsilon": 6.0, "delta": "auto"},
    },
    # Evaluation
    "evaluation": {
        "mia_enabled": True,
        "aia_enabled": True,
        "correlation_columns": 100,
    },
}
PII Configuration#
Configure PII detection and replacement:
# PII detection and replacement settings.
pii_config = {
    # Settings shared by every step
    "globals": {
        "locales": ["en_US", "en_GB"],
        "seed": 42,
        "ner": {
            "ner_threshold": 0.8,
            "enable_gliner": True,
            "enable_regexps": True,
        },
    },
    # A single step with one replacement rule per entity type
    "steps": [
        {
            "rows": {
                "update": [
                    {"entity": ["name"], "value": "fake.name()"},
                    {"entity": ["email"], "value": "fake.email()"},
                    {"entity": ["phone_number"], "value": "fake.phone_number()"},
                ],
            },
        },
    ],
}
Training Configuration#
Configure model training and privacy parameters:
# Model training settings.
training_config = {
    "pretrained_model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "num_input_records_to_sample": "auto",
    "learning_rate": 5e-5,
    "batch_size": 4,
    "num_epochs": 2,
}
Generation Configuration#
Configure synthetic data generation:
# Synthetic data generation settings.
generation_config = {
    "num_records": 5000,
    "temperature": 0.8,
    "top_p": 0.95,
    "repetition_penalty": 1.1,
    "use_structured_generation": True,
}
Evaluation Configuration#
Configure quality and privacy evaluation:
# Quality and privacy evaluation settings.
evaluation_config = {
    "mia_enabled": True,
    "aia_enabled": True,
    "correlation_columns": 100,
    "sqs_report_columns": 250,
}
Error Handling#
Standard Exception Handling#
Handle API errors during job operations:
from nemo_microservices import APIError, APIStatusError
# Create a job, wait for it, and report the outcome; API failures are caught
# from the most specific exception type to the most general one.
try:
    job = client.beta.safe_synthesizer.jobs.create(**job_request)
    # Monitor job completion
    final_status = wait_for_job_completion(job.id)
    if final_status.status == "completed":
        results = client.beta.safe_synthesizer.jobs.results.list(job.id)
        print(f"Job completed with {len(results)} results")
    else:
        print(f"Job failed with status: {final_status.status}")
except APIStatusError as e:
    # Must come before APIError: APIStatusError derives from APIError in the
    # SDK's exception hierarchy, so the broader handler would otherwise
    # intercept it and this branch would never run.
    print(f"HTTP error {e.status_code}: {e.message}")
except APIError as e:
    print(f"API error: {e.message}")
except Exception as e:
    # Last-resort boundary for anything that is not an SDK error.
    print(f"Unexpected error: {e}")
Complete Job Example#
End-to-End NeMo Safe Synthesizer#
Note
Before you start, make sure that you have:
Stored CSVs locally
Uploaded them using the following steps:
# Set HF_ENDPOINT to the local endpoint used in this guide
# (presumably so that huggingface-cli talks to it instead of the public Hub; confirm for your deployment).
export HF_ENDPOINT="http://localhost:3000/v1/hf"
# Upload customer-data.csv to the dataset repository default/safe-synthesizer.
huggingface-cli upload --repo-type dataset default/safe-synthesizer customer-data.csv
from nemo_microservices import NeMoMicroservices
import time
# Initialize client
# base_url is the address of the NeMo Microservices endpoint
# (a local deployment on port 8080 in this example).
client = NeMoMicroservices(base_url="http://localhost:8080")
# Define complete configuration
# ("spec" holds the uploaded dataset location and the pipeline config).
job_request = {
    "name": "complete-safe-synthesizer-job",
    "description": "End-to-end private synthetic data generation",
    "project": "default",
    "spec": {
        "data_source": "hf://datasets/default/safe-synthesizer/customer-data.csv",
        "config": {
            "enable_synthesis": True,
            "enable_replace_pii": True,
            "replace_pii": {
                "globals": {"locales": ["en_US"]},
                "steps": [
                    {
                        "rows": {
                            "update": [
                                {
                                    "entity": ["email", "phone_number"],
                                    "value": "column.entity | fake",
                                },
                            ],
                        },
                    },
                ],
            },
            "training": {
                "pretrained_model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            },
            "privacy": {"privacy_hyperparams": {"dp": True, "epsilon": 6.0}},
            "generation": {
                "num_records": 5000,
                "temperature": 0.8,
                "use_structured_generation": True,
            },
            "evaluation": {"mia_enabled": True, "aia_enabled": True},
        },
    },
}
def _save_result(path, content):
    """Write a downloaded result to *path*, whether it is bytes or text.

    The download call is documented as returning bytes or a string, so the
    file mode is chosen from the actual type instead of assuming text.
    """
    if isinstance(content, (bytes, bytearray)):
        with open(path, "wb") as f:
            f.write(content)
    else:
        # newline="" writes the text verbatim (no line-ending translation).
        with open(path, "w", encoding="utf-8", newline="") as f:
            f.write(content)


# Create and monitor job
job = client.beta.safe_synthesizer.jobs.create(**job_request)
print(f"Created job: {job.id}")

# Wait for completion
# (wait_for_job_completion is the polling helper from "Job Monitoring Pattern";
# it returns the final job object.)
final_status = wait_for_job_completion(job.id)

# Download results
if final_status.status == "completed":
    results = client.beta.safe_synthesizer.jobs.results.list(job.id)
    for result in results:
        if result.canonical and result.format == "csv":
            synthetic_data = client.beta.safe_synthesizer.jobs.results.download(
                result.id, job_id=job.id
            )
            _save_result("synthetic_customer_data.csv", synthetic_data)
            print("Downloaded synthetic dataset")
        elif result.format == "html" and "report" in result.id:
            report = client.beta.safe_synthesizer.jobs.results.download(
                result.id, job_id=job.id
            )
            _save_result("evaluation_report.html", report)
            print("Downloaded evaluation report")