NeMo Safe Synthesizer REST API Reference#

Complete reference for the NeMo Safe Synthesizer REST API with job management and result access.

Overview#

The NeMo Safe Synthesizer API provides private synthetic data generation through REST endpoints. All operations go through the jobs API, which accepts detailed configuration options. The examples on this page call those endpoints through the nemo_microservices Python SDK.

Client Initialization#

from nemo_microservices import NeMoMicroservices

# Create the SDK client; base_url here targets a deployment on localhost port 8080.
client = NeMoMicroservices(base_url="http://localhost:8080")

# Access NeMo Safe Synthesizer through the beta API
# (the handle is kept in a variable so later calls can reuse it).
safe_synthesizer_api = client.beta.safe_synthesizer

Job Management#

Create Jobs#

jobs.create(**job_request)#

Create a new NeMo Safe Synthesizer job with complete configuration.

Parameters:

  • name (str): Job name for identification

  • project (str): Project identifier (default: “default”)

  • spec (dict): Job specification with data source and configuration

Returns: Job object with ID and status information

# Request payload for jobs.create(): job name, project, and the spec
# holding the input data source plus the pipeline configuration.
job_request = {
    "name": "my-safe-synthesizer-job",
    "project": "default",
    "spec": {
        "data_source": "hf://datasets/default/safe-synthesizer/my-dataset.csv",
        "config": {
            "enable_synthesis": True,
            "enable_replace_pii": True,
            # PII replacement: one step that updates rows for two entity types.
            "replace_pii": {
                "globals": {"locales": ["en_US"]},
                "steps": [
                    {
                        "rows": {
                            "update": [
                                {
                                    "entity": ["email", "phone_number"],
                                    "value": "column.entity | fake",
                                },
                            ],
                        },
                    },
                ],
            },
            "generation": {
                "num_records": 5000,
                "temperature": 0.8,
            },
            "privacy": {
                "privacy_hyperparams": {
                    "dp": True,
                    "epsilon": 6.0,
                },
            },
            "evaluation": {
                "mia_enabled": True,
                "aia_enabled": True,
            },
        },
    },
}

# Submit the job; the keys of job_request are unpacked into keyword arguments.
job = client.beta.safe_synthesizer.jobs.create(**job_request)

Monitor Jobs#

jobs.retrieve(job_id) and jobs.get_status(job_id)#

Get current job status and complete job information.

Parameters:

  • job_id (str): Job identifier

Returns: jobs.retrieve returns the Job object with status and metadata; jobs.get_status returns the job status only

# Get complete job information (recommended)
# The returned object replaces the earlier `job` variable; its status and
# created_at attributes are printed below.
job = client.beta.safe_synthesizer.jobs.retrieve(job.id)
print(f"Status: {job.status}")
print(f"Created: {job.created_at}")

# Or get status only
# The value is printed as-is, without accessing any attribute on it.
status = client.beta.safe_synthesizer.jobs.get_status(job.id)
print(f"Status: {status}")

Job Monitoring Pattern#

Monitor job completion with polling:

import time

def wait_for_job_completion(job_id, poll_interval=30, timeout=None):
    """Poll a job until it reaches a terminal status.

    Args:
        job_id: Identifier of the job to monitor.
        poll_interval: Seconds to sleep between status checks.
        timeout: Maximum number of seconds to wait. ``None`` (the default)
            waits indefinitely, which is the original behaviour.

    Returns:
        The job object from the last ``jobs.retrieve`` call, whose
        ``status`` is ``"completed"``, ``"error"`` or ``"cancelled"``.

    Raises:
        TimeoutError: If ``timeout`` is set and elapses before the job
            reaches a terminal status.
    """
    terminal_statuses = ("completed", "error", "cancelled")
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = None if timeout is None else time.monotonic() + timeout

    while True:
        current_job = client.beta.safe_synthesizer.jobs.retrieve(job_id)
        status = current_job.status
        print(f"Job {job_id}: {status}")

        if status in terminal_statuses:
            return current_job

        if deadline is None:
            time.sleep(poll_interval)
            continue

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            raise TimeoutError(
                f"Job {job_id} did not finish within {timeout} seconds "
                f"(last status: {status})"
            )
        # Never sleep past the deadline.
        time.sleep(min(poll_interval, remaining))

# Use monitoring function
# Note: despite its name, final_status holds the job object returned by the
# helper, so the status string is read as final_status.status.
final_status = wait_for_job_completion(job.id)

Result Access#

List Results#

jobs.results.list(job_id)#

List all available results for a completed job.

Parameters:

  • job_id (str): Job identifier

Returns: List of result metadata objects

# Fetch the result entries for the job.
results = client.beta.safe_synthesizer.jobs.results.list(job.id)

# Print the metadata attributes of each entry: id, format, the canonical
# flag, and size in bytes.
for result in results:
    print(f"Result ID: {result.id}")
    print(f"Format: {result.format}")
    print(f"Canonical: {result.canonical}")
    print(f"Size: {result.size_bytes} bytes")

Download Results#

jobs.results.download(result_id, job_id)#

Download specific result artifacts.

Parameters:

  • result_id (str): Result identifier

  • job_id (str): Job identifier

Returns: File content as bytes or string

def _save_result(content, path):
    """Write downloaded result content to ``path``.

    The download call is documented as returning bytes or a string, so the
    file mode is chosen from the type actually received. Text is written as
    UTF-8 with newline translation disabled so the content is stored verbatim.
    """
    if isinstance(content, (bytes, bytearray)):
        with open(path, "wb") as f:
            f.write(content)
    else:
        with open(path, "w", encoding="utf-8", newline="") as f:
            f.write(content)


# Download synthetic data (CSV format)
for result in results:
    # Only the first canonical CSV result is downloaded.
    if result.canonical and result.format == "csv":
        synthetic_data_csv = client.beta.safe_synthesizer.jobs.results.download(
            result.id,
            job_id=job.id,
        )

        # Save to file or process as needed
        _save_result(synthetic_data_csv, "synthetic_data.csv")

        print("Downloaded synthetic dataset")
        break

# Download evaluation report (HTML format)
for result in results:
    # The report is identified by its format and by "report" in the result ID.
    if result.format == "html" and "report" in result.id:
        evaluation_report = client.beta.safe_synthesizer.jobs.results.download(
            result.id,
            job_id=job.id,
        )

        _save_result(evaluation_report, "evaluation_report.html")

        print("Downloaded evaluation report")
        break

Configuration Reference#

Job Configuration Structure#

Complete job configuration includes all pipeline components:

# One section per pipeline component: PII replacement, training,
# generation, privacy, and evaluation.
complete_config = {
    "replace_pii": {
        "globals": {
            "locales": ["en_US"],
            "seed": 42,
            "ner": {
                "ner_threshold": 0.8,
                "enable_gliner": True,
                "entities": [
                    "name",
                    "email",
                    "phone_number",
                ],
            },
        },
        "steps": [
            {
                "rows": {
                    # Two rules keyed by "name" and one keyed by "entity".
                    "update": [
                        {"name": "full_name", "value": "fake.name()"},
                        {"name": "email", "value": "fake.email()"},
                        {
                            "entity": ["phone_number"],
                            "value": "fake.phone_number()",
                        },
                    ],
                },
            },
        ],
    },
    "training": {
        "pretrained_model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "num_input_records_to_sample": "auto",
    },
    "generation": {
        "num_records": 5000,
        "temperature": 0.8,
        "top_p": 0.95,
        "use_structured_generation": True,
    },
    "privacy": {
        "privacy_hyperparams": {
            "dp": True,
            "epsilon": 6.0,
            "delta": "auto",
        },
    },
    "evaluation": {
        "mia_enabled": True,
        "aia_enabled": True,
        "correlation_columns": 100,
    },
}

PII Configuration#

Configure PII detection and replacement:

# PII settings: global options (locales, seed, NER) followed by the
# replacement steps applied to rows.
pii_config = {
    "globals": {
        "locales": [
            "en_US",
            "en_GB",
        ],
        "seed": 42,
        "ner": {
            "ner_threshold": 0.8,
            "enable_gliner": True,
            "enable_regexps": True,
        },
    },
    "steps": [
        {
            "rows": {
                # One update rule per entity type.
                "update": [
                    {"entity": ["name"], "value": "fake.name()"},
                    {"entity": ["email"], "value": "fake.email()"},
                    {
                        "entity": ["phone_number"],
                        "value": "fake.phone_number()",
                    },
                ],
            },
        },
    ],
}

Training Configuration#

Configure model training parameters (privacy parameters are set separately, under the privacy key of the job configuration):

# Training settings: base model, input sampling, and the numeric
# hyperparameters listed below.
training_config = {
    "pretrained_model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "num_input_records_to_sample": "auto",
    "learning_rate": 5e-5,
    "batch_size": 4,
    "num_epochs": 2,
}

Generation Configuration#

Configure synthetic data generation:

# Generation settings: record count plus sampling options.
generation_config = {
    "num_records": 5000,
    "temperature": 0.8,
    "top_p": 0.95,
    "repetition_penalty": 1.1,
    "use_structured_generation": True,
}

Evaluation Configuration#

Configure quality and privacy evaluation:

# Evaluation settings: two on/off flags and two column-count values.
evaluation_config = {
    "mia_enabled": True,
    "aia_enabled": True,
    "correlation_columns": 100,
    "sqs_report_columns": 250,
}

Error Handling#

Standard Exception Handling#

Handle API errors during job operations:

from nemo_microservices import APIError, APIStatusError

try:
    job = client.beta.safe_synthesizer.jobs.create(**job_request)

    # Monitor job completion
    final_status = wait_for_job_completion(job.id)

    if final_status.status == "completed":
        results = client.beta.safe_synthesizer.jobs.results.list(job.id)
        print(f"Job completed with {len(results)} results")
    else:
        print(f"Job failed with status: {final_status.status}")

# Handlers run in order, so the most specific exception comes first.
# APIStatusError (an HTTP error response carrying a status code) is a subclass
# of APIError in the SDK; catching APIError first would make this branch
# unreachable.
except APIStatusError as e:
    print(f"HTTP error {e.status_code}: {e.message}")

# Any other SDK-level API error.
except APIError as e:
    print(f"API error: {e.message}")

# Last-resort boundary for anything the SDK did not raise itself.
except Exception as e:
    print(f"Unexpected error: {e}")

Complete Job Example#

End-to-End NeMo Safe Synthesizer#

Note

Before you start, make sure that you have:

  • Stored CSVs locally

  • Uploaded them by running the following two shell commands (the Python example begins right after them, at the import lines):

# Set the Hugging Face endpoint to the URL used in this example.
export HF_ENDPOINT="http://localhost:3000/v1/hf"
# Upload customer-data.csv to the default/safe-synthesizer dataset repository.
huggingface-cli upload --repo-type dataset default/safe-synthesizer customer-data.csv
from nemo_microservices import NeMoMicroservices
import time

# Initialize client
client = NeMoMicroservices(base_url="http://localhost:8080")


def wait_for_job_completion(job_id, poll_interval=30):
    """Poll the job until it reaches a terminal status and return the job object.

    Defined here so the example runs on its own; it sleeps ``poll_interval``
    seconds between ``jobs.retrieve`` calls.
    """
    while True:
        current_job = client.beta.safe_synthesizer.jobs.retrieve(job_id)
        print(f"Job {job_id}: {current_job.status}")
        if current_job.status in ("completed", "error", "cancelled"):
            return current_job
        time.sleep(poll_interval)


def save_result(content, path):
    """Write downloaded content to ``path``, whether it is bytes or a string."""
    if isinstance(content, (bytes, bytearray)):
        with open(path, "wb") as f:
            f.write(content)
    else:
        # UTF-8 and no newline translation so the text is stored verbatim.
        with open(path, "w", encoding="utf-8", newline="") as f:
            f.write(content)


# Define complete configuration
job_request = {
    "name": "complete-safe-synthesizer-job",
    "description": "End-to-end private synthetic data generation",
    "project": "default",
    "spec": {
        "data_source": "hf://datasets/default/safe-synthesizer/customer-data.csv",
        "config": {
            "enable_synthesis": True,
            "enable_replace_pii": True,
            "replace_pii": {
                "globals": {"locales": ["en_US"]},
                "steps": [{"rows": {"update": [{"entity": ["email", "phone_number"], "value": "column.entity | fake"}]}}]
            },
            "training": {"pretrained_model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0"},
            "privacy": {"privacy_hyperparams": {"dp": True, "epsilon": 6.0}},
            "generation": {
                "num_records": 5000,
                "temperature": 0.8,
                "use_structured_generation": True
            },
            "evaluation": {"mia_enabled": True, "aia_enabled": True}
        }
    }
}

# Create and monitor job
job = client.beta.safe_synthesizer.jobs.create(**job_request)
print(f"Created job: {job.id}")

# Wait for completion (final_status is the job object; read .status for the state)
final_status = wait_for_job_completion(job.id)

# Download results
if final_status.status == "completed":
    results = client.beta.safe_synthesizer.jobs.results.list(job.id)

    for result in results:
        if result.canonical and result.format == "csv":
            synthetic_data = client.beta.safe_synthesizer.jobs.results.download(
                result.id, job_id=job.id
            )
            save_result(synthetic_data, "synthetic_customer_data.csv")
            print("Downloaded synthetic dataset")

        elif result.format == "html" and "report" in result.id:
            report = client.beta.safe_synthesizer.jobs.results.download(
                result.id, job_id=job.id
            )
            save_result(report, "evaluation_report.html")
            print("Downloaded evaluation report")
else:
    # Report the outcome explicitly instead of ending silently.
    print(f"Job failed with status: {final_status.status}")