API Server: Extension and Operating Patterns

This page collects forward-looking guidance and operating patterns for the aicrd API server that go beyond what is implemented in pkg/api / pkg/server today. The code samples are illustrative — they describe how a feature would be implemented or operated, not behavior that ships in the current binary.

For what the API server actually does today (architecture, request flow, implemented endpoints, current observability, security model), see API Server Architecture.

Status: Patterns and code in this document are aspirational unless explicitly noted otherwise. Treat them as a roadmap and reference, not as wired-up behavior. Do not link them from runbooks without verifying the underlying capability exists in the current build.

Future Enhancements

Near-Term Ideas

  1. Authentication & Authorization
    Rationale: Protect API from unauthorized access, enable usage tracking
    Implementation: API key middleware; shown below as a minimal key lookup, with an HMAC-SHA256 signature sketch after the reference link
    Example:

    func APIKeyMiddleware(validKeys map[string]string) func(http.Handler) http.Handler {
        return func(next http.Handler) http.Handler {
            return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
                key := r.Header.Get("X-API-Key")
                if _, ok := validKeys[key]; !ok {
                    http.Error(w, "Invalid API key", http.StatusUnauthorized)
                    return
                }
                next.ServeHTTP(w, r)
            })
        }
    }

    Reference: HTTP Authentication
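
    A hedged sketch of the HMAC-SHA256 variant mentioned above: the client signs the request (for example, the body or a canonical string) with a shared secret, and the server recomputes the signature and compares in constant time. The helper name and header scheme are assumptions, not current behavior:

    import (
        "crypto/hmac"
        "crypto/sha256"
        "encoding/hex"
    )

    // verifySignature recomputes the HMAC-SHA256 of message with the shared
    // secret and compares it to the client-supplied hex-encoded signature.
    func verifySignature(secret []byte, message, signatureHex string) bool {
        mac := hmac.New(sha256.New, secret)
        mac.Write([]byte(message))
        expected := mac.Sum(nil)

        got, err := hex.DecodeString(signatureHex)
        if err != nil {
            return false
        }
        return hmac.Equal(expected, got) // constant-time comparison
    }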

  2. CORS Support
    Use Case: Enable browser-based clients (web dashboards)
    Implementation: rs/cors middleware with configurable origins
    Configuration:

    c := cors.New(cors.Options{
        AllowedOrigins:   []string{"https://dashboard.example.com"},
        AllowedMethods:   []string{"GET", "POST", "OPTIONS"},
        AllowedHeaders:   []string{"Content-Type", "X-API-Key"},
        AllowCredentials: true,
        MaxAge:           86400, // 24 hours
    })
    handler := c.Handler(mux)

    Reference: CORS Specification

  3. Response Compression
    Benefit: Can reduce bandwidth by 70-80% for typical JSON responses
    Implementation: gziphandler middleware with quality threshold

    1import "github.com/NYTimes/gziphandler"
    2
    3handler := gziphandler.GzipHandler(mux)
    4// Only compresses responses > 1KB

    Trade-off: CPU usage (+5-10%) vs bandwidth savings
    Reference: gziphandler
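
    If the default threshold doesn't fit, the handler can be built with explicit options; a sketch assuming the library's GzipHandlerWithOpts API:

    import (
        "compress/gzip"

        "github.com/NYTimes/gziphandler"
    )

    // Build a wrapper with an explicit minimum size and compression level.
    wrapper, err := gziphandler.GzipHandlerWithOpts(
        gziphandler.MinSize(1024), // compress responses over 1 KB
        gziphandler.CompressionLevel(gzip.BestSpeed),
    )
    if err != nil {
        log.Fatal(err)
    }
    handler := wrapper(mux)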

  4. Native TLS Support
    Rationale: Eliminate need for reverse proxy in simple deployments
    Implementation: http.ListenAndServeTLS with Let’s Encrypt integration

    1import "golang.org/x/crypto/acme/autocert"
    2
    3m := &autocert.Manager{
    4 Prompt: autocert.AcceptTOS,
    5 Cache: autocert.DirCache("/var/cache/aicr"),
    6 HostPolicy: autocert.HostWhitelist("api.example.com"),
    7}
    8
    9srv := &http.Server{
    10 Addr: ":https",
    11 TLSConfig: m.TLSConfig(),
    12 Handler: handler,
    13}
    14srv.ListenAndServeTLS("", "")

    Reference: autocert Package

  5. API Versioning
    Use Case: Support /v2 API with breaking changes while maintaining /v1
    Pattern: URL-based versioning with version-specific handlers

    v1 := http.NewServeMux()
    v1.HandleFunc("/recipe", handleRecipeV1)

    v2 := http.NewServeMux()
    v2.HandleFunc("/recipe", handleRecipeV2)

    mux := http.NewServeMux()
    mux.Handle("/v1/", http.StripPrefix("/v1", v1))
    mux.Handle("/v2/", http.StripPrefix("/v2", v2))

    Reference: API Versioning Best Practices

Mid-Term Ideas

  1. OpenTelemetry Integration
    Use Case: Distributed tracing across services
    Implementation: OTLP exporter with automatic instrumentation

    import (
        "context"

        "go.opentelemetry.io/otel"
        "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
        "go.opentelemetry.io/otel/sdk/trace"
    )

    func initTracer() (*trace.TracerProvider, error) {
        exporter, err := otlptracehttp.New(context.Background(),
            otlptracehttp.WithEndpoint("otel-collector:4318"),
            otlptracehttp.WithInsecure(),
        )
        if err != nil {
            return nil, err
        }

        tp := trace.NewTracerProvider(
            trace.WithBatcher(exporter),
            trace.WithResource(/* service name */),
        )
        otel.SetTracerProvider(tp)
        return tp, nil
    }

    Reference: OpenTelemetry Go
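
    The resource argument is elided above; a typical construction (an assumption, not wired-up behavior) names the service for the collector:

    import (
        "go.opentelemetry.io/otel/sdk/resource"
        semconv "go.opentelemetry.io/otel/semconv/v1.17.0"
    )

    // Identify this process to the tracing backend.
    res := resource.NewWithAttributes(
        semconv.SchemaURL,
        semconv.ServiceNameKey.String("aicrd"),
    )
    // ... then pass trace.WithResource(res) to NewTracerProvider.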

  2. Recipe Caching
    Benefit: Potentially 95%+ cache hit rate for repeated queries
    Implementation: Redis with TTL, fallback to recipe builder

    1import "github.com/redis/go-redis/v9"
    2
    3func getRecipe(ctx context.Context, key string) (*recipe.Recipe, error) {
    4 // Try cache first
    5 cached, err := rdb.Get(ctx, key).Result()
    6 if err == nil {
    7 var r recipe.Recipe
    8 if err := json.Unmarshal([]byte(cached), &r); err != nil {
    9 return nil, errors.Wrap(errors.ErrCodeInternal, "unmarshal cached recipe", err)
    10 }
    11 return &r, nil
    12 }
    13
    14 // Cache miss - build recipe
    15 r, err := builder.BuildRecipe(ctx, params)
    16 if err != nil {
    17 return nil, err
    18 }
    19
    20 // Cache with 1 hour TTL
    21 data, err := json.Marshal(r)
    22 if err != nil {
    23 return nil, errors.Wrap(errors.ErrCodeInternal, "marshal recipe", err)
    24 }
    25 rdb.Set(ctx, key, data, time.Hour)
    26
    27 return r, nil
    28}

    Reference: go-redis

  3. GraphQL API
    Rationale: Enable clients to request only needed fields
    Implementation: graphql-go with recipe schema

    type Query {
        recipe(
            os: String!
            osVersion: String
            gpu: String!
            service: String
        ): Recipe
    }

    type Recipe {
        request: RequestInfo!
        measurements: [Measurement!]!
        context: RecipeContext
    }

    Trade-off: Added complexity vs flexible querying
    Reference: GraphQL Go
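
    A hedged sketch of wiring that schema with github.com/graphql-go/graphql; recipeType, buildRecipe, and paramsFromArgs are assumed helpers, not existing code:

    queryType := graphql.NewObject(graphql.ObjectConfig{
        Name: "Query",
        Fields: graphql.Fields{
            "recipe": &graphql.Field{
                Type: recipeType, // assumed to mirror the Recipe type above
                Args: graphql.FieldConfigArgument{
                    "os":  &graphql.ArgumentConfig{Type: graphql.NewNonNull(graphql.String)},
                    "gpu": &graphql.ArgumentConfig{Type: graphql.NewNonNull(graphql.String)},
                },
                Resolve: func(p graphql.ResolveParams) (interface{}, error) {
                    return buildRecipe(p.Context, paramsFromArgs(p.Args))
                },
            },
        },
    })
    schema, err := graphql.NewSchema(graphql.SchemaConfig{Query: queryType})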

Longer-Term Ideas

  1. gRPC Support
    Benefit: Potentially 5-10x better performance and smaller payloads than JSON over HTTP/1.1
    Implementation: Protobuf definition with streaming support

    service RecipeService {
        rpc GetRecipe(RecipeRequest) returns (Recipe);
        rpc StreamRecipes(stream RecipeRequest) returns (stream Recipe);
        rpc GetSnapshot(SnapshotRequest) returns (Snapshot);
    }

    message RecipeRequest {
        string os = 1;
        string os_version = 2;
        string gpu = 3;
        string service = 4;
    }

    Deployment: Run the HTTP API and gRPC on the same port with cmux (see the sketch below)
    Reference: gRPC Go
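
    A minimal sketch of that cmux wiring, assuming grpcServer and httpHandler are built elsewhere:

    import (
        "net"
        "net/http"

        "github.com/soheilhy/cmux"
        "google.golang.org/grpc"
    )

    func serve(grpcServer *grpc.Server, httpHandler http.Handler) error {
        l, err := net.Listen("tcp", ":8080")
        if err != nil {
            return err
        }

        m := cmux.New(l)
        // gRPC traffic is HTTP/2 with content-type application/grpc
        grpcL := m.MatchWithWriters(
            cmux.HTTP2MatchHeaderFieldSendSettings("content-type", "application/grpc"))
        httpL := m.Match(cmux.Any())

        go grpcServer.Serve(grpcL)
        go http.Serve(httpL, httpHandler)

        return m.Serve()
    }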

  2. Multi-Tenancy
    Use Case: SaaS deployment with per-customer isolation
    Implementation: Tenant ID from API key, separate rate limits

    type TenantRateLimiter struct {
        limiters map[string]*rate.Limiter
        mu       sync.RWMutex
    }

    func (t *TenantRateLimiter) Allow(tenantID string) bool {
        t.mu.RLock()
        limiter, exists := t.limiters[tenantID]
        t.mu.RUnlock()

        if !exists {
            t.mu.Lock()
            // Re-check under the write lock: another goroutine may have
            // created the limiter after we released the read lock.
            limiter, exists = t.limiters[tenantID]
            if !exists {
                limiter = rate.NewLimiter(rate.Limit(100), 200) // Per-tenant: 100 req/s, burst 200
                t.limiters[tenantID] = limiter
            }
            t.mu.Unlock()
        }

        return limiter.Allow()
    }

    Database: Separate recipe stores per tenant (a routing sketch follows)
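
    A hedged sketch of routing a request to its tenant's store; RecipeStore, tenantFromAPIKey, and the header name are assumptions:

    // storeForTenant resolves the tenant from the API key and returns
    // that tenant's isolated recipe store.
    func storeForTenant(r *http.Request, stores map[string]RecipeStore) (RecipeStore, error) {
        tenantID, err := tenantFromAPIKey(r.Header.Get("X-API-Key"))
        if err != nil {
            return nil, err
        }
        store, ok := stores[tenantID]
        if !ok {
            return nil, fmt.Errorf("unknown tenant %q", tenantID)
        }
        return store, nil
    }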

  3. Admin API
    Use Case: Runtime configuration updates without restart
    Endpoints:

    • POST /admin/config/rate-limit - Update rate limits
    • POST /admin/config/log-level - Change log verbosity
    • GET /admin/debug/pprof - CPU/memory profiling
    • POST /admin/cache/flush - Clear recipe cache

    Security: Separate admin API key with IP allowlist (a middleware sketch follows)
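
    A minimal sketch of that guard, assuming the allowlist and admin key come from configuration:

    func adminMiddleware(allowlist map[string]bool, adminKey string, next http.Handler) http.Handler {
        return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
            ip, _, err := net.SplitHostPort(r.RemoteAddr)
            if err != nil || !allowlist[ip] {
                http.Error(w, "Forbidden", http.StatusForbidden)
                return
            }
            if r.Header.Get("X-Admin-Key") != adminKey {
                http.Error(w, "Unauthorized", http.StatusUnauthorized)
                return
            }
            next.ServeHTTP(w, r)
        })
    }
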
  4. Feature Flags
    Rationale: A/B testing, gradual rollouts, instant rollback
    Implementation: LaunchDarkly or custom flag service

    1import "github.com/launchdarkly/go-server-sdk/v7"
    2
    3func handleRecipe(w http.ResponseWriter, r *http.Request) {
    4 user := ldclient.NewUser(getUserID(r))
    5
    6 // Check feature flag
    7 if client.BoolVariation("use-optimized-builder", user, false) {
    8 // Use new optimized recipe builder
    9 recipe = optimizedBuilder.Build(params)
    10 } else {
    11 // Fall back to stable builder
    12 recipe = stableBuilder.Build(params)
    13 }
    14}

    Reference: LaunchDarkly Go SDK

Production Deployment Patterns

Pattern 1: Kubernetes with Horizontal Pod Autoscaler

Use Case: Auto-scale API servers based on request rate

Deployment Manifest:

apiVersion: apps/v1
kind: Deployment
metadata:
  name: aicrd
  namespace: aicr
spec:
  replicas: 3 # Initial replicas
  selector:
    matchLabels:
      app: aicrd
  template:
    metadata:
      labels:
        app: aicrd
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
    spec:
      serviceAccountName: aicrd
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000
        fsGroup: 1000
      containers:
        - name: api-server
          image: ghcr.io/nvidia/aicrd:latest # Or use specific tag like v0.8.12
          ports:
            - name: http
              containerPort: 8080
              protocol: TCP
            - name: metrics
              containerPort: 9090
              protocol: TCP
          env:
            - name: PORT
              value: "8080"
            - name: AICR_LOG_LEVEL
              value: "info"
            # Criteria allowlists (optional - omit to allow all values)
            - name: AICR_ALLOWED_ACCELERATORS
              value: "h100,l40,a100"
            - name: AICR_ALLOWED_SERVICES
              value: "eks,gke,aks"
          resources:
            requests:
              cpu: 100m
              memory: 128Mi
            limits:
              cpu: 500m
              memory: 512Mi
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /ready
              port: http
            initialDelaySeconds: 5
            periodSeconds: 10
            timeoutSeconds: 3
            failureThreshold: 2
          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            capabilities:
              drop:
                - ALL
          volumeMounts:
            - name: recipes
              mountPath: /etc/aicr/recipes
              readOnly: true
            - name: tmp
              mountPath: /tmp
      volumes:
        - name: recipes
          configMap:
            name: aicr-recipes
        - name: tmp
          emptyDir: {}
---
apiVersion: v1
kind: Service
metadata:
  name: aicrd
  namespace: aicr
spec:
  type: ClusterIP
  ports:
    - name: http
      port: 80
      targetPort: http
      protocol: TCP
  selector:
    app: aicrd
---
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: aicrd-hpa
  namespace: aicr
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: aicrd
  minReplicas: 3
  maxReplicas: 20
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Pods
      pods:
        metric:
          name: http_requests_per_second # requires a custom-metrics adapter (e.g., prometheus-adapter)
        target:
          type: AverageValue
          averageValue: "100"
  behavior:
    scaleUp:
      stabilizationWindowSeconds: 60
      policies:
        - type: Percent
          value: 100 # Double pods
          periodSeconds: 60
        - type: Pods
          value: 4 # Add 4 pods
          periodSeconds: 60
      selectPolicy: Max
    scaleDown:
      stabilizationWindowSeconds: 300 # 5 min cooldown
      policies:
        - type: Percent
          value: 50 # Remove 50% of pods
          periodSeconds: 60

Ingress with TLS:

apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: aicrd
  namespace: aicr
  annotations:
    cert-manager.io/cluster-issuer: "letsencrypt-prod"
    nginx.ingress.kubernetes.io/rate-limit: "100"
    nginx.ingress.kubernetes.io/limit-rps: "20"
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
spec:
  ingressClassName: nginx
  tls:
    - hosts:
        - api.aicr.example.com
      secretName: aicr-api-tls
  rules:
    - host: api.aicr.example.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: aicrd
                port:
                  name: http

Pattern 2: Service Mesh with mTLS

Use Case: Zero-trust security with automatic mTLS encryption

Istio VirtualService:

apiVersion: networking.istio.io/v1beta1
kind: VirtualService
metadata:
  name: aicrd
  namespace: aicr
spec:
  hosts:
    - aicrd.aicr.svc.cluster.local
    - api.aicr.example.com
  gateways:
    - aicr-gateway
  http:
    - match:
        - uri:
            prefix: /v1/recipe
      route:
        - destination:
            host: aicrd
            port:
              number: 80
      timeout: 10s
      retries:
        attempts: 3
        perTryTimeout: 3s
        retryOn: 5xx,reset,connect-failure
      headers:
        response:
          add:
            X-Content-Type-Options: nosniff
            X-Frame-Options: DENY
            Strict-Transport-Security: max-age=31536000
---
apiVersion: networking.istio.io/v1beta1
kind: DestinationRule
metadata:
  name: aicrd
  namespace: aicr
spec:
  host: aicrd
  trafficPolicy:
    tls:
      mode: ISTIO_MUTUAL # mTLS between services
    connectionPool:
      tcp:
        maxConnections: 100
      http:
        http1MaxPendingRequests: 50
        http2MaxRequests: 100
        maxRequestsPerConnection: 2
    outlierDetection:
      consecutive5xxErrors: 5 # replaces the deprecated consecutiveErrors field
      interval: 30s
      baseEjectionTime: 30s
      maxEjectionPercent: 50
---
apiVersion: security.istio.io/v1beta1
kind: PeerAuthentication
metadata:
  name: aicrd
  namespace: aicr
spec:
  selector:
    matchLabels:
      app: aicrd
  mtls:
    mode: STRICT # Require mTLS
---
apiVersion: security.istio.io/v1beta1
kind: AuthorizationPolicy
metadata:
  name: aicrd
  namespace: aicr
spec:
  selector:
    matchLabels:
      app: aicrd
  action: ALLOW
  rules:
    - from:
        - source:
            namespaces: ["aicr", "monitoring"]
      to:
        - operation:
            methods: ["GET", "POST"]
            paths: ["/v1/*", "/health", "/metrics"]

Pattern 3: Load Balancer with Health Checks

Use Case: Bare-metal deployment with HAProxy

HAProxy Configuration:

global
    log /dev/log local0
    maxconn 4096
    user haproxy
    group haproxy
    daemon

defaults
    log global
    mode http
    option httplog
    option dontlognull
    timeout connect 5s
    timeout client 30s
    timeout server 30s
    retries 3
    option redispatch

frontend aicr_api_frontend
    bind *:443 ssl crt /etc/ssl/certs/aicr-api.pem
    bind *:80
    redirect scheme https if !{ ssl_fc }

    # Rate limiting
    stick-table type ip size 100k expire 30s store http_req_rate(10s)
    http-request track-sc0 src
    http-request deny deny_status 429 if { sc_http_req_rate(0) gt 100 }

    # Security headers
    http-response set-header Strict-Transport-Security "max-age=31536000"
    http-response set-header X-Content-Type-Options "nosniff"

    default_backend aicr_api_backend

backend aicr_api_backend
    balance roundrobin
    option httpchk GET /health
    http-check expect status 200

    server api1 10.0.1.10:8080 check inter 10s fall 3 rise 2 maxconn 100
    server api2 10.0.1.11:8080 check inter 10s fall 3 rise 2 maxconn 100
    server api3 10.0.1.12:8080 check inter 10s fall 3 rise 2 maxconn 100

Pattern 4: Blue-Green Deployment

Use Case: Zero-downtime updates with instant rollback

Kubernetes Service Switching:

#!/bin/bash
# Blue-green deployment script

set -euo pipefail

NAMESPACE=aicr
APP=aicrd
NEW_VERSION=$1

# Deploy green version
kubectl apply -f - <<EOF
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ${APP}-green
  namespace: ${NAMESPACE}
spec:
  replicas: 3
  selector:
    matchLabels:
      app: ${APP}
      version: green
  template:
    metadata:
      labels:
        app: ${APP}
        version: green
    spec:
      containers:
        - name: api-server
          image: ghcr.io/nvidia/${APP}:${NEW_VERSION}
          # ... same spec as blue ...
EOF

# Wait for green to be ready
kubectl rollout status deployment/${APP}-green -n ${NAMESPACE}

# Run smoke tests against the green pods
# (assumes a ${APP}-green Service selecting version=green already exists)
GREEN_IP=$(kubectl get svc ${APP}-green -n ${NAMESPACE} -o jsonpath='{.spec.clusterIP}')
curl -f http://${GREEN_IP}/health || (echo "Health check failed" && exit 1)
curl -f "http://${GREEN_IP}/v1/recipe?os=ubuntu&gpu=h100" || (echo "Recipe test failed" && exit 1)

# Switch service to green
kubectl patch service ${APP} -n ${NAMESPACE} -p '{"spec":{"selector":{"version":"green"}}}'

echo "Switched to green (${NEW_VERSION})"
echo "Monitor for 10 minutes, then delete blue deployment"
echo "Rollback: kubectl patch service ${APP} -n ${NAMESPACE} -p '{\"spec\":{\"selector\":{\"version\":\"blue\"}}}'"

# Optional: Auto-delete blue after monitoring period
# sleep 600
# kubectl delete deployment ${APP}-blue -n ${NAMESPACE}

Reliability Patterns

Circuit Breaker

Use Case: Prevent cascading failures when recipe store is slow

Implementation:

1import "github.com/sony/gobreaker"
2
3var (
4 recipeStoreBreaker *gobreaker.CircuitBreaker
5)
6
7func init() {
8 settings := gobreaker.Settings{
9 Name: "RecipeStore",
10 MaxRequests: 3, // Half-open state allows 3 requests
11 Interval: 60 * time.Second, // Reset counts every 60s
12 Timeout: 30 * time.Second, // Stay open for 30s
13 ReadyToTrip: func(counts gobreaker.Counts) bool {
14 failureRatio := float64(counts.TotalFailures) / float64(counts.Requests)
15 return counts.Requests >= 10 && failureRatio >= 0.6
16 },
17 OnStateChange: func(name string, from gobreaker.State, to gobreaker.State) {
18 log.Info("Circuit breaker state changed",
19 "name", name,
20 "from", from,
21 "to", to,
22 )
23 },
24 }
25
26 recipeStoreBreaker = gobreaker.NewCircuitBreaker(settings)
27}
28
29func handleRecipe(w http.ResponseWriter, r *http.Request) {
30 result, err := recipeStoreBreaker.Execute(func() (interface{}, error) {
31 return buildRecipe(r.Context(), params)
32 })
33
34 if err != nil {
35 if errors.Is(err, gobreaker.ErrOpenState) {
36 http.Error(w, "Service temporarily unavailable", http.StatusServiceUnavailable)
37 return
38 }
39 http.Error(w, err.Error(), http.StatusInternalServerError)
40 return
41 }
42
43 recipe := result.(*recipe.Recipe)
44 json.NewEncoder(w).Encode(recipe)
45}

Reference: gobreaker

Bulkhead Pattern

Use Case: Isolate resources for different endpoints

Implementation:

1import "golang.org/x/sync/semaphore"
2
3var (
4 // Separate semaphores for different endpoints
5 recipeSem = semaphore.NewWeighted(100) // 100 concurrent recipe requests
6 snapshotSem = semaphore.NewWeighted(10) // 10 concurrent snapshot requests
7)
8
9func handleRecipeWithBulkhead(w http.ResponseWriter, r *http.Request) {
10 // Acquire from recipe bulkhead
11 if !recipeSem.TryAcquire(1) {
12 http.Error(w, "Too many requests", http.StatusTooManyRequests)
13 return
14 }
15 defer recipeSem.Release(1)
16
17 // Process request
18 handleRecipe(w, r)
19}
20
21func handleSnapshotWithBulkhead(w http.ResponseWriter, r *http.Request) {
22 // Acquire from snapshot bulkhead (more expensive operation)
23 if !snapshotSem.TryAcquire(1) {
24 http.Error(w, "Too many requests", http.StatusTooManyRequests)
25 return
26 }
27 defer snapshotSem.Release(1)
28
29 handleSnapshot(w, r)
30}

Benefit: Recipe slowness doesn’t affect snapshot endpoint

Retry with Exponential Backoff

Use Case: Resilient calls to external APIs (recipe store, etc.)

Implementation:

1import "github.com/cenkalti/backoff/v4"
2
3func fetchRecipeWithRetry(ctx context.Context, key string) (*recipe.Recipe, error) {
4 var r *recipe.Recipe
5
6 operation := func() error {
7 var err error
8 r, err = recipeStore.Get(ctx, key)
9
10 // Don't retry on 404
11 if errors.Is(err, ErrNotFound) {
12 return backoff.Permanent(err)
13 }
14
15 return err
16 }
17
18 // Exponential backoff: 100ms, 200ms, 400ms, 800ms, 1.6s, 3.2s
19 bo := backoff.NewExponentialBackOff()
20 bo.InitialInterval = 100 * time.Millisecond
21 bo.MaxInterval = 5 * time.Second
22 bo.MaxElapsedTime = 30 * time.Second
23
24 err := backoff.Retry(operation, backoff.WithContext(bo, ctx))
25 return r, err
26}

Reference: backoff

Graceful Degradation

Use Case: Serve stale/cached data when primary source fails

Implementation:

var (
    recipeCacheTTL = 1 * time.Hour
    recipeCache    = sync.Map{}
)

type cachedRecipe struct {
    recipe    *recipe.Recipe
    timestamp time.Time
}

func handleRecipeWithFallback(w http.ResponseWriter, r *http.Request) {
    key := buildCacheKey(r)

    // Try primary source
    rec, err := buildRecipe(r.Context(), params)
    if err == nil {
        // Cache successful response
        recipeCache.Store(key, cachedRecipe{
            recipe:    rec,
            timestamp: time.Now(),
        })

        json.NewEncoder(w).Encode(rec)
        return
    }

    // Primary failed - try cache
    if cached, ok := recipeCache.Load(key); ok {
        cr := cached.(cachedRecipe)
        age := time.Since(cr.timestamp)

        log.Warn("Serving stale recipe",
            "key", key,
            "age", age,
            "error", err,
        )

        w.Header().Set("X-Cache", "stale")
        w.Header().Set("X-Cache-Age", age.String())
        json.NewEncoder(w).Encode(cr.recipe)
        return
    }

    // No cache available
    http.Error(w, "Service unavailable", http.StatusServiceUnavailable)
}
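
buildCacheKey is assumed throughout these examples; a minimal sketch, deriving the key from the normalized recipe query parameters:

// buildCacheKey produces a stable key from the request's recipe criteria.
func buildCacheKey(r *http.Request) string {
    q := r.URL.Query()
    return strings.Join([]string{
        q.Get("os"), q.Get("osv"), q.Get("gpu"), q.Get("service"),
    }, "|")
}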

Performance Optimization

Connection Pooling

HTTP Client with Keep-Alive:

var httpClient = &http.Client{
    Transport: &http.Transport{
        MaxIdleConns:        100,
        MaxIdleConnsPerHost: 10,
        IdleConnTimeout:     90 * time.Second,
        DisableCompression:  false,
        ForceAttemptHTTP2:   true,
    },
    Timeout: 10 * time.Second,
}

// Reuse client for all outbound requests
resp, err := httpClient.Get("https://recipe-store.example.com/recipes")

Response Caching

In-Memory Cache with TTL:

1import "github.com/patrickmn/go-cache"
2
3var (
4 responseCache = cache.New(5*time.Minute, 10*time.Minute)
5)
6
7func handleRecipeWithCache(w http.ResponseWriter, r *http.Request) {
8 key := buildCacheKey(r)
9
10 // Check cache
11 if cached, found := responseCache.Get(key); found {
12 w.Header().Set("X-Cache", "hit")
13 w.Header().Set("Content-Type", "application/json")
14 w.Write(cached.([]byte))
15 return
16 }
17
18 // Cache miss - build recipe
19 recipe, err := buildRecipe(r.Context(), params)
20 if err != nil {
21 http.Error(w, err.Error(), http.StatusInternalServerError)
22 return
23 }
24
25 // Serialize and cache
26 data, err := json.Marshal(recipe)
27 if err != nil {
28 http.Error(w, err.Error(), http.StatusInternalServerError)
29 return
30 }
31 responseCache.Set(key, data, cache.DefaultExpiration)
32
33 w.Header().Set("X-Cache", "miss")
34 w.Header().Set("Content-Type", "application/json")
35 w.Write(data)
36}

Request Coalescing

Deduplicate Concurrent Identical Requests:

1import "golang.org/x/sync/singleflight"
2
3var requestGroup singleflight.Group
4
5func handleRecipeWithCoalescing(w http.ResponseWriter, r *http.Request) {
6 key := buildCacheKey(r)
7
8 // Deduplicate requests with same key
9 result, err, shared := requestGroup.Do(key, func() (interface{}, error) {
10 return buildRecipe(r.Context(), params)
11 })
12
13 if shared {
14 w.Header().Set("X-Request-Coalesced", "true")
15 }
16
17 if err != nil {
18 http.Error(w, err.Error(), http.StatusInternalServerError)
19 return
20 }
21
22 json.NewEncoder(w).Encode(result)
23}

Benefit: 10 concurrent identical requests = 1 recipe build

Memory Profiling

Enable the pprof endpoint in the server (Go):

import _ "net/http/pprof"

go func() {
    log.Println(http.ListenAndServe("localhost:6060", nil))
}()

Then capture and analyze profiles from a shell:

# Capture heap profile
curl http://localhost:6060/debug/pprof/heap > heap.prof

# Analyze
go tool pprof heap.prof
(pprof) top10
(pprof) list buildRecipe

# Check for memory leaks:
# compare two profiles taken 5 minutes apart
go tool pprof -base heap1.prof heap2.prof
(pprof) top10  # Shows allocations between profiles

Security Hardening

Rate Limiting Per IP

1import "golang.org/x/time/rate"
2
3type ipRateLimiter struct {
4 limiters map[string]*rate.Limiter
5 mu sync.RWMutex
6 rate rate.Limit
7 burst int
8}
9
10func newIPRateLimiter(r rate.Limit, b int) *ipRateLimiter {
11 return &ipRateLimiter{
12 limiters: make(map[string]*rate.Limiter),
13 rate: r,
14 burst: b,
15 }
16}
17
18func (i *ipRateLimiter) getLimiter(ip string) *rate.Limiter {
19 i.mu.RLock()
20 limiter, exists := i.limiters[ip]
21 i.mu.RUnlock()
22
23 if !exists {
24 i.mu.Lock()
25 limiter = rate.NewLimiter(i.rate, i.burst)
26 i.limiters[ip] = limiter
27
28 // Illustrative cleanup: clears the entire map past a threshold.
29 // Production implementations should use LRU or TTL eviction
30 // (e.g., github.com/hashicorp/golang-lru) so in-flight limiters
31 // are preserved and a request-burst cliff is avoided.
32 if len(i.limiters) > 10000 {
33 i.limiters = make(map[string]*rate.Limiter)
34 }
35 i.mu.Unlock()
36 }
37
38 return limiter
39}
40
41func (i *ipRateLimiter) middleware(next http.Handler) http.Handler {
42 return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
43 ip := getClientIP(r)
44 limiter := i.getLimiter(ip)
45
46 if !limiter.Allow() {
47 http.Error(w, "Rate limit exceeded", http.StatusTooManyRequests)
48 return
49 }
50
51 next.ServeHTTP(w, r)
52 })
53}
54
55func getClientIP(r *http.Request) string {
56 // Check X-Forwarded-For header (behind proxy)
57 xff := r.Header.Get("X-Forwarded-For")
58 if xff != "" {
59 ips := strings.Split(xff, ",")
60 return strings.TrimSpace(ips[0])
61 }
62
63 // Fall back to RemoteAddr
64 ip, _, err := net.SplitHostPort(r.RemoteAddr)
65 if err != nil {
66 // RemoteAddr might not include a port (e.g., some test setups);
67 // returning RemoteAddr keeps a unique key per peer and avoids
68 // collapsing every unparseable address into one rate-limit bucket.
69 return r.RemoteAddr
70 }
71 return ip
72}

Input Validation

1import "github.com/go-playground/validator/v10"
2
3var validate = validator.New()
4
5type RecipeRequest struct {
6 OS string `validate:"required,oneof=ubuntu rhel cos"`
7 OSVersion string `validate:"omitempty,semver"`
8 GPU string `validate:"required,oneof=h100 gb200 b200 a100 l40 rtx-pro-6000"`
9 Service string `validate:"omitempty,oneof=eks gke aks oke kind lke"`
10}
11
12func handleRecipe(w http.ResponseWriter, r *http.Request) {
13 req := RecipeRequest{
14 OS: r.URL.Query().Get("os"),
15 OSVersion: r.URL.Query().Get("osv"),
16 GPU: r.URL.Query().Get("gpu"),
17 Service: r.URL.Query().Get("service"),
18 }
19
20 if err := validate.Struct(req); err != nil {
21 validationErrors := err.(validator.ValidationErrors)
22 http.Error(w, validationErrors.Error(), http.StatusBadRequest)
23 return
24 }
25
26 // Proceed with validated input
27}

Security Headers Middleware

func securityHeadersMiddleware(next http.Handler) http.Handler {
    return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        // HSTS
        w.Header().Set("Strict-Transport-Security",
            "max-age=31536000; includeSubDomains; preload")

        // Prevent MIME sniffing
        w.Header().Set("X-Content-Type-Options", "nosniff")

        // Prevent clickjacking
        w.Header().Set("X-Frame-Options", "DENY")

        // XSS protection (legacy header; modern browsers rely on CSP)
        w.Header().Set("X-XSS-Protection", "1; mode=block")

        // CSP
        w.Header().Set("Content-Security-Policy",
            "default-src 'none'; script-src 'self'; connect-src 'self'; img-src 'self'; style-src 'self';")

        // Referrer policy
        w.Header().Set("Referrer-Policy", "strict-origin-when-cross-origin")

        next.ServeHTTP(w, r)
    })
}

Observability

Custom Metrics

1import "github.com/prometheus/client_golang/prometheus"
2
3var (
4 recipeBuildDuration = prometheus.NewHistogramVec(
5 prometheus.HistogramOpts{
6 Name: "aicr_recipe_build_duration_seconds",
7 Help: "Time to build recipe",
8 Buckets: prometheus.ExponentialBuckets(0.001, 2, 12), // 1ms to 4s
9 },
10 []string{"os", "gpu", "service"},
11 )
12
13 recipeCacheHits = prometheus.NewCounterVec(
14 prometheus.CounterOpts{
15 Name: "aicr_recipe_cache_hits_total",
16 Help: "Number of recipe cache hits",
17 },
18 []string{"cache_type"},
19 )
20
21 activeConnections = prometheus.NewGauge(
22 prometheus.GaugeOpts{
23 Name: "aicr_active_connections",
24 Help: "Number of active HTTP connections",
25 },
26 )
27)
28
29func init() {
30 prometheus.MustRegister(
31 recipeBuildDuration,
32 recipeCacheHits,
33 activeConnections,
34 )
35}
36
37func handleRecipe(w http.ResponseWriter, r *http.Request) {
38 start := time.Now()
39 defer func() {
40 duration := time.Since(start).Seconds()
41 recipeBuildDuration.WithLabelValues(
42 params.OS,
43 params.GPU,
44 params.Service,
45 ).Observe(duration)
46 }()
47
48 // Check cache
49 if cached, found := cache.Get(key); found {
50 recipeCacheHits.WithLabelValues("memory").Inc()
51 // ...
52 }
53
54 // Build recipe
55 // ...
56}

Structured Logging with Context

1import "log/slog"
2
3func handleRecipe(w http.ResponseWriter, r *http.Request) {
4 // Create logger with request context
5 logger := slog.With(
6 "request_id", r.Header.Get("X-Request-ID"),
7 "remote_addr", r.RemoteAddr,
8 "user_agent", r.UserAgent(),
9 )
10
11 logger.Info("Handling recipe request",
12 "os", params.OS,
13 "gpu", params.GPU,
14 )
15
16 recipe, err := buildRecipe(r.Context(), params)
17 if err != nil {
18 logger.Error("Failed to build recipe",
19 "error", err,
20 "params", params,
21 )
22 http.Error(w, err.Error(), http.StatusInternalServerError)
23 return
24 }
25
26 logger.Info("Recipe built successfully",
27 "measurement_count", len(recipe.Measurements),
28 "duration_ms", time.Since(start).Milliseconds(),
29 )
30
31 json.NewEncoder(w).Encode(recipe)
32}

Distributed Tracing

import (
    "go.opentelemetry.io/otel"
    "go.opentelemetry.io/otel/attribute"
    "go.opentelemetry.io/otel/codes"
    "go.opentelemetry.io/otel/trace"
)

func handleRecipe(w http.ResponseWriter, r *http.Request) {
    ctx := r.Context()
    tracer := otel.Tracer("aicrd")

    ctx, span := tracer.Start(ctx, "handleRecipe",
        trace.WithAttributes(
            attribute.String("os", params.OS),
            attribute.String("gpu", params.GPU),
        ),
    )
    defer span.End()

    // Propagate context to child operations
    rec, err := buildRecipeWithTrace(ctx, params)
    if err != nil {
        span.RecordError(err)
        span.SetStatus(codes.Error, err.Error())
        http.Error(w, err.Error(), http.StatusInternalServerError)
        return
    }

    span.SetAttributes(
        attribute.Int("measurement_count", len(rec.Measurements)),
    )

    json.NewEncoder(w).Encode(rec)
}

func buildRecipeWithTrace(ctx context.Context, params Params) (*recipe.Recipe, error) {
    tracer := otel.Tracer("aicrd")
    ctx, span := tracer.Start(ctx, "buildRecipe")
    defer span.End()

    // Build recipe with traced context
    return builder.Build(ctx, params)
}