{
  "openapi": "3.0.3",
  "info": {
    "title": "Wicklee Fleet API",
    "description": "Sovereign GPU fleet monitor for local AI inference. Monitor WES (Wicklee Efficiency Score), thermal state, inference efficiency, and hardware health across Ollama and vLLM nodes.",
    "version": "1.0.0",
    "contact": { "name": "Wicklee", "url": "https://wicklee.dev" }
  },
  "servers": [
    { "url": "https://wicklee.dev", "description": "Production fleet API" },
    { "url": "http://localhost:7700", "description": "Local agent API (no auth)" }
  ],
  "security": [{ "ApiKeyAuth": [] }],
  "paths": {
    "/mcp": {
      "post": {
        "operationId": "mcpJsonRpc",
        "summary": "MCP JSON-RPC 2.0 endpoint",
        "description": "Model Context Protocol endpoint for AI agents. Supports tools (get_node_status, get_inference_state, get_active_models, get_observations, get_metrics_history, get_model_fit) and resources (wicklee://node/metrics, wicklee://node/thermal). get_observations covers 18 agent-evaluated patterns; 2 additional cloud-evaluated patterns are queryable via the cloud Observations endpoint. Localhost only, no auth required.",
        "servers": [{ "url": "http://localhost:7700" }],
        "requestBody": {
          "required": true,
          "content": { "application/json": { "schema": { "type": "object", "properties": { "jsonrpc": { "type": "string", "enum": ["2.0"] }, "method": { "type": "string" }, "params": { "type": "object" }, "id": {} }, "required": ["jsonrpc", "method", "id"] } } }
        },
        "responses": { "200": { "description": "JSON-RPC 2.0 response" } }
      }
    },
    "/.well-known/mcp.json": {
      "get": {
        "operationId": "mcpManifest",
        "summary": "MCP server discovery manifest",
        "servers": [{ "url": "http://localhost:7700" }],
        "responses": { "200": { "description": "MCP server manifest with capabilities, tools, and resources" } }
      }
    },
    "/metrics": {
      "get": {
        "operationId": "prometheusMetrics",
        "summary": "Prometheus scrape endpoint (Team+ tier)",
        "description": "Returns fleet metrics in Prometheus text format. 7 gauges per node: gpu_utilization, power_watts, tokens_per_second, wes_score, thermal_penalty, memory_pressure, ttft_ms.",
        "parameters": [{ "name": "X-API-Key", "in": "header", "required": true, "schema": { "type": "string" } }],
        "responses": { "200": { "description": "Prometheus text format", "content": { "text/plain": {} } }, "401": { "description": "Missing API key" }, "403": { "description": "Team tier or higher required" } }
      }
    },
    "/api/v1/fleet": {
      "get": {
        "operationId": "getFleet",
        "summary": "Get all fleet nodes with full telemetry",
        "description": "Returns all nodes belonging to the authenticated user with their latest MetricsPayload.",
        "responses": {
          "200": {
            "description": "Fleet node list",
            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/FleetResponse" } } }
          },
          "401": { "description": "Missing or invalid API key" }
        }
      }
    },
    "/api/v1/fleet/wes": {
      "get": {
        "operationId": "getFleetWes",
        "summary": "Get WES scores for all nodes",
        "description": "Returns WES (Wicklee Efficiency Score) for each node, ranked by score.",
        "responses": {
          "200": {
            "description": "WES scores",
            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/WesResponse" } } }
          }
        }
      }
    },
    "/api/v1/nodes/{nodeId}": {
      "get": {
        "operationId": "getNode",
        "summary": "Get single node deep dive",
        "parameters": [{ "name": "nodeId", "in": "path", "required": true, "schema": { "type": "string" }, "example": "WK-XXXX" }],
        "responses": {
          "200": {
            "description": "Node with full metrics",
            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/NodeResponse" } } }
          },
          "404": { "description": "Node not found" }
        }
      }
    },
    "/api/v1/route/best": {
      "get": {
        "operationId": "getBestRoute",
        "summary": "Get routing recommendation",
        "description": "Returns two candidates: latency (highest tok/s) and efficiency (highest WES). Default strategy is efficiency.",
        "responses": {
          "200": {
            "description": "Routing recommendation",
            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RouteResponse" } } }
          }
        }
      }
    },
    "/api/v1/insights/latest": {
      "get": {
        "operationId": "getInsights",
        "summary": "Get fleet intelligence snapshot (Team+)",
        "description": "Fleet summary with avg WES, total tok/s, and findings array (thermal stress, WES below baseline, etc.).",
        "responses": {
          "200": {
            "description": "Intelligence snapshot",
            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/InsightsResponse" } } }
          },
          "402": { "description": "Requires Team tier or above" }
        }
      }
    },
    "/api/v1/keys": {
      "post": {
        "operationId": "createApiKey",
        "summary": "Create a new API key",
        "description": "Returns the raw key once (prefix wk_live_). Stored as SHA-256 hash.",
        "responses": {
          "200": { "description": "API key created" }
        }
      },
      "get": {
        "operationId": "listApiKeys",
        "summary": "List all API keys",
        "responses": {
          "200": { "description": "API key list (hashes never exposed)" }
        }
      }
    },
    "/api/v1/keys/{keyId}": {
      "delete": {
        "operationId": "revokeApiKey",
        "summary": "Revoke an API key",
        "parameters": [{ "name": "keyId", "in": "path", "required": true, "schema": { "type": "string" } }],
        "responses": {
          "200": { "description": "Key revoked" }
        }
      }
    },
    "/api/sla": {
      "get": {
        "operationId": "getInferenceSla",
        "summary": "Inference SLA Monitor — p50/p95/p99 percentiles",
        "description": "Returns p50/p95/p99/max for TTFT, end-to-end latency, and TPOT computed via DuckDB quantile_cont() over per-request inference_traces. Also returns the compliance percentage against a configurable TTFT target (target_ttft_ms), the 20 most-recent violations, and a per-model breakdown. Requires the Ollama proxy (Settings → Proxy) to capture per-request data. window_min accepts 1–1440 minutes; the 24h hard ceiling matches the trace retention period.",
        "servers": [{ "url": "http://localhost:7700" }],
        "parameters": [
          { "name": "window_min",      "in": "query", "required": false, "schema": { "type": "integer", "default": 60,  "minimum": 1, "maximum": 1440 } },
          { "name": "target_ttft_ms",  "in": "query", "required": false, "schema": { "type": "integer", "default": 500, "minimum": 1 } },
          { "name": "model",           "in": "query", "required": false, "schema": { "type": "string" }, "description": "Optional model filter — narrows percentiles to a single model name." }
        ],
        "responses": {
          "200": { "description": "SLA summary", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/SlaSummary" } } } }
        }
      }
    },
    "/api/v1/thermal-budget": {
      "get": {
        "operationId": "getThermalBudget",
        "summary": "Thermal Budget Calculator (Pro+) — predicts when pushing harder backfires",
        "description": "Walks the 7-day metrics_5min Postgres rollup, identifies sustained Normal-thermal blocks (≥30 min) and Normal→Fair transitions, computes the sustainable tok/s rate, the load level that pushes the node out of Normal, the average time-to-Fair, and the resulting penalized rate (push_threshold_tps ÷ 1.25). Generates a plain-English advice string comparing the 1-hour token output of staying at the sustainable rate vs pushing and then dropping to the penalized rate.",
        "parameters": [
          { "name": "node_id", "in": "query", "required": true, "schema": { "type": "string" }, "example": "WK-XXXX" }
        ],
        "responses": {
          "200": { "description": "Thermal budget summary", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ThermalBudget" } } } },
          "400": { "description": "node_id query param required" },
          "401": { "description": "Missing or invalid auth" },
          "403": { "description": "Pro tier or higher required" },
          "404": { "description": "Node not found in user's fleet" }
        }
      }
    },
    "/api/runtime-config": {
      "get": {
        "operationId": "getRuntimeConfig",
        "summary": "Runtime Config Surface (v0.9.0) — cached launch-time config for a model",
        "description": "Returns the cached RuntimeConfig for the named model. Available across Ollama (parsed from /api/show on model change), vLLM (5-min poller, tries /v1/server_info then falls back to ps aux), and llama.cpp (5-min poller, tries /props then falls back to ps aux). Reads in-memory cache only — never blocks on I/O and works regardless of DuckDB store health. Templates and system prompts stay local — the cloud telemetry push does NOT carry these fields.",
        "servers": [{ "url": "http://localhost:7700" }],
        "parameters": [
          { "name": "model", "in": "query", "required": true, "schema": { "type": "string" }, "example": "llama3.1:8b" }
        ],
        "responses": {
          "200": { "description": "Cached RuntimeConfig", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RuntimeConfig" } } } },
          "400": { "description": "Missing ?model= query param" },
          "404": { "description": "No cached config for this model yet" }
        }
      }
    },
    "/api/v1/fleet/model-comparison": {
      "get": {
        "operationId": "getFleetModelComparison",
        "summary": "Fleet-wide per-model rollup (WES, tok/s, watts, TTFT, cost)",
        "description": "Per-model aggregated stats across every node owned by the authenticated tenant. Reads metrics_5min for the long window plus a metrics_raw side query for TTFT (last 24h only). Empty array until new telemetry is ingested under the v0.9.x schema (ollama_active_model column).",
        "parameters": [
          { "name": "hours", "in": "query", "required": false, "schema": { "type": "integer", "default": 168, "minimum": 1, "maximum": 720 } }
        ],
        "responses": {
          "200": { "description": "Per-model rollup", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/FleetModelComparison" } } } },
          "401": { "description": "Missing or invalid auth" }
        }
      }
    },
    "/api/v1/fleet/model-switches": {
      "get": {
        "operationId": "getFleetModelSwitches",
        "summary": "Cross-node model swap events (LAG over metrics_raw)",
        "description": "Returns model transitions detected via a Postgres LAG window function over metrics_raw partitioned by node_id. Capped at 200 rows.",
        "parameters": [
          { "name": "hours", "in": "query", "required": false, "schema": { "type": "integer", "default": 24, "minimum": 1, "maximum": 168 } }
        ],
        "responses": {
          "200": { "description": "Swap events", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/FleetModelSwitches" } } } },
          "401": { "description": "Missing or invalid auth" }
        }
      }
    },
    "/api/v1/fleet/cost-by-model": {
      "get": {
        "operationId": "getFleetCostByModel",
        "summary": "Fleet-wide per-model power cost",
        "description": "Cost (USD) = avg_watts × hours_active ÷ 1000 × $0.16, i.e. energy in kWh priced at $0.16 per kWh. Uses metrics_raw at 30s granularity.",
        "parameters": [
          { "name": "hours", "in": "query", "required": false, "schema": { "type": "integer", "default": 24, "minimum": 1, "maximum": 168 } }
        ],
        "responses": {
          "200": { "description": "Per-model cost rollup", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/FleetCostByModel" } } } },
          "401": { "description": "Missing or invalid auth" }
        }
      }
    },
    "/api/v1/webhooks": {
      "post": {
        "operationId": "createWebhook",
        "summary": "Threshold Webhooks (Pro+) — create subscription",
        "description": "Register a webhook for state-transition push notifications (thermal_state_changed, inference_state_changed, wes_below, wes_above). Returns the HMAC-SHA256 secret ONCE — store it. Receivers verify the X-Wicklee-Signature header to confirm authenticity.",
        "requestBody": {
          "required": true,
          "content": { "application/json": { "schema": { "$ref": "#/components/schemas/CreateWebhookBody" } } }
        },
        "responses": {
          "200": { "description": "Subscription created (secret returned ONCE)", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/WebhookSubscription" } } } },
          "400": { "description": "Invalid url, event_type, or missing required threshold" },
          "401": { "description": "Missing or invalid auth" },
          "403": { "description": "Pro tier or higher required" },
          "404": { "description": "Specified node_id not found in user's fleet" }
        }
      },
      "get": {
        "operationId": "listWebhooks",
        "summary": "List your webhook subscriptions (no secrets)",
        "responses": {
          "200": {
            "description": "Subscription list",
            "content": { "application/json": { "schema": { "type": "object", "properties": {
              "subscriptions": { "type": "array", "items": { "$ref": "#/components/schemas/WebhookSubscription" } }
            } } } }
          }
        }
      }
    },
    "/api/v1/webhooks/{id}": {
      "delete": {
        "operationId": "deleteWebhook",
        "summary": "Remove a webhook subscription",
        "parameters": [{ "name": "id", "in": "path", "required": true, "schema": { "type": "string" } }],
        "responses": {
          "200": { "description": "Subscription deleted" },
          "404": { "description": "Subscription not found" }
        }
      }
    },
    "/api/v1/webhooks/{id}/test": {
      "post": {
        "operationId": "testWebhook",
        "summary": "Fire a synthetic test payload to a subscription's URL",
        "description": "Sends a synthetic webhook payload to the subscription's configured URL with a valid HMAC signature. Use to verify your receiver before relying on real condition triggers.",
        "parameters": [{ "name": "id", "in": "path", "required": true, "schema": { "type": "string" } }],
        "responses": {
          "200": { "description": "Test delivered", "content": { "application/json": { "schema": { "type": "object", "properties": { "delivered": { "type": "boolean" }, "status": { "type": "integer", "description": "Receiver's HTTP response status" } } } } } },
          "404": { "description": "Subscription not found" },
          "502": { "description": "Receiver unreachable or returned non-2xx" }
        }
      }
    }
  },
  "components": {
    "securitySchemes": {
      "ApiKeyAuth": {
        "type": "apiKey",
        "in": "header",
        "name": "X-API-Key"
      }
    },
    "schemas": {
      "FleetResponse": {
        "type": "object",
        "properties": {
          "nodes": { "type": "array", "items": { "$ref": "#/components/schemas/FleetNode" } }
        }
      },
      "FleetNode": {
        "type": "object",
        "properties": {
          "node_id": { "type": "string", "example": "WK-XXXX" },
          "hostname": { "type": "string", "example": "macmini.local" },
          "online": { "type": "boolean" },
          "last_seen_ms": { "type": "integer", "format": "int64" },
          "metrics": { "$ref": "#/components/schemas/MetricsPayload" }
        }
      },
      "WesResponse": {
        "type": "object",
        "properties": {
          "nodes": { "type": "array", "items": {
            "type": "object",
            "properties": {
              "node_id": { "type": "string" },
              "online": { "type": "boolean" },
              "wes": { "type": "number", "format": "float" }
            }
          }}
        }
      },
      "NodeResponse": {
        "type": "object",
        "properties": {
          "node_id": { "type": "string" },
          "hostname": { "type": "string" },
          "online": { "type": "boolean" },
          "last_seen_ms": { "type": "integer", "format": "int64" },
          "metrics": { "$ref": "#/components/schemas/MetricsPayload" }
        }
      },
      "RouteResponse": {
        "type": "object",
        "properties": {
          "latency": { "$ref": "#/components/schemas/RouteCandidate" },
          "efficiency": { "$ref": "#/components/schemas/RouteCandidate" },
          "default": { "type": "string", "enum": ["latency", "efficiency"] }
        }
      },
      "RouteCandidate": {
        "type": "object",
        "properties": {
          "node": { "type": "string" },
          "tok_s": { "type": "number", "format": "float" },
          "wes": { "type": "number", "format": "float" },
          "reason": { "type": "string" }
        }
      },
      "InsightsResponse": {
        "type": "object",
        "properties": {
          "generated_at_ms": { "type": "integer", "format": "int64" },
          "fleet": {
            "type": "object",
            "properties": {
              "online_count": { "type": "integer" },
              "total_count": { "type": "integer" },
              "avg_wes": { "type": "number", "format": "float" },
              "fleet_tok_s": { "type": "number", "format": "float" }
            }
          },
          "findings": { "type": "array", "items": { "$ref": "#/components/schemas/Finding" } }
        }
      },
      "Finding": {
        "type": "object",
        "properties": {
          "node_id": { "type": "string" },
          "hostname": { "type": "string" },
          "severity": { "type": "string", "enum": ["low", "medium", "high"] },
          "pattern": { "type": "string" },
          "title": { "type": "string" },
          "detail": { "type": "string" },
          "value": { "type": "number", "format": "float" },
          "unit": { "type": "string" }
        }
      },
      "SlaSummary": {
        "type": "object",
        "properties": {
          "window_minutes":       { "type": "integer" },
          "request_count":        { "type": "integer" },
          "error_count":          { "type": "integer" },
          "error_rate_pct":       { "type": "number", "format": "float" },
          "ttft":                 { "$ref": "#/components/schemas/SlaPercentiles" },
          "e2e":                  { "$ref": "#/components/schemas/SlaPercentiles" },
          "tpot":                 { "$ref": "#/components/schemas/SlaPercentiles" },
          "sla": {
            "type": "object",
            "properties": {
              "target_ttft_ms":   { "type": "integer" },
              "compliance_pct":   { "type": "number", "format": "float" },
              "violations_count": { "type": "integer" },
              "violations":       { "type": "array", "items": { "$ref": "#/components/schemas/SlaViolation" } }
            }
          },
          "by_model": { "type": "array", "items": {
            "type": "object",
            "properties": {
              "model":        { "type": "string" },
              "count":        { "type": "integer" },
              "p95_ttft_ms":  { "type": "number", "format": "float" },
              "p95_e2e_ms":   { "type": "number", "format": "float" }
            }
          } }
        }
      },
      "SlaPercentiles": {
        "type": "object",
        "properties": {
          "p50": { "type": "number", "format": "float" },
          "p95": { "type": "number", "format": "float" },
          "p99": { "type": "number", "format": "float" },
          "max": { "type": "number", "format": "float" }
        }
      },
      "SlaViolation": {
        "type": "object",
        "properties": {
          "ts_ms":      { "type": "integer", "format": "int64" },
          "model":      { "type": "string" },
          "ttft_ms":    { "type": "integer" },
          "latency_ms": { "type": "integer" }
        }
      },
      "ThermalBudget": {
        "type": "object",
        "properties": {
          "node_id":              { "type": "string" },
          "samples_analyzed":     { "type": "integer" },
          "transitions_detected": { "type": "integer" },
          "confidence":           { "type": "string", "enum": ["insufficient", "low", "medium", "high"] },
          "sustainable_tps":      { "type": "number", "format": "float", "description": "Max tok/s sustained during any Normal-thermal block ≥30 min" },
          "sustainable_watts":    { "type": "number", "format": "float" },
          "push_threshold_tps":   { "type": "number", "format": "float", "nullable": true, "description": "Median tok/s in 10 min before any Normal→Fair transition" },
          "push_threshold_watts": { "type": "number", "format": "float", "nullable": true },
          "time_to_fair_min":     { "type": "number", "format": "float", "nullable": true, "description": "Avg duration of Normal blocks before Fair transition (minutes)" },
          "fair_penalized_tps":   { "type": "number", "format": "float", "nullable": true, "description": "push_threshold_tps ÷ 1.25 — effective rate once Fair triggers" },
          "advice":               { "type": "string", "description": "Plain-English 1-hour-output comparison: stay-sustainable vs push-then-drop" }
        }
      },
      "CreateWebhookBody": {
        "type": "object",
        "required": ["url", "event_type"],
        "properties": {
          "url":        { "type": "string", "format": "uri", "example": "https://your-server.example.com/wicklee-hook" },
          "event_type": { "type": "string", "enum": ["thermal_state_changed", "inference_state_changed", "wes_below", "wes_above"] },
          "node_id":    { "type": "string", "nullable": true, "description": "Omit to fire for any node owned by the user" },
          "threshold":  { "type": "number", "format": "float", "nullable": true, "description": "Required for wes_below / wes_above" },
          "cooldown_s": { "type": "integer", "minimum": 10, "default": 60, "description": "Min seconds between fires per (subscription, node) pair" }
        }
      },
      "WebhookSubscription": {
        "type": "object",
        "properties": {
          "id":            { "type": "string" },
          "url":           { "type": "string" },
          "event_type":    { "type": "string" },
          "node_id":       { "type": "string", "nullable": true },
          "threshold":     { "type": "number", "format": "float", "nullable": true },
          "cooldown_s":    { "type": "integer" },
          "enabled":       { "type": "boolean" },
          "last_fired_ms": { "type": "integer", "format": "int64", "nullable": true },
          "secret":        { "type": "string", "description": "HMAC-SHA256 secret returned ONLY at creation; not included in list responses. Used to verify X-Wicklee-Signature on incoming webhook calls." }
        }
      },
      "RuntimeConfig": {
        "type": "object",
        "properties": {
          "model":           { "type": "string", "example": "llama3.1:8b" },
          "runtime":         { "type": "string", "enum": ["ollama", "vllm", "llamacpp"] },
          "captured_at_ms":  { "type": "integer", "format": "int64" },
          "context_length":  { "type": "integer", "format": "int64", "nullable": true },
          "n_gpu_layers":    { "type": "integer", "nullable": true },
          "quantization":    { "type": "string", "nullable": true, "example": "Q4_K_M" },
          "parameter_count": { "type": "integer", "format": "int64", "nullable": true },
          "template":        { "type": "string", "nullable": true, "description": "Ollama only. Stays local — never pushed to cloud." },
          "system_prompt":   { "type": "string", "nullable": true, "description": "Ollama only. Stays local — never pushed to cloud." },
          "process_args":    { "type": "array", "items": { "type": "string" }, "nullable": true, "description": "vLLM / llama.cpp — full command-line args." },
          "raw":             { "type": "object", "nullable": true, "description": "Full raw runtime-introspection response." }
        }
      },
      "FleetModelComparison": {
        "type": "object",
        "properties": {
          "models": { "type": "array", "items": {
            "type": "object",
            "properties": {
              "model":         { "type": "string" },
              "hours_active":  { "type": "number", "format": "double" },
              "avg_tok_s":     { "type": "number", "format": "float", "nullable": true },
              "avg_watts":     { "type": "number", "format": "float", "nullable": true },
              "wes":           { "type": "number", "format": "float", "nullable": true },
              "avg_ttft_ms":   { "type": "number", "format": "float", "nullable": true },
              "cost_per_hour": { "type": "number", "format": "double", "nullable": true },
              "total_cost":    { "type": "number", "format": "double", "nullable": true },
              "sample_count":  { "type": "integer", "format": "int64" }
            }
          }}
        }
      },
      "FleetModelSwitches": {
        "type": "object",
        "properties": {
          "swaps": { "type": "array", "items": {
            "type": "object",
            "properties": {
              "ts_ms":      { "type": "integer", "format": "int64" },
              "node_id":    { "type": "string" },
              "from_model": { "type": "string", "nullable": true },
              "to_model":   { "type": "string" },
              "gap_ms":     { "type": "number", "format": "double" }
            }
          }},
          "total_swaps":       { "type": "integer", "format": "int64" },
          "total_gap_ms":      { "type": "number", "format": "double" },
          "total_gap_minutes": { "type": "number", "format": "double" }
        }
      },
      "FleetCostByModel": {
        "type": "object",
        "properties": {
          "models": { "type": "array", "items": {
            "type": "object",
            "properties": {
              "model":        { "type": "string" },
              "hours_active": { "type": "number", "format": "double" },
              "avg_watts":    { "type": "number", "format": "float", "nullable": true },
              "cost_usd":     { "type": "number", "format": "double" },
              "tok_s_avg":    { "type": "number", "format": "float", "nullable": true },
              "sample_count": { "type": "integer", "format": "int64" }
            }
          }},
          "total_cost_usd": { "type": "number", "format": "double" }
        }
      },
      "MetricsPayload": {
        "type": "object",
        "description": "Full telemetry payload. All fields optional (null when unavailable).",
        "properties": {
          "node_id": { "type": "string" },
          "hostname": { "type": "string" },
          "inference_state": { "type": "string", "enum": ["live", "idle-spd", "busy", "idle"] },
          "thermal_state": { "type": "string", "enum": ["Normal", "Fair", "Serious", "Critical"] },
          "thermal_source": { "type": "string", "enum": ["nvml", "iokit", "coretemp", "clock_ratio", "sysfs", "wmi"] },
          "penalty_avg": { "type": "number", "format": "float" },
          "cpu_usage_percent": { "type": "number", "format": "float" },
          "memory_pressure_percent": { "type": "number", "format": "float" },
          "gpu_utilization_percent": { "type": "number", "format": "float" },
          "apple_soc_power_w": { "type": "number", "format": "float" },
          "nvidia_power_draw_w": { "type": "number", "format": "float" },
          "nvidia_gpu_temp_c": { "type": "number", "format": "float" },
          "ollama_running": { "type": "boolean" },
          "ollama_active_model": { "type": "string" },
          "ollama_tokens_per_second": { "type": "number", "format": "float" },
          "ollama_ttft_ms": { "type": "number", "format": "float" },
          "vllm_running": { "type": "boolean" },
          "vllm_model_name": { "type": "string" },
          "vllm_tokens_per_sec": { "type": "number", "format": "float" },
          "vllm_avg_ttft_ms": { "type": "number", "format": "float" },
          "vllm_requests_waiting": { "type": "integer" },
          "runtime_config_available": { "type": "boolean", "description": "v0.9.0+ — true when the agent has at least one cached RuntimeConfig. Fetch via GET /api/runtime-config?model=<name>." },
          "agent_version": { "type": "string" },
          "timestamp_ms": { "type": "integer", "format": "int64" }
        }
      }
    }
  }
}