Files
ubicloud/config/inference_router_config.json
Junhao Li 1fb9add186 Create control plane for inference router
Creates the Clover control plane that manages the lifecycle
of inference router replicas, which will handle inference requests
across all models and route them to an appropriate inference
endpoint based on priority, capacity, and cache characteristics.
2025-04-24 11:51:46 -04:00

46 lines
772 B
JSON

{
  "basic": {},
  "health_check": {
    "check_frequency": "10s",
    "consecutive_success": 2,
    "consecutive_failure": 2
  },
  "servers": [
    {
      "name": "main-server",
      "addr": "[::]:8443",
      "locations": [
        "inference",
        "up"
      ],
      "threads": 0,
      "metrics_path": "/metrics"
    },
    {
      "name": "admin-server",
      "addr": "127.0.0.1:8080,[::1]:8080",
      "locations": [
        "usage"
      ],
      "threads": 1
    }
  ],
  "locations": [
    {
      "name": "up",
      "path": "^/up$",
      "app": "up"
    },
    {
      "name": "inference",
      "path": "^/v1/(chat/)?completions$",
      "app": "inference"
    },
    {
      "name": "usage",
      "path": "^/usage$",
      "app": "usage"
    }
  ]
}