Update the inference router configuration to handle embedding requests. Previously, the router supported the following paths:

* /v1/chat/completions
* /v1/completions

It now also supports:

* /v1/embeddings
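For illustration, a request to the newly routed path could look like the sketch below. This is a minimal sketch only: the router address, scheme, and model name are placeholders, and the request body assumes the upstream exposes an OpenAI-compatible embeddings API.

```python
# Minimal sketch of a request that the router should now forward to the
# "inference" app. The address, scheme, and model name are placeholders;
# the body format assumes an OpenAI-compatible embeddings API upstream.
import json
import urllib.request

payload = json.dumps({
    "model": "example-embedding-model",  # hypothetical model name
    "input": "The quick brown fox",
}).encode("utf-8")

req = urllib.request.Request(
    "http://router.example:8443/v1/embeddings",  # placeholder host:port
    data=payload,
    headers={"Content-Type": "application/json"},
    method="POST",
)

with urllib.request.urlopen(req) as resp:
    result = json.load(resp)
    # OpenAI-compatible responses return vectors under data[*].embedding.
    print(len(result["data"][0]["embedding"]))
```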
```json
{
  "basic": {},
  "health_check": {
    "check_frequency": "10s",
    "consecutive_success": 2,
    "consecutive_failure": 2
  },
  "servers": [
    {
      "name": "main-server",
      "addr": "[::]:8443",
      "locations": [
        "inference",
        "up"
      ],
      "threads": 0,
      "metrics_path": "/metrics"
    },
    {
      "name": "admin-server",
      "addr": "127.0.0.1:8080,::1:8080",
      "locations": [
        "usage"
      ],
      "threads": 1
    }
  ],
  "locations": [
    {
      "name": "up",
      "path": "^/up$",
      "app": "up"
    },
    {
      "name": "inference",
      "path": "^/v1/(chat/completions|completions|embeddings)$",
      "app": "inference"
    },
    {
      "name": "usage",
      "path": "^/usage$",
      "app": "usage"
    }
  ]
}
```
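As a quick sanity check (not part of the router itself), the sketch below runs the updated `inference` location pattern from the config above through Python's `re` module to confirm it matches the embeddings path alongside the existing completion paths.

```python
# Sanity-check the updated "inference" location pattern from the config
# above; plain Python re, used here only for illustration.
import re

inference_path = re.compile(r"^/v1/(chat/completions|completions|embeddings)$")

# All three supported paths match the single location pattern.
for path in ("/v1/chat/completions", "/v1/completions", "/v1/embeddings"):
    assert inference_path.match(path), path

# Paths outside the pattern do not match this location.
assert inference_path.match("/v1/models") is None
```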