ubicloud/config/inference_router_config.json
Benjamin Satzger 308f58ff46 Inference Router: Support for embedding models
Update the inference router configuration to handle embedding requests.

Previously, the router supported the following paths:
* /v1/chat/completions
* /v1/completions

It now also supports:
* /v1/embeddings
2025-05-23 09:01:00 +02:00

{
  "basic": {},
  "health_check": {
    "check_frequency": "10s",
    "consecutive_success": 2,
    "consecutive_failure": 2
  },
  "servers": [
    {
      "name": "main-server",
      "addr": "[::]:8443",
      "locations": [
        "inference",
        "up"
      ],
      "threads": 0,
      "metrics_path": "/metrics"
    },
    {
      "name": "admin-server",
      "addr": "127.0.0.1:8080,::1:8080",
      "locations": [
        "usage"
      ],
      "threads": 1
    }
  ],
  "locations": [
    {
      "name": "up",
      "path": "^/up$",
      "app": "up"
    },
    {
      "name": "inference",
      "path": "^/v1/(chat/completions|completions|embeddings)$",
      "app": "inference"
    },
    {
      "name": "usage",
      "path": "^/usage$",
      "app": "usage"
    }
  ]
}
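
As a rough illustration (not part of the repository, and not necessarily how the router implements routing internally), the "path" regexes from the "locations" section above can be checked against an incoming request path roughly as in the Rust sketch below. The helper name match_location is hypothetical, and the regex crate is assumed purely for demonstration; the key point is that the updated "inference" pattern now accepts /v1/embeddings alongside the two completion paths.

// Minimal sketch, assuming regex-based path matching; uses the `regex` crate.
use regex::Regex;

// Hypothetical helper: returns the name of the first location whose
// path pattern matches the request path, or None if nothing matches.
fn match_location(path: &str) -> Option<&'static str> {
    // (location name, path pattern) pairs copied from the config above.
    let locations = [
        ("up", r"^/up$"),
        ("inference", r"^/v1/(chat/completions|completions|embeddings)$"),
        ("usage", r"^/usage$"),
    ];
    for (name, pattern) in locations {
        // Compile each pattern and test the request path against it.
        if Regex::new(pattern).expect("valid pattern").is_match(path) {
            return Some(name);
        }
    }
    None
}

fn main() {
    // The new embeddings path is routed to the "inference" location,
    // just like the existing completion paths.
    assert_eq!(match_location("/v1/embeddings"), Some("inference"));
    assert_eq!(match_location("/v1/chat/completions"), Some("inference"));
    // Paths outside the configured patterns are not routed by this config.
    assert_eq!(match_location("/v1/models"), None);
}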