Files
ubicloud/config/ai_models.yml
Benjamin Satzger 4baf0563ce Add embedding model 'e5-mistral-7b-it'
Introduce a new model `e5-mistral-7b-it`, which can be used to produce
embeddings.

Job producing the model image:
https://github.com/ubicloud/ai-images/actions/runs/11363010686

The image contains this model:
https://huggingface.co/intfloat/e5-mistral-7b-instruct
2024-10-16 20:02:33 +02:00

5 lines
1.4 KiB
YAML

# Catalog of AI model definitions. The file is a single YAML list; every
# entry is a mapping with the same set of keys:
#
#   id               UUID-shaped identifier of the entry
#   model_name       name of the model
#   enabled          boolean switch for the entry
#   locations        list of location names (only hetzner-fsn1 is used here)
#   vm_size          name of the VM size
#   storage_volumes  list of volume mappings; every entry here pairs one
#                    encrypted volume with a size in GiB and one read-only
#                    volume that references an image named ai-model-<model_name>
#   boot_image       name of the boot image
#   engine           name of the inference engine
#   engine_params    string of extra flags, "" when there are none
#                    (presumably appended to the engine's command line;
#                    confirm against the code that consumes this file)

# Test model, no extra engine flags.
- id: 8b0b55b3-fb99-415f-8441-3abef2c2a200
  model_name: test-model
  enabled: true
  locations:
    - hetzner-fsn1
  vm_size: standard-gpu-6
  storage_volumes:
    - encrypted: true
      size_gib: 80
    - read_only: true
      image: ai-model-test-model
  boot_image: ai-ubuntu-2404-nvidia
  engine: vllm
  engine_params: ""

# gemma-2-2b-it, no extra engine flags.
- id: 04ba0d97-859b-46ba-a90b-36a7c7900d4b
  model_name: gemma-2-2b-it
  enabled: true
  locations:
    - hetzner-fsn1
  vm_size: standard-gpu-6
  storage_volumes:
    - encrypted: true
      size_gib: 80
    - read_only: true
      image: ai-model-gemma-2-2b-it
  boot_image: ai-ubuntu-2404-nvidia
  engine: vllm
  engine_params: ""

# llama-3-2-3b-it; the flags set GPU memory utilization, maximum model
# length, and tool-call parsing with a dedicated chat template.
- id: acc50340-c036-44ff-85a2-c5b7c8823e2a
  model_name: llama-3-2-3b-it
  enabled: true
  locations:
    - hetzner-fsn1
  vm_size: standard-gpu-6
  storage_volumes:
    - encrypted: true
      size_gib: 80
    - read_only: true
      image: ai-model-llama-3-2-3b-it
  boot_image: ai-ubuntu-2404-nvidia
  engine: vllm
  engine_params: "--gpu-memory-utilization 0.95 --max-model-len 90000 --enable-auto-tool-choice --tool-call-parser llama3_json --chat-template resources/tool_chat_template_llama3.2_json.jinja"

# e5-mistral-7b-it, an embedding model. Per the commit that introduced it,
# the image contains https://huggingface.co/intfloat/e5-mistral-7b-instruct
- id: 9f077493-dcd7-4067-8311-c98c4b48c4d4
  model_name: e5-mistral-7b-it
  enabled: true
  locations:
    - hetzner-fsn1
  vm_size: standard-gpu-6
  storage_volumes:
    - encrypted: true
      size_gib: 80
    - read_only: true
      image: ai-model-e5-mistral-7b-it
  boot_image: ai-ubuntu-2404-nvidia
  engine: vllm
  engine_params: "--gpu-memory-utilization 0.95"