ubicloud/spec/prog/ai/inference_endpoint_nexus_spec.rb
Jeremy Evans 4b819d3cb2 Change all create_with_id to create
It hasn't been necessary to use create_with_id since
ebc79622df, in December 2024.

I have plans to introduce:

```ruby
def create_with_id(id, values)
  obj = new(values)
  obj.id = id
  obj.save_changes
end
```

This will make it easier to use the same id when creating
multiple objects.  The first step is removing the existing
uses of create_with_id.
2025-08-06 01:55:51 +09:00
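
As a rough sketch of how the planned helper could be used (the spec below already stubs the endpoint and its Strand with the same uuid, which is the pattern this is meant to simplify; the id-generation call and column values here are assumptions for illustration, not confirmed API):

```ruby
# Hypothetical sketch: give a resource and its Strand the same id.
# generate_uuid is an assumed helper, not taken from the codebase.
id = InferenceEndpoint.generate_uuid
InferenceEndpoint.create_with_id(id, name: "test-endpoint")
Strand.create_with_id(id, prog: "Ai::InferenceEndpointNexus", label: "start")
```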


# frozen_string_literal: true

require_relative "../../model/spec_helper"

RSpec.describe Prog::Ai::InferenceEndpointNexus do
  subject(:nx) { described_class.new(Strand.new(id: "8148ebdf-66b8-8ed0-9c2f-8cfe93f5aa77")) }

  let(:inference_endpoint) {
    instance_double(InferenceEndpoint, id: "8148ebdf-66b8-8ed0-9c2f-8cfe93f5aa77", replica_count: 2)
  }
  let(:replicas) { Array.new(2) { instance_double(InferenceEndpointReplica, strand: instance_double(Strand, label: "wait")) } }

  before do
    allow(nx).to receive_messages(inference_endpoint: inference_endpoint, replicas: replicas)
    allow(inference_endpoint).to receive(:replicas).and_return(replicas)
  end
describe ".assemble_with_model" do
let(:model) { {"id" => "model_id", "boot_image" => "ai-ubuntu-2404-nvidia", "vm_size" => "standard-gpu-6", "storage_volumes" => "storage_volumes", "model_name" => "llama-3-1-8b-it", "engine" => "vllm", "engine_params" => "engine_params", "gpu_count" => 1, "tags" => {}, "max_requests" => 500, "max_project_rps" => 100, "max_project_tps" => 10000} }
it "assembles with model" do
expect(described_class).to receive(:model_for_id).and_return(model)
expect(described_class).to receive(:assemble).with(
project_id: 1,
location_id: Location::HETZNER_FSN1_ID,
name: "test-endpoint",
boot_image: "ai-ubuntu-2404-nvidia",
vm_size: "standard-gpu-6",
storage_volumes: "storage_volumes",
model_name: "llama-3-1-8b-it",
engine: "vllm",
engine_params: "engine_params",
replica_count: 1,
is_public: false,
gpu_count: 1,
max_requests: 500,
max_project_rps: 100,
max_project_tps: 10000,
tags: {}
)
described_class.assemble_with_model(project_id: 1, location_id: Location::HETZNER_FSN1_ID, name: "test-endpoint", model_id: "model_id")
end
it "the model it is assembled with has unique id" do
expect(Option::AI_MODELS.map { it["id"] }.size).to eq(Option::AI_MODELS.map { it["id"] }.uniq.size)
end
it "raises an error if model is not found" do
expect(described_class).to receive(:model_for_id).and_return(nil)
expect {
described_class.assemble_with_model(project_id: 1, location_id: Location::HETZNER_FSN1_ID, name: "test-endpoint", model_id: "invalid_id")
}.to raise_error("Model with id invalid_id not found")
end
it "fails if location doesn't exist" do
expect {
described_class.assemble_with_model(project_id: 1, location_id: nil, name: "test-endpoint", model_id: "model_id")
}.to raise_error RuntimeError, "No existing location"
end
end
describe ".assemble" do
let(:customer_project) { Project.create(name: "default") }
let(:ie_project) { Project.create(name: "default") }
it "validates input" do
expect(Config).to receive(:inference_endpoint_service_project_id).and_return(ie_project.id).at_least(:once)
Firewall.create(name: "inference-endpoint-firewall", location_id: Location::HETZNER_FSN1_ID, project_id: ie_project.id)
DnsZone.create(name: "ai.ubicloud.com", project_id: ie_project.id)
expect {
described_class.assemble(project_id: "ed6afccf-7025-4f35-8241-454221d75e18", location_id: Location::HETZNER_FSN1_ID, boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 1, is_public: false, gpu_count: 1, tags: {}, max_requests: 500, max_project_rps: 100, max_project_tps: 10000)
}.to raise_error("No existing project")
expect {
described_class.assemble(project_id: customer_project.id, location_id: nil, boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 1, is_public: false, gpu_count: 1, tags: {}, max_requests: 500, max_project_rps: 100, max_project_tps: 10000)
}.to raise_error RuntimeError, "No existing location"
expect {
described_class.assemble(project_id: customer_project.id, location_id: Location::HETZNER_FSN1_ID, boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-x", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 1, is_public: false, gpu_count: 1, tags: {}, max_requests: 500, max_project_rps: 100, max_project_tps: 10000)
}.to raise_error Validation::ValidationFailed, "Validation failed for following fields: size"
expect {
described_class.assemble(project_id: customer_project.id, location_id: Location::HETZNER_FSN1_ID, boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: "abc", is_public: false, gpu_count: 1, tags: {}, max_requests: 500, max_project_rps: 100, max_project_tps: 10000)
}.to raise_error("Invalid replica count")
expect {
described_class.assemble(project_id: customer_project.id, location_id: Location::HETZNER_FSN1_ID, boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 0, is_public: false, gpu_count: 1, tags: {}, max_requests: 500, max_project_rps: 100, max_project_tps: 10000)
}.to raise_error("Invalid replica count")
expect {
described_class.assemble(project_id: customer_project.id, location_id: Location::HETZNER_FSN1_ID, boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 10, is_public: false, gpu_count: 1, tags: {}, max_requests: 500, max_project_rps: 100, max_project_tps: 10000)
}.to raise_error("Invalid replica count")
expect {
described_class.assemble(project_id: customer_project.id, location_id: Location[name: "leaseweb-wdc02"].id, boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 1, is_public: false, gpu_count: 1, tags: {}, max_requests: 500, max_project_rps: 100, max_project_tps: 10000)
}.to raise_error("No firewall named 'inference-endpoint-firewall' configured for inference endpoints in leaseweb-wdc02")
expect {
st = described_class.assemble(project_id: customer_project.id, location_id: Location::HETZNER_FSN1_ID, boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 1, is_public: false, gpu_count: 1, tags: {}, max_requests: 500, max_project_rps: 100, max_project_tps: 10000)
expect(st.subject.load_balancer.hostname).to eq("test-endpoint-#{st.subject.ubid.to_s[-5...]}.ai.ubicloud.com")
expect(st.subject.load_balancer.stack).to eq("ipv4")
}.not_to raise_error
expect {
st = described_class.assemble(project_id: customer_project.id, location_id: Location::HETZNER_FSN1_ID, boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint-public", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 1, is_public: true, gpu_count: 1, tags: {}, max_requests: 500, max_project_rps: 100, max_project_tps: 10000)
expect(st.subject.load_balancer.hostname).to eq("test-endpoint-public.ai.ubicloud.com")
expect(st.subject.load_balancer.stack).to eq("ipv4")
}.not_to raise_error
Firewall.dataset.destroy
InferenceEndpointReplica.dataset.destroy
InferenceEndpoint.dataset.destroy
LoadBalancer.dataset.destroy
Nic.dataset.destroy
PrivateSubnet.dataset.destroy
Vm.dataset.destroy
expect {
ie_project.destroy
described_class.assemble(project_id: customer_project.id, location_id: Location::HETZNER_FSN1_ID, boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 1, is_public: false, gpu_count: 1, tags: {}, max_requests: 500, max_project_rps: 100, max_project_tps: 10000)
}.to raise_error("No project configured for inference endpoints")
end
it "works without dns zone" do
expect(Config).to receive(:inference_endpoint_service_project_id).and_return(ie_project.id).at_least(:once)
Firewall.create(name: "inference-endpoint-firewall", location_id: Location::HETZNER_FSN1_ID, project_id: ie_project.id)
expect {
described_class.assemble(project_id: customer_project.id, location_id: Location::HETZNER_FSN1_ID, boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 1, is_public: false, gpu_count: 1, tags: {}, max_requests: 500, max_project_rps: 100, max_project_tps: 10000)
}.not_to raise_error
end
end
describe "#before_run" do
it "hops to destroy when needed" do
expect(nx).to receive(:when_destroy_set?).and_yield
expect { nx.before_run }.to hop("destroy")
end
it "does not hop to destroy if already in the destroy state" do
expect(nx).to receive(:when_destroy_set?).and_yield
expect(nx.strand).to receive(:label).and_return("destroy")
expect { nx.before_run }.not_to hop("destroy")
end
end
describe "#start" do
it "reconciles replicas and hops to wait_replicas" do
expect(nx).to receive(:reconcile_replicas)
expect(nx).to receive(:register_deadline).with("wait", 10 * 60)
expect { nx.start }.to hop("wait_replicas")
end
end
describe "#wait_replicas" do
it "naps until all replicas are ready" do
expect(replicas.first).to receive(:strand).and_return(instance_double(Strand, label: "start"))
expect { nx.wait_replicas }.to nap(5)
end
it "hops when all replicas are ready" do
expect { nx.wait_replicas }.to hop("wait")
end
end
describe "#wait" do
it "reconciles replicas and naps" do
expect(nx).to receive(:reconcile_replicas)
expect { nx.wait }.to nap(60)
end
end
describe "#destroy" do
let(:load_balancer) { instance_double(LoadBalancer) }
let(:private_subnet) { instance_double(PrivateSubnet) }
it "triggers destruction of resources and hops to self_destroy" do
expect(inference_endpoint).to receive(:load_balancer).and_return(load_balancer)
expect(inference_endpoint).to receive(:private_subnet).and_return(private_subnet)
expect(nx).to receive(:register_deadline)
expect(replicas).to all(receive(:incr_destroy))
expect(load_balancer).to receive(:incr_destroy)
expect(private_subnet).to receive(:incr_destroy)
expect { nx.destroy }.to hop("self_destroy")
end
end
describe "#self_destroy" do
it "waits until replicas are destroyed" do
expect { nx.self_destroy }.to nap(10)
end
it "destroys the inference_endpoint" do
allow(nx).to receive(:replicas).and_return([])
expect(inference_endpoint).to receive(:destroy)
expect { nx.self_destroy }.to exit({"msg" => "inference endpoint is deleted"})
end
end
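
  # As the examples below exercise: reconciliation assembles new replicas
  # while the actual count is below replica_count and, when above it,
  # destroys the excess, preferring replicas whose strand is not yet in
  # "wait" and then the oldest ones.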
describe "#reconcile_replicas" do
it "assembles new replicas if actual count is less than desired" do
allow(inference_endpoint).to receive(:replica_count).and_return(3)
expect(replicas).to all(receive(:destroy_set?).and_return(false))
expect(Prog::Ai::InferenceEndpointReplicaNexus).to receive(:assemble).with(inference_endpoint.id)
nx.reconcile_replicas
end
it "destroys older excess replicas if actual count is more than desired" do
allow(inference_endpoint).to receive(:replica_count).and_return(1)
expect(replicas).to all(receive(:destroy_set?).at_least(:once).and_return(false))
expect(replicas[0]).to receive(:created_at).and_return(Time.now)
expect(replicas[1]).to receive(:created_at).and_return(Time.now + 1)
expect(replicas[0]).to receive(:incr_destroy)
expect(replicas[1]).not_to receive(:incr_destroy)
nx.reconcile_replicas
end
it "destroys excess replicas not in wait if actual count is more than desired" do
allow(inference_endpoint).to receive(:replica_count).and_return(1)
expect(replicas).to all(receive(:destroy_set?).at_least(:once).and_return(false))
expect(replicas[0]).to receive(:strand).and_return(instance_double(Strand, label: "start")).at_least(:once)
expect(replicas[0]).to receive(:created_at).and_return(Time.now + 1)
expect(replicas[1]).to receive(:created_at).and_return(Time.now)
expect(replicas[0]).to receive(:incr_destroy)
expect(replicas[1]).not_to receive(:incr_destroy)
nx.reconcile_replicas
end
it "does nothing if actual equals to desired replica count" do
allow(inference_endpoint).to receive(:replica_count).and_return(2)
expect(replicas).to all(receive(:destroy_set?).at_least(:once).and_return(false))
expect(replicas).not_to include(receive(:incr_destroy))
nx.reconcile_replicas
end
end
end