For includers of HyperTagMethods, this changes the authorization code and the object_tag member validation code to look at the object's project_id column, instead of looking up a row with the project and object in the access_tag table. This removes all calls to associate_with_project other than those for Account. It removes the projects association from the includers of HyperTagMethods and adds a project association to the models that didn't already have one, since each object now belongs to a single project. Most HyperTagMethods code is inlined into Account, since it is now the only user of the code. Temporarily, other models still include HyperTagMethods for the before_destroy hook, but eventually it will go away completely. The associations in Project that previously used access_tag as a join table are changed from many_to_many to one_to_many, except for Account (which still uses the join table), and Project#has_resources now needs a separate query per resource class to check whether any associated objects exist (see the sketch below). This causes a lot of fallout in the specs, but that is unavoidable due to the extensive use of projects.first in the specs to get the related project for an object, as well as the extensive use of associate_with_project.
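To illustrate the shape of the change, here is a minimal Sequel sketch. The model names (Vm, PrivateSubnet), the reliance on default foreign keys, and the body of has_resources are illustrative assumptions, not the codebase's actual association list:

class Project < Sequel::Model
  # Before: ownership was resolved through the access_tag join table, e.g.
  #   many_to_many :vms, join_table: :access_tag, left_key: :project_id, right_key: :hyper_tag_id
  # After: each resource carries its owner directly in a project_id column,
  # so the association collapses to a plain one_to_many.
  one_to_many :vms
  one_to_many :private_subnets

  # Account keeps the join table for now.
  many_to_many :accounts, join_table: :access_tag

  # has_resources can no longer scan a single join table; it needs one
  # existence query per resource class.
  def has_resources
    [Vm, PrivateSubnet].any? { |klass| !klass.where(project_id: id).empty? }
  end
end

The spec file below reflects this in its setup: resources such as Firewall and DnsZone are now created with an explicit project_id instead of being linked via associate_with_project.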
# frozen_string_literal: true

require_relative "../../model/spec_helper"

RSpec.describe Prog::Ai::InferenceEndpointNexus do
  subject(:nx) { described_class.new(Strand.new(id: "8148ebdf-66b8-8ed0-9c2f-8cfe93f5aa77")) }

  let(:inference_endpoint) {
    instance_double(InferenceEndpoint, id: "8148ebdf-66b8-8ed0-9c2f-8cfe93f5aa77", replica_count: 2)
  }
  let(:replicas) { Array.new(2) { instance_double(InferenceEndpointReplica, strand: instance_double(Strand, label: "wait")) } }

  before do
    allow(nx).to receive_messages(inference_endpoint: inference_endpoint, replicas: replicas)
    allow(inference_endpoint).to receive(:replicas).and_return(replicas)
  end

  describe ".assemble_with_model" do
    let(:model) { {"id" => "model_id", "boot_image" => "ai-ubuntu-2404-nvidia", "vm_size" => "standard-gpu-6", "storage_volumes" => "storage_volumes", "model_name" => "llama-3-1-8b-it", "engine" => "vllm", "engine_params" => "engine_params", "gpu_count" => 1, "tags" => {}} }

    it "assembles with model" do
      expect(described_class).to receive(:model_for_id).and_return(model)
      expect(described_class).to receive(:assemble).with(
        project_id: 1,
        location: "hetzner-fsn1",
        name: "test-endpoint",
        boot_image: "ai-ubuntu-2404-nvidia",
        vm_size: "standard-gpu-6",
        storage_volumes: "storage_volumes",
        model_name: "llama-3-1-8b-it",
        engine: "vllm",
        engine_params: "engine_params",
        replica_count: 1,
        is_public: false,
        gpu_count: 1,
        tags: {}
      )

      described_class.assemble_with_model(project_id: 1, location: "hetzner-fsn1", name: "test-endpoint", model_id: "model_id")
    end

    it "raises an error if model is not found" do
      expect(described_class).to receive(:model_for_id).and_return(nil)
      expect {
        described_class.assemble_with_model(project_id: 1, location: "hetzner-fsn1", name: "test-endpoint", model_id: "invalid_id")
      }.to raise_error("Model with id invalid_id not found")
    end
  end

  describe ".assemble" do
    let(:customer_project) { Project.create_with_id(name: "default") }
    let(:ie_project) { Project.create_with_id(name: "default") }

    it "validates input" do
      expect(Config).to receive(:inference_endpoint_service_project_id).and_return(ie_project.id).at_least(:once)
      Firewall.create_with_id(name: "inference-endpoint-firewall", location: "hetzner-fsn1", project_id: ie_project.id)
      DnsZone.create_with_id(name: "ai.ubicloud.com", project_id: ie_project.id)

      expect {
        described_class.assemble(project_id: "ed6afccf-7025-4f35-8241-454221d75e18", location: "hetzner-fsn1", boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 1, is_public: false, gpu_count: 1, tags: {})
      }.to raise_error("No existing project")

      expect {
        described_class.assemble(project_id: customer_project.id, location: "hetzner-abc", boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 1, is_public: false, gpu_count: 1, tags: {})
      }.to raise_error Validation::ValidationFailed, "Validation failed for following fields: provider"

      expect {
        described_class.assemble(project_id: customer_project.id, location: "hetzner-fsn1", boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-x", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 1, is_public: false, gpu_count: 1, tags: {})
      }.to raise_error Validation::ValidationFailed, "Validation failed for following fields: size"

      expect {
        described_class.assemble(project_id: customer_project.id, location: "hetzner-fsn1", boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: "abc", is_public: false, gpu_count: 1, tags: {})
      }.to raise_error("Invalid replica count")

      expect {
        described_class.assemble(project_id: customer_project.id, location: "hetzner-fsn1", boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 0, is_public: false, gpu_count: 1, tags: {})
      }.to raise_error("Invalid replica count")

      expect {
        described_class.assemble(project_id: customer_project.id, location: "hetzner-fsn1", boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 10, is_public: false, gpu_count: 1, tags: {})
      }.to raise_error("Invalid replica count")

      expect {
        described_class.assemble(project_id: customer_project.id, location: "leaseweb-wdc02", boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 1, is_public: false, gpu_count: 1, tags: {})
      }.to raise_error("No firewall named 'inference-endpoint-firewall' configured for inference endpoints in leaseweb-wdc02")

      expect {
        st = described_class.assemble(project_id: customer_project.id, location: "hetzner-fsn1", boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 1, is_public: false, gpu_count: 1, tags: {})
        expect(st.subject.load_balancer.hostname).to eq("test-endpoint-#{st.subject.ubid.to_s[-5...]}.ai.ubicloud.com")
        expect(st.subject.load_balancer.stack).to eq("ipv4")
      }.not_to raise_error

      expect {
        st = described_class.assemble(project_id: customer_project.id, location: "hetzner-fsn1", boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint-public", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 1, is_public: true, gpu_count: 1, tags: {})
        expect(st.subject.load_balancer.hostname).to eq("test-endpoint-public.ai.ubicloud.com")
        expect(st.subject.load_balancer.stack).to eq("ipv4")
      }.not_to raise_error

      Firewall.dataset.destroy
      InferenceEndpointReplica.dataset.destroy
      InferenceEndpoint.dataset.destroy
      LoadBalancer.dataset.destroy
      Nic.dataset.destroy
      PrivateSubnet.dataset.destroy
      Vm.dataset.destroy
      expect {
        ie_project.destroy
        described_class.assemble(project_id: customer_project.id, location: "hetzner-fsn1", boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 1, is_public: false, gpu_count: 1, tags: {})
      }.to raise_error("No project configured for inference endpoints")
    end

    it "works without dns zone" do
      expect(Config).to receive(:inference_endpoint_service_project_id).and_return(ie_project.id).at_least(:once)
      Firewall.create_with_id(name: "inference-endpoint-firewall", location: "hetzner-fsn1", project_id: ie_project.id)
      expect {
        described_class.assemble(project_id: customer_project.id, location: "hetzner-fsn1", boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 1, is_public: false, gpu_count: 1, tags: {})
      }.not_to raise_error
    end
  end

  describe "#before_run" do
    it "hops to destroy when needed" do
      expect(nx).to receive(:when_destroy_set?).and_yield
      expect { nx.before_run }.to hop("destroy")
    end

    it "does not hop to destroy if already in the destroy state" do
      expect(nx).to receive(:when_destroy_set?).and_yield
      expect(nx.strand).to receive(:label).and_return("destroy")
      expect { nx.before_run }.not_to hop("destroy")
    end
  end

  describe "#start" do
    it "reconciles replicas and hops to wait_replicas" do
      expect(nx).to receive(:reconcile_replicas)
      expect(nx).to receive(:register_deadline).with("wait", 10 * 60)
      expect { nx.start }.to hop("wait_replicas")
    end
  end

  describe "#wait_replicas" do
    it "naps until all replicas are ready" do
      expect(replicas.first).to receive(:strand).and_return(instance_double(Strand, label: "start"))
      expect { nx.wait_replicas }.to nap(5)
    end

    it "hops when all replicas are ready" do
      expect { nx.wait_replicas }.to hop("wait")
    end
  end

  describe "#wait" do
    it "reconciles replicas and naps" do
      expect(nx).to receive(:reconcile_replicas)
      expect { nx.wait }.to nap(60)
    end
  end

  describe "#destroy" do
    let(:load_balancer) { instance_double(LoadBalancer) }
    let(:private_subnet) { instance_double(PrivateSubnet) }

    it "triggers destruction of resources and hops to self_destroy" do
      expect(inference_endpoint).to receive(:load_balancer).and_return(load_balancer)
      expect(inference_endpoint).to receive(:private_subnet).and_return(private_subnet)
      expect(nx).to receive(:register_deadline)
      expect(replicas).to all(receive(:incr_destroy))
      expect(load_balancer).to receive(:incr_destroy)
      expect(private_subnet).to receive(:incr_destroy)

      expect { nx.destroy }.to hop("self_destroy")
    end
  end

  describe "#self_destroy" do
    it "waits until replicas are destroyed" do
      expect { nx.self_destroy }.to nap(10)
    end

    it "destroys the inference_endpoint" do
      allow(nx).to receive(:replicas).and_return([])
      expect(inference_endpoint).to receive(:destroy)
      expect { nx.self_destroy }.to exit({"msg" => "inference endpoint is deleted"})
    end
  end

  describe "#reconcile_replicas" do
    it "assembles new replicas if actual count is less than desired" do
      allow(inference_endpoint).to receive(:replica_count).and_return(3)
      expect(replicas).to all(receive(:destroy_set?).and_return(false))
      expect(Prog::Ai::InferenceEndpointReplicaNexus).to receive(:assemble).with(inference_endpoint.id)
      nx.reconcile_replicas
    end

    it "destroys older excess replicas if actual count is more than desired" do
      allow(inference_endpoint).to receive(:replica_count).and_return(1)
      expect(replicas).to all(receive(:destroy_set?).at_least(:once).and_return(false))
      expect(replicas[0]).to receive(:created_at).and_return(Time.now)
      expect(replicas[1]).to receive(:created_at).and_return(Time.now + 1)
      expect(replicas[0]).to receive(:incr_destroy)
      expect(replicas[1]).not_to receive(:incr_destroy)
      nx.reconcile_replicas
    end

    it "destroys excess replicas not in wait if actual count is more than desired" do
      allow(inference_endpoint).to receive(:replica_count).and_return(1)
      expect(replicas).to all(receive(:destroy_set?).at_least(:once).and_return(false))
      expect(replicas[0]).to receive(:strand).and_return(instance_double(Strand, label: "start")).at_least(:once)
      expect(replicas[0]).to receive(:created_at).and_return(Time.now + 1)
      expect(replicas[1]).to receive(:created_at).and_return(Time.now)
      expect(replicas[0]).to receive(:incr_destroy)
      expect(replicas[1]).not_to receive(:incr_destroy)
      nx.reconcile_replicas
    end

    it "does nothing if actual equals to desired replica count" do
      allow(inference_endpoint).to receive(:replica_count).and_return(2)
      expect(replicas).to all(receive(:destroy_set?).at_least(:once).and_return(false))
      expect(replicas).not_to include(receive(:incr_destroy))
      nx.reconcile_replicas
    end
  end
end