Files
ubicloud/spec/prog/ai/inference_endpoint_nexus_spec.rb
Jeremy Evans 215f09541a Make access_tag only for project <-> accounts join table
For the includers of HyperTagMethods, this changes the authorization
code and object_tag member validation code to look at the project_id
column for the object, instead of looking a row with the project and
object in the access_tag table.

This removes all calls to associate_with_project, other than
those for Account.  It removes the projects association for
the includers of HyperTagMethods, and adds a project association to
the models that didn't already have one, since there is only a single
project for each object now.

Most HyperTagMethods code is inlined into Account, since it is the only
user of the code now.  Temporarily, other models will still include
HyperTagMethods for the before_destroy hook, but eventually it will
go away completely.

The associations in Projects that previously used access_tag as a join
table are changed from many_to_many to one_to_many, except for
Account (which still uses the join table).

Project#has_resources now needs separate queries for all of the
resource classes to see if there are any associated objects.

This causes a lot of fallout in the specs, but unfortunately that is
unavoidable due to the extensive use of projects.first in the specs to
get the related project for the objects, as well as the extensive
use of associate_with_project.
2025-01-17 08:32:46 -08:00

225 lines
12 KiB
Ruby

# frozen_string_literal: true
require_relative "../../model/spec_helper"
# Specs for Prog::Ai::InferenceEndpointNexus, the strand program that manages
# an inference endpoint's lifecycle (assembly, replica reconciliation, teardown).
RSpec.describe Prog::Ai::InferenceEndpointNexus do
# Prog under test, built on an in-memory (not persisted) strand.
subject(:nx) { described_class.new(Strand.new(id: "8148ebdf-66b8-8ed0-9c2f-8cfe93f5aa77")) }
# Endpoint double with a desired replica_count of 2, matching `replicas` below.
let(:inference_endpoint) {
instance_double(InferenceEndpoint, id: "8148ebdf-66b8-8ed0-9c2f-8cfe93f5aa77", replica_count: 2)
}
# Two replica doubles whose strands are already in the "wait" label (ready).
let(:replicas) { Array.new(2) { instance_double(InferenceEndpointReplica, strand: instance_double(Strand, label: "wait")) } }
before do
# Wire the doubles into the prog so the specs need no database rows.
allow(nx).to receive_messages(inference_endpoint: inference_endpoint, replicas: replicas)
allow(inference_endpoint).to receive(:replicas).and_return(replicas)
end
describe ".assemble_with_model" do
  # Catalog entry returned by model_for_id; its fields feed straight into .assemble.
  let(:model) {
    {
      "id" => "model_id",
      "boot_image" => "ai-ubuntu-2404-nvidia",
      "vm_size" => "standard-gpu-6",
      "storage_volumes" => "storage_volumes",
      "model_name" => "llama-3-1-8b-it",
      "engine" => "vllm",
      "engine_params" => "engine_params",
      "gpu_count" => 1,
      "tags" => {}
    }
  }

  it "assembles with model" do
    expect(described_class).to receive(:model_for_id).and_return(model)
    # Resolving the model id expands into a full .assemble call with the
    # catalog fields merged over the caller-provided arguments.
    expected_args = {
      project_id: 1,
      location: "hetzner-fsn1",
      name: "test-endpoint",
      boot_image: "ai-ubuntu-2404-nvidia",
      vm_size: "standard-gpu-6",
      storage_volumes: "storage_volumes",
      model_name: "llama-3-1-8b-it",
      engine: "vllm",
      engine_params: "engine_params",
      replica_count: 1,
      is_public: false,
      gpu_count: 1,
      tags: {}
    }
    expect(described_class).to receive(:assemble).with(**expected_args)
    described_class.assemble_with_model(project_id: 1, location: "hetzner-fsn1", name: "test-endpoint", model_id: "model_id")
  end

  it "raises an error if model is not found" do
    expect(described_class).to receive(:model_for_id).and_return(nil)
    expect {
      described_class.assemble_with_model(project_id: 1, location: "hetzner-fsn1", name: "test-endpoint", model_id: "invalid_id")
    }.to raise_error("Model with id invalid_id not found")
  end
end
describe ".assemble" do
# Customer-owned project, and the internal service project that hosts the
# shared inference firewall and DNS zone.
let(:customer_project) { Project.create_with_id(name: "default") }
let(:ie_project) { Project.create_with_id(name: "default") }
it "validates input" do
expect(Config).to receive(:inference_endpoint_service_project_id).and_return(ie_project.id).at_least(:once)
Firewall.create_with_id(name: "inference-endpoint-firewall", location: "hetzner-fsn1", project_id: ie_project.id)
DnsZone.create_with_id(name: "ai.ubicloud.com", project_id: ie_project.id)
# Unknown customer project id is rejected.
expect {
described_class.assemble(project_id: "ed6afccf-7025-4f35-8241-454221d75e18", location: "hetzner-fsn1", boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 1, is_public: false, gpu_count: 1, tags: {})
}.to raise_error("No existing project")
# Unknown location fails provider validation.
expect {
described_class.assemble(project_id: customer_project.id, location: "hetzner-abc", boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 1, is_public: false, gpu_count: 1, tags: {})
}.to raise_error Validation::ValidationFailed, "Validation failed for following fields: provider"
# Invalid vm size is rejected.
expect {
described_class.assemble(project_id: customer_project.id, location: "hetzner-fsn1", boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-x", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 1, is_public: false, gpu_count: 1, tags: {})
}.to raise_error Validation::ValidationFailed, "Validation failed for following fields: size"
# replica_count must be an integer in the accepted range: the next three
# cases cover a non-integer, zero, and a too-large count.
expect {
described_class.assemble(project_id: customer_project.id, location: "hetzner-fsn1", boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: "abc", is_public: false, gpu_count: 1, tags: {})
}.to raise_error("Invalid replica count")
expect {
described_class.assemble(project_id: customer_project.id, location: "hetzner-fsn1", boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 0, is_public: false, gpu_count: 1, tags: {})
}.to raise_error("Invalid replica count")
expect {
described_class.assemble(project_id: customer_project.id, location: "hetzner-fsn1", boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 10, is_public: false, gpu_count: 1, tags: {})
}.to raise_error("Invalid replica count")
# A location where the service project has no inference firewall is rejected.
expect {
described_class.assemble(project_id: customer_project.id, location: "leaseweb-wdc02", boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 1, is_public: false, gpu_count: 1, tags: {})
}.to raise_error("No firewall named 'inference-endpoint-firewall' configured for inference endpoints in leaseweb-wdc02")
# Happy path, private endpoint: load balancer hostname carries a ubid suffix.
expect {
st = described_class.assemble(project_id: customer_project.id, location: "hetzner-fsn1", boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 1, is_public: false, gpu_count: 1, tags: {})
expect(st.subject.load_balancer.hostname).to eq("test-endpoint-#{st.subject.ubid.to_s[-5...]}.ai.ubicloud.com")
expect(st.subject.load_balancer.stack).to eq("ipv4")
}.not_to raise_error
# Happy path, public endpoint: hostname is just the endpoint name in the zone.
expect {
st = described_class.assemble(project_id: customer_project.id, location: "hetzner-fsn1", boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint-public", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 1, is_public: true, gpu_count: 1, tags: {})
expect(st.subject.load_balancer.hostname).to eq("test-endpoint-public.ai.ubicloud.com")
expect(st.subject.load_balancer.stack).to eq("ipv4")
}.not_to raise_error
# Remove everything created above so ie_project can be destroyed below.
Firewall.dataset.destroy
InferenceEndpointReplica.dataset.destroy
InferenceEndpoint.dataset.destroy
LoadBalancer.dataset.destroy
Nic.dataset.destroy
PrivateSubnet.dataset.destroy
Vm.dataset.destroy
# With the configured service project gone, assembly fails.
expect {
ie_project.destroy
described_class.assemble(project_id: customer_project.id, location: "hetzner-fsn1", boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 1, is_public: false, gpu_count: 1, tags: {})
}.to raise_error("No project configured for inference endpoints")
end
it "works without dns zone" do
expect(Config).to receive(:inference_endpoint_service_project_id).and_return(ie_project.id).at_least(:once)
Firewall.create_with_id(name: "inference-endpoint-firewall", location: "hetzner-fsn1", project_id: ie_project.id)
# Only the firewall exists (no DnsZone); assembly must still succeed.
expect {
described_class.assemble(project_id: customer_project.id, location: "hetzner-fsn1", boot_image: "ai-ubuntu-2404-nvidia", name: "test-endpoint", vm_size: "standard-gpu-6", storage_volumes: [{encrypted: true, size_gib: 80}], model_name: "llama-3-1-8b-it", engine: "vllm", engine_params: "", replica_count: 1, is_public: false, gpu_count: 1, tags: {})
}.not_to raise_error
end
end
describe "#before_run" do
  it "hops to destroy when needed" do
    expect(nx).to receive(:when_destroy_set?).and_yield
    expect { nx.before_run }.to hop("destroy")
  end

  it "does not hop to destroy if already in the destroy state" do
    # A strand already labeled "destroy" must not re-enter the destroy label.
    expect(nx.strand).to receive(:label).and_return("destroy")
    expect(nx).to receive(:when_destroy_set?).and_yield
    expect { nx.before_run }.not_to hop("destroy")
  end
end
describe "#start" do
  it "reconciles replicas and hops to wait_replicas" do
    # A 10-minute deadline toward "wait" is registered before the first hop.
    expect(nx).to receive(:register_deadline).with("wait", 600)
    expect(nx).to receive(:reconcile_replicas)
    expect { nx.start }.to hop("wait_replicas")
  end
end
describe "#wait_replicas" do
  it "naps until all replicas are ready" do
    # One replica still in "start" keeps the endpoint napping.
    unready_strand = instance_double(Strand, label: "start")
    expect(replicas.first).to receive(:strand).and_return(unready_strand)
    expect { nx.wait_replicas }.to nap(5)
  end

  it "hops when all replicas are ready" do
    # Both replica doubles are in "wait" (see top-level `replicas` let).
    expect { nx.wait_replicas }.to hop("wait")
  end
end
describe "#wait" do
  it "reconciles replicas and naps" do
    # Steady state: reconcile every pass, then sleep for a minute.
    expect(nx).to receive(:reconcile_replicas)
    expect { nx.wait }.to nap(60)
  end
end
describe "#destroy" do
  let(:lb) { instance_double(LoadBalancer) }
  let(:subnet) { instance_double(PrivateSubnet) }

  it "triggers destruction of resources and hops to self_destroy" do
    expect(nx).to receive(:register_deadline)
    expect(inference_endpoint).to receive_messages(load_balancer: lb, private_subnet: subnet)
    # Every dependent resource gets a destroy semaphore increment.
    expect(lb).to receive(:incr_destroy)
    expect(subnet).to receive(:incr_destroy)
    expect(replicas).to all(receive(:incr_destroy))
    expect { nx.destroy }.to hop("self_destroy")
  end
end
describe "#self_destroy" do
  it "waits until replicas are destroyed" do
    # The top-level before stubs two live replicas, so the prog keeps napping.
    expect { nx.self_destroy }.to nap(10)
  end

  it "destroys the inference_endpoint" do
    # Once no replicas remain, the endpoint record is removed and the strand exits.
    expect(inference_endpoint).to receive(:destroy)
    allow(nx).to receive(:replicas).and_return([])
    expect { nx.self_destroy }.to exit({"msg" => "inference endpoint is deleted"})
  end
end
describe "#reconcile_replicas" do
  it "assembles new replicas if actual count is less than desired" do
    allow(inference_endpoint).to receive(:replica_count).and_return(3)
    expect(replicas).to all(receive(:destroy_set?).and_return(false))
    # 2 live replicas vs. a desired count of 3: exactly one replica is assembled.
    expect(Prog::Ai::InferenceEndpointReplicaNexus).to receive(:assemble).with(inference_endpoint.id)
    nx.reconcile_replicas
  end

  it "destroys older excess replicas if actual count is more than desired" do
    allow(inference_endpoint).to receive(:replica_count).and_return(1)
    expect(replicas).to all(receive(:destroy_set?).at_least(:once).and_return(false))
    # Both replicas are in "wait" (top-level before), so the older one is culled.
    expect(replicas[0]).to receive(:created_at).and_return(Time.now)
    expect(replicas[1]).to receive(:created_at).and_return(Time.now + 1)
    expect(replicas[0]).to receive(:incr_destroy)
    expect(replicas[1]).not_to receive(:incr_destroy)
    nx.reconcile_replicas
  end

  it "destroys excess replicas not in wait if actual count is more than desired" do
    allow(inference_endpoint).to receive(:replica_count).and_return(1)
    expect(replicas).to all(receive(:destroy_set?).at_least(:once).and_return(false))
    # A newer replica that never reached "wait" is preferred for destruction
    # over an older, ready one.
    expect(replicas[0]).to receive(:strand).and_return(instance_double(Strand, label: "start")).at_least(:once)
    expect(replicas[0]).to receive(:created_at).and_return(Time.now + 1)
    expect(replicas[1]).to receive(:created_at).and_return(Time.now)
    expect(replicas[0]).to receive(:incr_destroy)
    expect(replicas[1]).not_to receive(:incr_destroy)
    nx.reconcile_replicas
  end

  it "does nothing if actual equals to desired replica count" do
    allow(inference_endpoint).to receive(:replica_count).and_return(2)
    expect(replicas).to all(receive(:destroy_set?).at_least(:once).and_return(false))
    # Fix: the previous `expect(replicas).not_to include(receive(:incr_destroy))`
    # did not install any negative message expectation -- `include` only performs
    # a membership check, so an unexpected incr_destroy would have passed
    # unnoticed. Assert on each replica individually instead.
    replicas.each { |replica| expect(replica).not_to receive(:incr_destroy) }
    nx.reconcile_replicas
  end
end
end