Files
ubicloud/spec/prog/ai/inference_router_target_nexus_spec.rb
Jeremy Evans 4b819d3cb2 Change all create_with_id to create
It hasn't been necessary to use create_with_id since
ebc79622df, in December 2024.

I have plans to introduce:

```ruby
def create_with_id(id, values)
  obj = new(values)
  obj.id = id
  obj.save_changes
end
```

This will make it easier to use the same id when creating
multiple objects.  The first step is removing the existing
uses of create_with_id.
2025-08-06 01:55:51 +09:00

174 lines
5.2 KiB
Ruby

# frozen_string_literal: true
require "spec_helper"
require_relative "../../../prog/ai/inference_router_target_nexus"
# Specs for the strand program behind an inference router target.
#
# The examples drive each label (#start, #setup, #wait_setup, #wait,
# #destroy) plus the #before_run hook directly on a nexus built from a
# "runpod"-type target strand. Wherever the spec expects a RunpodClient to
# be constructed, it is replaced with an instance double.
RSpec.describe Prog::Ai::InferenceRouterTargetNexus do
  subject(:nexus) { described_class.new(target_strand) }

  let(:project) { Project.create(name: "default") }
  let(:location_id) { Location::HETZNER_FSN1_ID }

  # Router that owns the target under test.
  let(:inference_router) do
    Prog::Ai::InferenceRouterNexus.assemble(project_id: project.id, location_id: location_id)
  end

  # Model record the target is attached to.
  let(:inference_router_model) do
    InferenceRouterModel.create(
      model_name: "meta-llama/Llama-3.2-1B-Instruct",
      prompt_billing_resource: "llama-3-2-1b-it-input",
      completion_billing_resource: "llama-3-2-1b-it-output",
      project_inflight_limit: 100,
      project_prompt_tps_limit: 10_000,
      project_completion_tps_limit: 10_000,
      visible: true,
      tags: {capability: "Text Generation"}
    )
  end

  # Target configuration handed to .assemble; the #setup example expects the
  # VLLM_PARAMS entry to be forwarded to the pod's environment.
  let(:config) do
    {
      gpuCount: 1,
      gpuTypeIds: ["NVIDIA RTX 4000 Ada"],
      dataCenterIds: ["EU-RO-1"],
      env: {VLLM_PARAMS: "--tensor-parallel-size 1 --gpu-memory-utilization 0.95"}
    }
  end

  # Strand of a runpod-type target; the nexus under test wraps this strand.
  let(:target_strand) do
    described_class.assemble(
      inference_router_id: inference_router.id,
      inference_router_model_id: inference_router_model.id,
      name: "target",
      priority: 1,
      type: "runpod",
      config: config,
      inflight_limit: 10
    )
  end

  # Stand-in returned wherever an example stubs RunpodClient.new.
  let(:runpod_client) { instance_double(RunpodClient) }

  # Seeds a firewall in the project and makes the project the configured
  # inference endpoint service project.
  # NOTE(review): presumably both are looked up while assembling the
  # inference router -- confirm against InferenceRouterNexus.assemble.
  before do
    Firewall.create(
      name: "inference-router-firewall",
      project_id: project.id,
      location_id: location_id,
      description: "inference-router-firewall"
    )
    allow(Config).to receive(:inference_endpoint_service_project_id).and_return(project.id)
  end

  describe ".assemble" do
    it "creates an InferenceRouterTarget with expected attributes" do
      assembled = described_class.assemble(
        inference_router_id: inference_router.id,
        inference_router_model_id: inference_router_model.id,
        name: "target",
        priority: 1,
        inflight_limit: 10
      )

      # The target record is looked up by the id of the returned strand.
      expect(InferenceRouterTarget[assembled.id]).to have_attributes(
        inference_router_model_id: inference_router_model.id,
        inference_router_id: inference_router.id
      )
    end
  end

  describe "#before_run" do
    context "when destroy is set" do
      # Force the destroy branch to be taken in every example of this context.
      before { expect(nexus).to receive(:when_destroy_set?).and_yield }

      it "hops to destroy if not already destroying" do
        expect(nexus.strand).to receive(:label).twice.and_return("active")

        expect { nexus.before_run }.to hop("destroy")
      end

      it "does not hop if already in destroy state" do
        expect(nexus.strand).to receive(:label).and_return("destroy")

        expect { nexus.before_run }.not_to hop("destroy")
      end

      it "exits if there are operations on the stack" do
        cancelled = {"msg" => "operation is cancelled due to the destruction of the inference router target"}
        strand = nexus.strand
        expect(strand).to receive(:label).and_return("destroy")
        expect(strand.stack).to receive(:count).and_return(2)

        expect { nexus.before_run }.to exit(cancelled)
      end
    end
  end

  describe "#start" do
    it "hops to wait for manual target" do
      # Switch the persisted target away from the default "runpod" type.
      target_strand.subject.update(type: "manual")

      expect { nexus.start }.to hop("wait")
    end

    it "hops to setup for runpod target" do
      expect { nexus.start }.to hop("setup")
    end
  end

  describe "#setup" do
    it "creates a pod and updates target state" do
      expect(RunpodClient).to receive(:new).and_return(runpod_client)

      target_ubid = target_strand.subject.ubid
      expected_env = hash_including(
        "HF_MODEL" => inference_router_model.model_name,
        "VLLM_PARAMS" => config[:env][:VLLM_PARAMS]
      )
      expect(runpod_client).to receive(:create_pod)
        .with(target_ubid, hash_including("env" => expected_env))
        .and_return("pod-123")

      expect { nexus.setup }.to hop("wait_setup")

      # Reload through the strand so the persisted state is what gets checked.
      expect(Strand[target_strand.id].subject).to have_attributes(state: {"pod_id" => "pod-123"})
    end
  end

  describe "#wait_setup" do
    before { expect(RunpodClient).to receive(:new).and_return(runpod_client) }

    it "naps if pod publicIp is empty" do
      expect(runpod_client).to receive(:get_pod).and_return({"publicIp" => ""})

      expect { nexus.wait_setup }.to nap(10)
    end

    it "hops to wait if publicIp is available" do
      expect(runpod_client).to receive(:get_pod).and_return({"publicIp" => "1.1.1.1"})

      expect { nexus.wait_setup }.to hop("wait")
    end
  end

  describe "#wait" do
    it "naps for a long time (30 days)" do
      thirty_days = 30 * 24 * 60 * 60

      expect { nexus.wait }.to nap(thirty_days)
    end
  end

  describe "#destroy" do
    # Exit payload expected from #destroy in both examples.
    let(:deleted_exit) { {"msg" => "inference router target is deleted"} }

    it "destroys target and deletes pod if pod_id is set" do
      expect(RunpodClient).to receive(:new).and_return(runpod_client)
      target_strand.subject.update(state: {"pod_id" => "pod-123"})
      expect(runpod_client).to receive(:delete_pod).with("pod-123")

      expect { nexus.destroy }.to exit(deleted_exit)
    end

    it "destroys target without deleting pod if none exists" do
      expect { nexus.destroy }.to exit(deleted_exit)
    end
  end
end