Files
ubicloud/spec/prog/vm/vm_pool_spec.rb
Maciek Sarnowicz d2899d3f4a Use vcpus for VM allocation and topology
This patch switches the VM allocation from Cores to VCpus when selecting a host. There are two use cases motivating this change:
- we have x64 hosts that have threads_per_cores ratio of 1 (GEX44). That breaks the assumption encoded in the VmSizes, per architecture type
- we are going to introduce Burstable family, where relation between number of CPUs allocated for a VM and number of Cores allocated to a slice hosting that VM may vary per VM instance, regardless of the architecture.

With this change, the number of cores is computed during the allocation, based on the actual architecture of the candidate host and then updated back to the VM. In case when the VM is allocated in a slice, the number of cores is left as 0 on the VM, and instead, the number of cores is saved in the VmHostSlice, and that is subtracted from the host. At any point in time this should be true: vm_host.used_cores == SUM(vm_host_slice.cores) + SUM(vm.cores if vm.vm_host_slice_id.nil?)

This logic also helps us indicate who is really controlling the cores - it is either the VmHostSlice or a Vm running without the slice. Vms inside the slice, do not control the cores and relay on the slice instead.

The special case for vcpus==1 in cloud_hypervisor_cpu_topology is needed for Burstables, where we will have Burstable-1 size. I wanted to include this in the review together with this patch for completeness.
2025-02-05 14:42:50 -08:00

147 lines
5.2 KiB
Ruby

# frozen_string_literal: true
require_relative "../../model/spec_helper"
RSpec.describe Prog::Vm::VmPool do
subject(:nx) { described_class.new(st) }
let(:st) { Strand.new }
let(:project_id) { Project.create(name: "test-project").id }
describe ".assemble" do
it "creates the entity and strand properly" do
st = described_class.assemble(
size: 3, vm_size: "standard-2", boot_image: "img", location: "hetzner-fsn1",
storage_size_gib: 86, storage_encrypted: true,
storage_skip_sync: false, arch: "x64"
)
pool = VmPool[st.id]
expect(pool).not_to be_nil
expect(pool.size).to eq(3)
expect(pool.vm_size).to eq("standard-2")
expect(pool.boot_image).to eq("img")
expect(pool.location).to eq("hetzner-fsn1")
expect(pool.storage_size_gib).to eq(86)
expect(pool.storage_encrypted).to be(true)
expect(pool.storage_skip_sync).to be(false)
expect(st.label).to eq("create_new_vm")
end
end
describe "#create_new_vm" do
let(:prj) {
Project.create_with_id(name: "default")
}
it "creates a new vm and hops to wait" do
expect(Config).to receive(:vm_pool_project_id).and_return(prj.id).at_least(:once)
st = described_class.assemble(
size: 3, vm_size: "standard-2", boot_image: "img", location: "hetzner-fsn1",
storage_size_gib: 86, storage_encrypted: true,
storage_skip_sync: false, arch: "arm64"
)
st.update(label: "create_new_vm")
expect(SshKey).to receive(:generate).and_call_original
expect(nx).to receive(:vm_pool).and_return(VmPool[st.id]).at_least(:once)
expect { nx.create_new_vm }.to hop("wait")
pool = VmPool[st.id]
expect(pool.vms.count).to eq(1)
expect(pool.vms.first.sshable).not_to be_nil
end
end
describe "#wait" do
before do
create_vm_host(location: "github-runners", total_cores: 2, total_cpus: 4, used_cores: 0)
end
let(:pool) {
VmPool.create_with_id(
size: 0, vm_size: "standard-2", boot_image: "img", location: "hetzner-fsn1",
storage_size_gib: 86, storage_encrypted: true,
storage_skip_sync: false, arch: "x64"
)
}
it "waits if nothing to do" do
expect(nx).to receive(:vm_pool).and_return(pool).at_least(:once)
expect { nx.wait }.to nap(30)
end
it "hops to create_new_vm if there are enough idle cores" do
pool.update(size: 1)
expect(nx).to receive(:vm_pool).and_return(pool).at_least(:once)
expect { nx.wait }.to hop("create_new_vm")
end
it "hops to create_new_vm if there are enough idle cores for the given arch" do
pool.update(size: 1)
expect(nx).to receive(:vm_pool).and_return(pool).at_least(:once)
Vm.create(vm_host: VmHost.first, unix_user: "ubi", public_key: "key", name: "vm1", location: "github-runners", boot_image: "github-ubuntu-2204", family: "standard", arch: "arm64", cores: 2, vcpus: 2, memory_gib: 8, project_id:) { _1.id = Sshable.create_with_id.id }
expect { nx.wait }.to hop("create_new_vm")
end
it "waits if there are not enough idle cores due to waiting github runners" do
pool.update(size: 1)
expect(nx).to receive(:vm_pool).and_return(pool).at_least(:once)
Vm.create(vm_host: VmHost.first, unix_user: "ubi", public_key: "key", name: "vm1", location: "github-runners", boot_image: "github-ubuntu-2204", family: "standard", arch: "x64", cores: 2, vcpus: 4, memory_gib: 8, project_id:) { _1.id = Sshable.create_with_id.id }
expect { nx.wait }.to nap(30)
end
end
describe "#destroy" do
let(:pool) {
VmPool.create_with_id(size: 0, vm_size: "standard-2", boot_image: "img", location: "hetzner-fsn1", storage_size_gib: 86)
}
it "increments vms' destroy semaphore and hops to wait_vms_destroy" do
ps = instance_double(PrivateSubnet)
vm = instance_double(Vm, private_subnets: [ps])
expect(nx).to receive(:vm_pool).and_return(pool)
expect(pool).to receive(:vms).and_return([vm])
expect(vm).to receive(:incr_destroy)
expect(ps).to receive(:incr_destroy)
expect { nx.destroy }.to hop("wait_vms_destroy")
end
end
describe "#wait_vms_destroy" do
let(:pool) {
VmPool.create_with_id(size: 0, vm_size: "standard-2", boot_image: "img", location: "hetzner-fsn1", storage_size_gib: 86)
}
it "pops if vms are all destroyed" do
expect(nx).to receive(:vm_pool).and_return(pool).at_least(:once)
expect(pool).to receive(:destroy)
expect(pool).to receive(:vms).and_return([])
expect { nx.wait_vms_destroy }.to exit({"msg" => "pool destroyed"})
end
it "naps if there are still vms" do
expect(nx).to receive(:vm_pool).and_return(pool).at_least(:once)
expect(pool).to receive(:vms).and_return([true])
expect { nx.wait_vms_destroy }.to nap(10)
end
end
describe "#before_run" do
it "hops to destroy when needed" do
expect(nx).to receive(:when_destroy_set?).and_yield
expect { nx.before_run }.to hop("destroy")
end
it "does not hop to destroy if already in the destroy state" do
expect(nx).to receive(:when_destroy_set?).and_yield
expect(nx.strand).to receive(:label).and_return("destroy")
expect { nx.before_run }.not_to hop("destroy")
end
end
end