Files
ubicloud/spec/model/vm_spec.rb
Maciek Sarnowicz 3f769e1193 Use vcpus for VM allocation and topology
This patch switches the VM allocation from Cores to VCpus when selecting a host. There are two use cases motivating this change:
- we have x64 hosts that have threads_per_cores ratio of 1 (GEX44). That breaks the assumption encoded in the VmSizes, per architecture type
- we are going to introduce the Burstable family, where the relation between the number of CPUs allocated for a VM and the number of Cores allocated to the slice hosting that VM may vary per VM instance, regardless of the architecture.

With this change, the number of cores is computed during the allocation, based on the actual architecture of the candidate host, and then updated back to the VM. When the VM is allocated in a slice, the number of cores is left as 0 on the VM; instead, the number of cores is saved in the VmHostSlice, and that is subtracted from the host. At any point in time this should be true: vm_host.used_cores == SUM(vm_host_slice.cores) + SUM(vm.cores if vm.vm_host_slice_id.nil?)

This logic also helps us indicate who is really controlling the cores - it is either the VmHostSlice or a Vm running without the slice. Vms inside the slice do not control the cores and rely on the slice instead.

The special case for vcpus==1 in cloud_hypervisor_cpu_topology is needed for Burstables, where we will have Burstable-1 size. I wanted to include this in the review together with this patch for completeness.
2025-02-05 16:27:35 -05:00

283 lines
11 KiB
Ruby

# frozen_string_literal: true
require_relative "spec_helper"
RSpec.describe Vm do
subject(:vm) { described_class.new(display_state: "creating", created_at: Time.now) }
describe "#display_state" do
  # Expects the VM to be asked for its semaphores (at least once) and
  # answers with a single Semaphore double carrying the given name.
  def expect_single_semaphore(semaphore_name)
    semaphore = instance_double(Semaphore, name: semaphore_name)
    expect(vm).to receive(:semaphores).and_return([semaphore]).at_least(:once)
  end

  it "returns deleting if destroy semaphore increased" do
    expect_single_semaphore("destroy")
    expect(vm.display_state).to eq("deleting")
  end

  it "returns restarting if restart semaphore increased" do
    expect_single_semaphore("restart")
    expect(vm.display_state).to eq("restarting")
  end

  it "returns stopped if stop semaphore increased" do
    expect_single_semaphore("stop")
    expect(vm.display_state).to eq("stopped")
  end

  it "returns waiting for capacity if semaphore increased" do
    expect_single_semaphore("waiting_for_capacity")
    expect(vm.display_state).to eq("waiting for capacity")
  end

  it "returns no capacity available if it's waiting capacity more than 15 minutes" do
    # Report a creation time 16 minutes in the past.
    sixteen_minutes_ago = Time.now - 16 * 60
    expect(vm).to receive(:created_at).and_return(sixteen_minutes_ago)
    expect_single_semaphore("waiting_for_capacity")
    expect(vm.display_state).to eq("no capacity available")
  end

  it "return same if semaphores not increased" do
    # Nothing is stubbed here; the value given to the subject is expected back.
    expect(vm.display_state).to eq("creating")
  end
end
describe "#cloud_hypervisor_cpu_topology" do
  # Host attributes shared by the single-socket hyperthreaded examples.
  let(:single_socket_ht_host) {
    {total_cpus: 12, total_cores: 6, total_dies: 1, total_sockets: 1}
  }

  # Host attributes shared by the dual-socket hyperthreaded examples.
  let(:dual_socket_ht_host) {
    {total_cpus: 24, total_cores: 12, total_dies: 2, total_sockets: 2}
  }

  # Expects vm.vm_host to be called at least once and answers with a VmHost
  # double exposing the given attributes.
  def expect_host_with(attributes)
    host = instance_double(VmHost, attributes)
    expect(vm).to receive(:vm_host).and_return(host).at_least(:once)
  end

  # Assigns the family and vcpus on the VM under test.
  def configure_vm(family:, vcpus:)
    vm.family = family
    vm.vcpus = vcpus
  end

  it "scales a single-socket hyperthreaded system" do
    configure_vm(family: "standard", vcpus: 4)
    expect_host_with(single_socket_ht_host)
    expect(vm.cloud_hypervisor_cpu_topology.to_s).to eq("2:2:1:1")
  end

  it "scales a dual-socket hyperthreaded system" do
    configure_vm(family: "standard", vcpus: 4)
    expect_host_with(dual_socket_ht_host)
    expect(vm.cloud_hypervisor_cpu_topology.to_s).to eq("2:2:1:1")
  end

  it "crashes if total_cpus is not multiply of total_cores" do
    expect_host_with(total_cpus: 3, total_cores: 2)
    expect { vm.cloud_hypervisor_cpu_topology }.to raise_error(RuntimeError, "BUG")
  end

  it "crashes if total_dies is not a multiple of total_sockets" do
    expect_host_with(total_cpus: 24, total_cores: 12, total_dies: 3, total_sockets: 2)
    expect { vm.cloud_hypervisor_cpu_topology }.to raise_error(RuntimeError, "BUG")
  end

  it "crashes if cores allocated per die is not uniform number" do
    configure_vm(family: "standard", vcpus: 4)
    expect_host_with(total_cpus: 1, total_cores: 1, total_dies: 1, total_sockets: 1)
    expect { vm.cloud_hypervisor_cpu_topology }.to raise_error(
      RuntimeError, "BUG: need uniform number of cores allocated per die"
    )
  end

  it "crashes if the vcpus is an odd number" do
    configure_vm(family: "burstable", vcpus: 5)
    expect_host_with(single_socket_ht_host)
    expect { vm.cloud_hypervisor_cpu_topology }.to raise_error(
      RuntimeError, "BUG: need uniform number of cores allocated per die"
    )
  end

  it "scales a single-socket hyperthreaded system for burstable family for 1 vcpu" do
    configure_vm(family: "burstable", vcpus: 1)
    expect_host_with(single_socket_ht_host)
    expect(vm.cloud_hypervisor_cpu_topology.to_s).to eq("1:1:1:1")
  end

  it "scales a double-socket hyperthreaded system for burstable family for 1 vcpu" do
    configure_vm(family: "burstable", vcpus: 1)
    expect_host_with(dual_socket_ht_host)
    expect(vm.cloud_hypervisor_cpu_topology.to_s).to eq("1:1:1:1")
  end

  it "scales a single-socket non-hyperthreaded system for burstable family for 1 vcpu" do
    configure_vm(family: "burstable", vcpus: 1)
    # total_cpus equals total_cores on this host.
    expect_host_with(total_cpus: 12, total_cores: 12, total_dies: 1, total_sockets: 1)
    expect(vm.cloud_hypervisor_cpu_topology.to_s).to eq("1:1:1:1")
  end
end
describe "#update_spdk_version" do
  let(:host) { create_vm_host }

  # Every example in this group expects the VM to look up its host.
  before do
    expect(vm).to receive(:vm_host).and_return(host)
  end

  it "can update spdk version" do
    # The installation is created with the same id as the host.
    installation = SpdkInstallation.create(version: "b", allocation_weight: 100, vm_host_id: host.id) { |si| si.id = host.id }
    volumes_ds = instance_double(Sequel::Dataset)

    expect(vm).to receive(:vm_storage_volumes_dataset).and_return(volumes_ds)
    expect(volumes_ds).to receive(:update).with(spdk_installation_id: installation.id)
    expect(vm).to receive(:incr_update_spdk_dependency)

    vm.update_spdk_version("b")
  end

  it "fails if spdk installation not found" do
    # No SpdkInstallation is created in this example.
    expect { vm.update_spdk_version("b") }.to raise_error(RuntimeError, "SPDK version b not found on host")
  end
end
describe "#utility functions" do
  it "can compute the ipv4 addresses" do
    net = NetAddr::IPv4Net.new(NetAddr.parse_ip("1.1.1.0"), NetAddr::Mask32.new(32))
    address = instance_double(AssignedVmAddress, ip: net)
    expect(vm).to receive(:assigned_vm_address).and_return(address).at_least(:once)

    expect(vm.ephemeral_net4.to_s).to eq("1.1.1.0")
    expect(vm.ip4.to_s).to eq("1.1.1.0/32")
  end

  it "can compute nil if ipv4 is not assigned" do
    expect(vm.ephemeral_net4).to be_nil
  end

  it "returns the right private_ipv4 based on the netmask" do
    # Each row pairs the network reported by the NIC with the address the VM
    # is expected to return for it.
    cases = [
      [NetAddr::IPv4Net.parse("192.168.12.13/32"), "192.168.12.13"],
      [NetAddr.parse_net("10.10.240.0/24"), "10.10.240.1"]
    ]

    cases.each do |network, expected_ip|
      nic = instance_double(Nic, private_ipv4: network)
      expect(vm).to receive(:nics).and_return([nic]).twice
      expect(vm.private_ipv4.to_s).to eq(expected_ip)
    end
  end
end
it "initiates a new health monitor session" do
  # The host double hands out one Sshable double, which must receive the call.
  sshable = instance_double(Sshable)
  host = instance_double(VmHost, sshable: sshable)

  expect(vm).to receive(:vm_host).and_return(host).at_least(:once)
  expect(sshable).to receive(:start_fresh_session)

  vm.init_health_monitor_session
end
it "checks pulse" do
  ssh = instance_double(Net::SSH::Connection::Session)
  session = {ssh_session: ssh}
  pulse = {reading: "down", reading_rpt: 5, reading_chg: Time.now - 30}
  # Runs the check with the same session and previous pulse each time and
  # returns only the resulting reading.
  current_reading = -> { vm.check_pulse(session: session, previous_pulse: pulse)[:reading] }

  expect(vm).to receive(:inhost_name).and_return("vmxxxx").at_least(:once)

  # Command output with two "active" lines: reading is "up".
  expect(ssh).to receive(:exec!).and_return("active\nactive\n")
  expect(current_reading.call).to eq("up")

  # Command output containing an "inactive" line: reading is "down", and the
  # VM is expected to be reloaded and to have its checkup incremented.
  expect(ssh).to receive(:exec!).and_return("active\ninactive\n")
  expect(vm).to receive(:reload).and_return(vm)
  expect(vm).to receive(:incr_checkup)
  expect(current_reading.call).to eq("down")

  # The command raising an SSH error is expected to behave the same way.
  expect(ssh).to receive(:exec!).and_raise(Sshable::SshError)
  expect(vm).to receive(:reload).and_return(vm)
  expect(vm).to receive(:incr_checkup)
  expect(current_reading.call).to eq("down")
end
it "returns storage volumes hash list" do
  image = instance_double(BootImage, name: "boot_image", version: "1")
  device = instance_double(StorageDevice, name: "default")

  # Boot volume: has a boot image and a key encryption key, no rate limits.
  boot_volume = instance_double(
    VmStorageVolume,
    disk_index: 0,
    device_id: "dev1",
    size_gib: 1,
    boot: true,
    boot_image: image,
    key_encryption_key_1: "key",
    spdk_version: "spdk1",
    use_bdev_ubi: false,
    skip_sync: false,
    storage_device: device,
    max_ios_per_sec: nil,
    max_read_mbytes_per_sec: nil,
    max_write_mbytes_per_sec: nil
  )

  # Data volume: no boot image, no key encryption key, all rate limits set.
  data_volume = instance_double(
    VmStorageVolume,
    disk_index: 1,
    device_id: "dev2",
    size_gib: 100,
    boot: false,
    boot_image: nil,
    key_encryption_key_1: nil,
    spdk_version: "spdk2",
    use_bdev_ubi: true,
    skip_sync: true,
    storage_device: device,
    max_ios_per_sec: 100,
    max_read_mbytes_per_sec: 200,
    max_write_mbytes_per_sec: 300
  )

  expected_boot = {
    "boot" => true,
    "image" => "boot_image",
    "image_version" => "1",
    "size_gib" => 1,
    "device_id" => "dev1",
    "disk_index" => 0,
    "encrypted" => true,
    "spdk_version" => "spdk1",
    "use_bdev_ubi" => false,
    "skip_sync" => false,
    "storage_device" => "default",
    "read_only" => false,
    "max_ios_per_sec" => nil,
    "max_read_mbytes_per_sec" => nil,
    "max_write_mbytes_per_sec" => nil
  }

  expected_data = {
    "boot" => false,
    "image" => nil,
    "image_version" => nil,
    "size_gib" => 100,
    "device_id" => "dev2",
    "disk_index" => 1,
    "encrypted" => false,
    "spdk_version" => "spdk2",
    "use_bdev_ubi" => true,
    "skip_sync" => true,
    "storage_device" => "default",
    "read_only" => false,
    "max_ios_per_sec" => 100,
    "max_read_mbytes_per_sec" => 200,
    "max_write_mbytes_per_sec" => 300
  }

  expect(vm).to receive(:vm_storage_volumes).and_return([boot_volume, data_volume])
  expect(vm.storage_volumes).to eq([expected_boot, expected_data])
end
describe "#VmSize options" do
  # Each example collects the names of the sizes that break the rule and
  # compares that list with [], so a failure names the offending entries of
  # Option::VmSizes instead of only reporting "expected true, got false".

  it "no burstable cpu allowed for Standard VMs" do
    # A size is consistent when "standard-" in its name coincides with a
    # cpu_burst_percent_limit of 0.
    offenders = Option::VmSizes.reject { _1.name.include?("standard-") == (_1.cpu_burst_percent_limit == 0) }
    expect(offenders.map(&:name)).to eq([])
  end

  it "no gpu allowed for non-GPU VMs" do
    # A size is consistent when "gpu" in its name coincides with its gpu flag.
    offenders = Option::VmSizes.reject { _1.name.include?("gpu") == _1.gpu }
    expect(offenders.map(&:name)).to eq([])
  end

  it "no odd number of vcpus allowed, except for 1" do
    offenders = Option::VmSizes.reject { _1.vcpus == 1 || _1.vcpus.even? }
    expect(offenders.map(&:name)).to eq([])
  end
end
end