Files
ubicloud/spec/prog/vm/vm_host_slice_nexus_spec.rb
Hadi Moshayedi cbd4c77cdf Make VmHostSlice.enabled mean "not scheduled for destroy".
Previously, `VmHostSlice.enabled` was overloaded to mean both:
- The slice was prepared
- The slice was not scheduled for destruction

Previously, we waited for the slice to be prepared before preparing the VM
inside it. We removed the “prepared” check in 428d367, so tracking
preparedness in a vm_host_slice column is no longer needed. The
“prepared” state can still be inferred from the Strand’s label if
necessary.

A slice is allocatable immediately upon creation (we already allocate
the first VM at creation time). When `slice.is_shared=false`, 2nd
allocation couldn't happen. When `slice.is_shared=true`, we waited until
the slice is prepared to allow the 2nd allocation. This isn't necessary.

The other use of `enabled` is to prevent new VM allocations once a slice
is scheduled for destruction. A slice is marked for destroy when its
last VM begins teardown.

-------------

Dropping the “prepared” purpose of `enabled` clarifies its intent and
fixes an edge case where unprepared slices could be left behind after
their final VM was deleted. Previously, `Nexus::destroy_slice` relied on
an `enabled: true → false` transition that never fired for unprepared
slices, leaving them orphaned. This could happen, for example, when a VM
was destroyed before its slice was prepared.

Theoretically, there were a few other ways to solve this issue, but this
change also makes the system less complex in addition to solving it.
2025-07-15 16:54:14 -07:00

203 lines
7.2 KiB
Ruby

# frozen_string_literal: true
require "spec_helper"
require_relative "../../../prog/vm/vm_host_slice_nexus"
require_relative "../../../prog/vm/host_nexus"
# Unit tests for Prog::Vm::VmHostSliceNexus, the state machine that manages a
# VmHostSlice's lifecycle on its host: assembly, cgroup slice preparation,
# steady-state waiting, availability checks, post-reboot recovery, and destroy.
RSpec.describe Prog::Vm::VmHostSliceNexus do
  # Strand with a fixed UUID so the prog under test starts at the "create" label.
  subject(:nx) { described_class.new(Strand.create(id: "b231a172-8f56-8b10-bbed-8916ea4e5c28", prog: "Prog::Vm::VmHostSliceNexus", label: "create")) }

  let(:sshable) { vm_host.sshable }
  let(:vm_host) { create_vm_host(total_cores: 4, used_cores: 1) }

  # A dedicated (non-shared) slice claiming 1 core / 200% CPU / 4 GiB on the host.
  let(:vm_host_slice) {
    VmHostSlice.create(
      vm_host_id: vm_host.id,
      name: "standard",
      family: "standard",
      is_shared: false,
      cores: 1,
      total_cpu_percent: 200,
      used_cpu_percent: 0,
      total_memory_gib: 4,
      used_memory_gib: 0
    )
  }

  before do
    allow(nx).to receive_messages(vm_host_slice: vm_host_slice)
    allow(vm_host_slice).to receive_messages(vm_host: vm_host)
    # 16 host CPUs: 0-1 reserved for SPDK, 2-3 assigned to the slice, rest free.
    (0..15).each { |i|
      VmHostCpu.create(
        spdk: i < 2,
        vm_host_slice_id: (i == 2 || i == 3) ? vm_host_slice.id : nil
      ) {
        it.vm_host_id = vm_host.id
        it.cpu_number = i
      }
    }
  end

  describe ".assemble_with_host" do
    it "creates vm host slice" do
      # Prepare a fresh host (separate from the let-provided vm_host) so the
      # assemble path allocates CPUs from a clean slate.
      st_vh = Prog::Vm::HostNexus.assemble("1.2.3.4")
      host = st_vh.subject
      expect(host).not_to be_nil
      host.update(total_cpus: 8, total_cores: 4)
      (0..15).each { |i|
        VmHostCpu.create(vm_host_id: host.id, cpu_number: i, spdk: i < 2)
      }

      # Run the assemble under test and verify the persisted slice record.
      st_rg = described_class.assemble_with_host("standard", host, family: "standard", allowed_cpus: [2, 3], memory_gib: 4)
      rg = st_rg.subject
      expect(rg).not_to be_nil
      expect(rg.name).to eq("standard")
      expect(rg.allowed_cpus_cgroup).to eq("2-3")
      expect(rg.cores).to eq(1)
      expect(rg.total_cpu_percent).to eq(200)
      expect(rg.used_cpu_percent).to eq(0)
      expect(rg.total_memory_gib).to eq(4)
      expect(rg.used_memory_gib).to eq(0)
      # NOTE(review): `enabled` is asserted false right after assembly. Given
      # the commit redefines `enabled` as "not scheduled for destroy", confirm
      # against VmHostSlice.assemble_with_host whether a fresh slice should
      # start disabled, or whether this assertion should now expect true.
      expect(rg.enabled).to be(false)
      expect(rg.is_shared).to be(false)
      expect(rg.id).to eq(st_rg.id)
      expect(rg.ubid).to eq(st_rg.ubid)
      # Slice ubids carry the "vs" type prefix.
      expect(rg.ubid[..1] == "vs").to be true
      expect(rg.vm_host).not_to be_nil
      expect(rg.vm_host.id).to eq(host.id)
    end
  end

  describe "#before_run" do
    it "hops to destroy when needed" do
      expect(nx).to receive(:when_destroy_set?).and_yield
      expect { nx.before_run }.to hop("destroy")
    end

    it "does not hop to destroy if already in the destroy state" do
      # The destroy semaphore is set, but the strand is already at "destroy",
      # so before_run must not re-hop.
      expect(nx).to receive(:when_destroy_set?).and_yield
      expect(nx.strand).to receive(:label).and_return("destroy")
      expect { nx.before_run }.not_to hop("destroy")
    end
  end

  describe "#prep" do
    it "starts prep on NotStarted" do
      expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check prep_standard").and_return("NotStarted")
      expect(sshable).to receive(:cmd).with("common/bin/daemonizer 'sudo host/bin/setup-slice prep standard.slice \"2-3\"' prep_standard")
      expect(vm_host_slice).to receive(:inhost_name).and_return("standard.slice")
      expect { nx.prep }.to nap(1)
    end

    it "starts prep on Failed" do
      # A failed daemonizer run is retried the same way as NotStarted.
      expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check prep_standard").and_return("Failed")
      expect(sshable).to receive(:cmd).with("common/bin/daemonizer 'sudo host/bin/setup-slice prep standard.slice \"2-3\"' prep_standard")
      expect(vm_host_slice).to receive(:inhost_name).and_return("standard.slice")
      expect { nx.prep }.to nap(1)
    end

    it "hops to wait" do
      # On success the daemonizer state is cleaned up before hopping.
      expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check prep_standard").and_return("Succeeded")
      expect(sshable).to receive(:cmd).with("common/bin/daemonizer --clean prep_standard")
      expect { nx.prep }.to hop("wait")
    end

    it "do nothing on random result" do
      # Unknown daemonizer states (e.g. InProgress) just nap and poll again.
      expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check prep_standard").and_return("foobar")
      expect { nx.prep }.to nap(1)
    end
  end

  describe "#wait" do
    it "naps for 6 hours" do
      expect { nx.wait }.to nap(6 * 60 * 60)
    end

    it "hops to start_after_host_reboot when signaled" do
      expect(nx).to receive(:when_start_after_host_reboot_set?).and_yield
      expect(nx).to receive(:register_deadline).with(:wait, 5 * 60)
      expect { nx.wait }.to hop("start_after_host_reboot")
    end

    it "hops to unavailable based on the slice's available status" do
      # Checkup with available? == false => unavailable.
      expect(nx).to receive(:when_checkup_set?).and_yield
      expect(nx).to receive(:available?).and_return(false)
      expect { nx.wait }.to hop("unavailable")

      # An SSH failure during the check is treated the same as unavailable.
      expect(nx).to receive(:when_checkup_set?).and_yield
      expect(nx).to receive(:available?).and_raise Sshable::SshError.new("ssh failed", "", "", nil, nil)
      expect { nx.wait }.to hop("unavailable")

      # Checkup with available? == true => stay in wait and nap 6 hours.
      expect(nx).to receive(:when_checkup_set?).and_yield
      expect(nx).to receive(:available?).and_return(true)
      expect { nx.wait }.to nap(6 * 60 * 60)
    end
  end

  describe "#destroy" do
    it "deletes resources and exits" do
      # Both the DB record and the systemd slice on the host are removed.
      expect(vm_host_slice).to receive(:destroy)
      expect(sshable).to receive(:cmd).with("sudo host/bin/setup-slice delete standard.slice")
      expect(vm_host_slice).to receive(:inhost_name).and_return("standard.slice")
      expect { nx.destroy }.to exit({"msg" => "vm_host_slice destroyed"})
    end
  end

  describe "#start_after_host_reboot" do
    it "starts slice on the host and hops to wait" do
      # Reboot wipes unpersisted cgroup state; recreate it, then resume waiting.
      expect(sshable).to receive(:cmd).with("sudo host/bin/setup-slice recreate-unpersisted standard.slice")
      expect(vm_host_slice).to receive(:inhost_name).and_return("standard.slice")
      expect { nx.start_after_host_reboot }.to hop("wait")
    end
  end

  describe "#unavailable" do
    it "hops to start_after_host_reboot when needed" do
      # A reboot-recovery request takes priority; re-arm checkup for afterwards.
      expect(nx).to receive(:when_start_after_host_reboot_set?).and_yield
      expect(nx).to receive(:incr_checkup)
      expect { nx.unavailable }.to hop("start_after_host_reboot")
    end

    it "registers an immediate deadline if slice is unavailable" do
      # Deadline of 0 pages immediately while the slice stays unavailable.
      expect(nx).to receive(:register_deadline).with("wait", 0)
      expect(nx).to receive(:available?).and_return(false)
      expect { nx.unavailable }.to nap(30)
    end

    it "hops to wait if slice is available" do
      expect(nx).to receive(:available?).and_return(true)
      expect { nx.unavailable }.to hop("wait")
    end
  end

  describe "#available?" do
    let(:session) { instance_double(Net::SSH::Connection::Session) }

    before do
      # availability check runs over a fresh SSH session: the slice unit must be
      # active and its effective cpuset must match the assigned CPUs (2-3).
      expect(sshable).to receive(:start_fresh_session).and_yield(session)
      expect(session).to receive(:exec!).with("systemctl is-active standard.slice").and_return("active\nactive\n").once
      expect(session).to receive(:exec!).with("cat /sys/fs/cgroup/standard.slice/cpuset.cpus.effective").and_return("2-3\n").once
    end

    it "returns the available status" do
      # "root" partition means the slice has exclusive control of its cpuset.
      expect(session).to receive(:exec!).with("cat /sys/fs/cgroup/standard.slice/cpuset.cpus.partition").and_return("root\n").once
      expect(nx.available?).to be true
    end

    it "fails on the incorrect partition status" do
      expect(session).to receive(:exec!).with("cat /sys/fs/cgroup/standard.slice/cpuset.cpus.partition").and_return("member\n").once
      expect(nx.available?).to be false
    end
  end
end