Files
ubicloud/spec/prog/vm/host_nexus_spec.rb
Jeremy Evans 7544717b96 Have last exiting child schedule parent strand
Previously, when reaping child processes, if there were no remaining
reapable children, the parent strand would only nap 1, which puts
unnecessary load on respirate unless at least one child strand
exits in the next second.

Change this approach by having the exiting child strands, after they
release the lease, schedule their parent immediately if the parent
has no non-exited child strands.

When doing this, you need to be careful to make sure there are not
race conditions that would delay the scheduling of the parent.
There are two potential situations you need to handle:

1. Multiple children exiting at the same time
2. Parent currently running while child is exiting

By waiting until after the child strand leases are released, you
still have a race condition with 1, but the race condition is that
multiple child strands exiting concurrently could both reschedule
the parent strand.  However, that isn't a problem. You want to avoid
the case where neither child strand schedules the parent, which
rescheduling after releasing the lease should do.

To handle 2, inside reap use Model#lock! to lock the parent strand.
This will make exiting child strands block if they UPDATE the parent
strand with a new schedule, until the parent strand's transaction
commits.  However, it's possible that a child strand already UPDATED
the parent. To handle this situation, before calling lock!, store
the cached schedule value in a local variable.  lock! implicitly does
a reload, so compare the schedule value after reload.  If the schedule
has changed, likely a child scheduled the parent for immediate
execution, so nap 0 in that case.

Just in case there are unforeseen race conditions that are not handled,
only nap for 120 seconds if there are active children.  Worst case
scenario, this results in a 2 minute delay for running the parent.
However, this can potentially result in 120x less load from parent
strands polling children.
2025-06-28 03:30:43 +09:00

607 lines
26 KiB
Ruby

# frozen_string_literal: true
require_relative "../../model/spec_helper"
RSpec.describe Prog::Vm::HostNexus do
subject(:nx) { described_class.new(st) }
let(:st) { described_class.assemble("192.168.0.1") }
let(:hetzner_ips) {
[
["127.0.0.1", "127.0.0.1", false],
["30.30.30.32/29", "127.0.0.1", true],
["2a01:4f8:10a:128b::/64", "127.0.0.1", true]
].map {
Hosting::HetznerApis::IpInfo.new(ip_address: _1, source_host_ip: _2, is_failover: _3)
}
}
let(:vms) { [instance_double(Vm, memory_gib: 1), instance_double(Vm, memory_gib: 2)] }
let(:spdk_installations) { [instance_double(SpdkInstallation, cpu_count: 4, hugepages: 4)] }
let(:vm_host_slices) { [instance_double(VmHostSlice, name: "standard1"), instance_double(VmHostSlice, name: "standard2")] }
let(:vm_host) { instance_double(VmHost, spdk_installations: spdk_installations, vms: vms, slices: vm_host_slices, id: "1d422893-2955-4c2c-b41c-f2ec70bcd60d", spdk_cpu_count: 2) }
let(:sshable) { instance_double(Sshable, raw_private_key_1: "bogus") }
before do
allow(nx).to receive_messages(vm_host: vm_host, sshable: sshable)
allow(sshable).to receive(:start_fresh_session).and_return(instance_double(Net::SSH::Connection::Session))
end
describe ".assemble" do
it "fails if location doesn't exist" do
expect {
described_class.assemble("127.0.0.1", location_id: nil)
}.to raise_error RuntimeError, "No existing Location"
end
it "creates addresses properly for a regular host" do
st = described_class.assemble("127.0.0.1")
expect(st).to be_a Strand
expect(st.label).to eq("start")
expect(st.subject.assigned_subnets.count).to eq(1)
expect(st.subject.assigned_subnets.first.cidr.to_s).to eq("127.0.0.1/32")
expect(st.subject.assigned_host_addresses.count).to eq(1)
expect(st.subject.assigned_host_addresses.first.ip.to_s).to eq("127.0.0.1/32")
expect(st.subject.provider).to be_nil
end
it "creates addresses properly and sets the server name for a hetzner host" do
expect(Hosting::Apis).to receive(:pull_ips).and_return(hetzner_ips)
expect(Hosting::Apis).to receive(:pull_data_center).and_return("fsn1-dc14")
expect(Hosting::Apis).to receive(:set_server_name).and_return(nil)
st = described_class.assemble("127.0.0.1", provider_name: HostProvider::HETZNER_PROVIDER_NAME, server_identifier: "1")
expect(st).to be_a Strand
expect(st.label).to eq("start")
expect(st.subject.assigned_subnets.count).to eq(3)
expect(st.subject.assigned_subnets.map { it.cidr.to_s }.sort).to eq(["127.0.0.1/32", "30.30.30.32/29", "2a01:4f8:10a:128b::/64"].sort)
expect(st.subject.assigned_host_addresses.count).to eq(1)
expect(st.subject.assigned_host_addresses.first.ip.to_s).to eq("127.0.0.1/32")
expect(st.subject.provider_name).to eq(HostProvider::HETZNER_PROVIDER_NAME)
expect(st.subject.data_center).to eq("fsn1-dc14")
end
it "does not set the server name in development" do
expect(Config).to receive(:development?).and_return(true)
expect(Hosting::Apis).to receive(:pull_ips).and_return(hetzner_ips)
expect(Hosting::Apis).to receive(:pull_data_center).and_return("fsn1-dc14")
expect(Hosting::Apis).not_to receive(:set_server_name)
described_class.assemble("127.0.0.1", provider_name: HostProvider::HETZNER_PROVIDER_NAME, server_identifier: "1")
end
end
describe "#before_run" do
it "hops to destroy when needed" do
expect(nx).to receive(:when_destroy_set?).and_yield
expect { nx.before_run }.to hop("destroy")
end
it "does not hop to destroy if already in the destroy state" do
expect(nx).to receive(:when_destroy_set?).and_yield
expect(nx.strand).to receive(:label).and_return("destroy")
expect { nx.before_run }.not_to hop("destroy")
end
end
describe "#start" do
it "hops to setup_ssh_keys" do
expect { nx.start }.to hop("setup_ssh_keys")
end
end
describe "#setup_ssh_keys" do
it "generates a keypair if one is not set" do
expect(Config).to receive(:hetzner_ssh_private_key).and_return(nil)
expect(sshable).to receive(:raw_private_key_1).and_return(nil)
expect(sshable).to receive(:update) do |**args|
key = args[:raw_private_key_1]
expect(key).to be_instance_of String
expect(key.length).to eq 64
end
expect { nx.setup_ssh_keys }.to hop("bootstrap_rhizome")
end
it "does not generate a keypair if one is already set" do
expect(Config).to receive(:hetzner_ssh_private_key).and_return(nil)
expect(sshable).to receive(:raw_private_key_1).and_return("bogus")
expect(sshable).not_to receive(:update)
expect { nx.setup_ssh_keys }.to hop("bootstrap_rhizome")
end
it "skips if private key is not set" do
expect(Config).to receive(:hetzner_ssh_private_key).and_return(nil)
expect(Util).not_to receive(:rootish_ssh)
expect { nx.setup_ssh_keys }.to hop("bootstrap_rhizome")
end
it "adds a public key if private key is set" do
root_key = SshKey.generate
vmhost_key = SshKey.generate
test_public_keys = vmhost_key.public_key.to_s
expect(Config).to receive(:hetzner_ssh_private_key).exactly(2).and_return(root_key.private_key)
expect(Config).to receive(:operator_ssh_public_keys).and_return(nil)
expect(sshable).to receive(:keys).and_return([vmhost_key])
expect(sshable).to receive(:host).and_return("127.0.0.1")
expect(Util).to receive(:rootish_ssh).with("127.0.0.1", "root", anything, "echo '#{test_public_keys}' > ~/.ssh/authorized_keys")
expect { nx.setup_ssh_keys }.to hop("bootstrap_rhizome")
end
it "adds operational keys if set" do
root_key = SshKey.generate
vmhost_key = SshKey.generate
operational_key_1 = SshKey.generate
operational_key_2 = SshKey.generate
test_public_keys = "#{vmhost_key.public_key}\n#{operational_key_1.public_key}\n#{operational_key_2.public_key}"
expect(Config).to receive(:hetzner_ssh_private_key).exactly(2).and_return(root_key.private_key)
expect(Config).to receive(:operator_ssh_public_keys).exactly(2).and_return("#{operational_key_1.public_key}\n#{operational_key_2.public_key}")
expect(sshable).to receive(:keys).and_return([vmhost_key])
expect(sshable).to receive(:host).and_return("127.0.0.1")
expect(Util).to receive(:rootish_ssh).with("127.0.0.1", "root", anything, "echo '#{test_public_keys}' > ~/.ssh/authorized_keys")
expect { nx.setup_ssh_keys }.to hop("bootstrap_rhizome")
end
end
describe "#bootstrap_rhizome" do
it "pushes a bootstrap rhizome process" do
expect(nx).to receive(:push).with(Prog::BootstrapRhizome, {"target_folder" => "host"}).and_call_original
expect { nx.bootstrap_rhizome }.to hop("start", "BootstrapRhizome")
end
it "hops once BootstrapRhizome has returned" do
nx.strand.retval = {"msg" => "rhizome user bootstrapped and source installed"}
expect { nx.bootstrap_rhizome }.to hop("prep")
end
end
describe "#prep" do
it "starts a number of sub-programs" do
expect(vm_host).to receive(:net6).and_return(NetAddr.parse_net("2a01:4f9:2b:35a::/64"))
budded = []
expect(nx).to receive(:bud) do
budded << it
end.at_least(:once)
expect { nx.prep }.to hop("wait_prep")
expect(budded).to eq([
Prog::Vm::PrepHost,
Prog::LearnMemory,
Prog::LearnOs,
Prog::LearnCpu,
Prog::LearnStorage,
Prog::LearnPci,
Prog::InstallDnsmasq,
Prog::SetupSysstat,
Prog::SetupNftables,
Prog::SetupNodeExporter
])
end
it "learns the network from the host if it is not set a-priori" do
expect(vm_host).to receive(:net6).and_return(nil)
budded_learn_network = false
expect(nx).to receive(:bud) do
budded_learn_network ||= (it == Prog::LearnNetwork)
end.at_least(:once)
expect { nx.prep }.to hop("wait_prep")
expect(budded_learn_network).to be true
end
end
describe "#os_supports_slices?" do
it "returns true if the OS supports slices" do
expect(nx.os_supports_slices?("ubuntu-22.04")).to be false
expect(nx.os_supports_slices?("ubuntu-24.04")).to be true
end
end
describe "#wait_prep" do
it "updates the vm_host record from the finished programs" do
Strand.create(parent_id: st.id, prog: "LearnMemory", label: "start", stack: [{}], exitval: {"mem_gib" => 1})
Strand.create(parent_id: st.id, prog: "LearnOs", label: "start", stack: [{}], exitval: {"os_version" => "ubuntu-22.04"})
Strand.create(parent_id: st.id, prog: "LearnCpu", label: "start", stack: [{}], exitval: {"arch" => "arm64", "total_sockets" => 2, "total_dies" => 3, "total_cores" => 4, "total_cpus" => 5})
Strand.create(parent_id: st.id, prog: "ArbitraryOtherProg", label: "start", stack: [{}], exitval: {})
vm_host = st.subject
nx.singleton_class.remove_method(:vm_host)
nx.define_singleton_method(:vm_host) { vm_host }
expect { nx.wait_prep }.to hop("setup_hugepages")
vm_host.reload
expect(vm_host.total_mem_gib).to eq 1
expect(vm_host.os_version).to eq "ubuntu-22.04"
expect(vm_host.arch).to eq "arm64"
expect(vm_host.total_cores).to eq 4
expect(vm_host.total_cpus).to eq 5
expect(vm_host.total_dies).to eq 3
expect(vm_host.total_sockets).to eq 2
expect(VmHostCpu.where(vm_host_id: vm_host.id).select_order_map([:cpu_number, :spdk])).to eq [[0, true], [1, true], [2, false], [3, false], [4, false]]
end
it "crashes if an expected field is not set for LearnMemory" do
Strand.create(parent_id: st.id, prog: "LearnMemory", label: "start", stack: [{}], exitval: {})
expect { nx.wait_prep }.to raise_error KeyError, "key not found: \"mem_gib\""
end
it "crashes if an expected field is not set for LearnCpu" do
Strand.create(parent_id: st.id, prog: "LearnCpu", label: "start", stack: [{}], exitval: {})
expect { nx.wait_prep }.to raise_error KeyError, "key not found: \"arch\""
end
it "donates to children if they are not exited yet" do
Strand.create(parent_id: st.id, prog: "LearnCpu", label: "start", stack: [{}], lease: Time.now + 10)
expect { nx.wait_prep }.to nap(120)
end
end
describe "#setup_hugepages" do
it "pushes the hugepage program" do
expect { nx.setup_hugepages }.to hop("start", "SetupHugepages")
end
it "hops once SetupHugepages has returned" do
nx.strand.retval = {"msg" => "hugepages installed"}
expect { nx.setup_hugepages }.to hop("setup_spdk")
end
end
describe "#setup_spdk" do
it "pushes the spdk program" do
expect(nx).to receive(:push).with(Prog::Storage::SetupSpdk,
{
"version" => Config.spdk_version,
"start_service" => false,
"allocation_weight" => 100
}).and_call_original
expect { nx.setup_spdk }.to hop("start", "Storage::SetupSpdk")
end
it "hops once SetupSpdk has returned" do
nx.strand.retval = {"msg" => "SPDK was setup"}
vmh = instance_double(VmHost)
spdk_installation = SpdkInstallation.new(cpu_count: 4)
allow(vmh).to receive_messages(
spdk_installations: [spdk_installation],
total_cores: 48,
total_cpus: 96
)
allow(nx).to receive(:vm_host).and_return(vmh)
expect(vmh).to receive(:update).with({used_cores: 2})
expect { nx.setup_spdk }.to hop("download_boot_images")
end
end
describe "#download_boot_images" do
it "pushes the download boot image program" do
expect(nx).to receive(:frame).and_return({"default_boot_images" => ["ubuntu-jammy", "github-ubuntu-2204"]})
expect(nx).to receive(:bud).with(Prog::DownloadBootImage, {"image_name" => "ubuntu-jammy"})
expect(nx).to receive(:bud).with(Prog::DownloadBootImage, {"image_name" => "github-ubuntu-2204"})
expect { nx.download_boot_images }.to hop("wait_download_boot_images")
end
end
describe "#wait_download_boot_images" do
it "hops to prep_reboot if all tasks are done" do
expect { nx.wait_download_boot_images }.to hop("prep_reboot")
end
it "donates its time if child strands are still running" do
Strand.create(parent_id: st.id, prog: "DownloadBootImage", label: "start", stack: [{}], lease: Time.now + 10)
expect { nx.wait_download_boot_images }.to nap(120)
end
end
describe "#wait" do
it "naps" do
expect { nx.wait }.to nap(6 * 60 * 60)
end
it "hops to prep_graceful_reboot when needed" do
expect(nx).to receive(:when_graceful_reboot_set?).and_yield
expect { nx.wait }.to hop("prep_graceful_reboot")
end
it "hops to prep_reboot when needed" do
expect(nx).to receive(:when_reboot_set?).and_yield
expect { nx.wait }.to hop("prep_reboot")
end
it "hops to prep_hardware_reset when needed" do
expect(nx).to receive(:when_hardware_reset_set?).and_yield
expect { nx.wait }.to hop("prep_hardware_reset")
end
it "hops to configure_metrics when needed" do
expect(nx).to receive(:when_configure_metrics_set?).and_yield
expect { nx.wait }.to hop("configure_metrics")
end
it "hops to unavailable based on the host's available status" do
expect(nx).to receive(:when_checkup_set?).and_yield
expect(nx).to receive(:available?).and_return(false)
expect { nx.wait }.to hop("unavailable")
expect(nx).to receive(:when_checkup_set?).and_yield
expect(nx).to receive(:available?).and_return(true)
expect { nx.wait }.to nap(6 * 60 * 60)
end
end
describe "#unavailable" do
it "registers an immediate deadline if host is unavailable" do
expect(nx).to receive(:register_deadline).with("wait", 0)
expect(nx).to receive(:available?).and_return(false)
expect { nx.unavailable }.to nap(30)
end
it "hops to wait if host is available" do
expect(nx).to receive(:available?).and_return(true)
expect { nx.unavailable }.to hop("wait")
end
end
describe "#destroy" do
it "updates allocation state and naps" do
expect(vm_host).to receive(:allocation_state).and_return("accepting")
expect(vm_host).to receive(:update).with(allocation_state: "draining")
expect { nx.destroy }.to nap(5)
end
it "waits draining" do
expect(vm_host).to receive(:allocation_state).and_return("draining")
expect(Clog).to receive(:emit).with("Cannot destroy the vm host with active virtual machines, first clean them up").and_call_original
expect { nx.destroy }.to nap(15)
end
it "deletes and exists" do
expect(vm_host).to receive(:allocation_state).and_return("draining")
expect(vm_host).to receive(:vms).and_return([])
expect(vm_host).to receive(:destroy)
expect(sshable).to receive(:destroy)
expect { nx.destroy }.to exit({"msg" => "vm host deleted"})
end
end
describe "host graceful reboot" do
it "prep_graceful_reboot sets allocation_state to draining if it is in accepting" do
expect(vm_host).to receive(:allocation_state).and_return("accepting")
expect(vm_host).to receive(:update).with(allocation_state: "draining")
expect(vm_host).to receive(:vms_dataset).and_return([true])
expect { nx.prep_graceful_reboot }.to nap(30)
end
it "prep_graceful_reboot does not change allocation_state if it is already draining" do
expect(vm_host).to receive(:allocation_state).and_return("draining")
expect(vm_host).not_to receive(:update)
expect(vm_host).to receive(:vms_dataset).and_return([true])
expect { nx.prep_graceful_reboot }.to nap(30)
end
it "prep_graceful_reboot fails if not in accepting or draining state" do
expect(vm_host).to receive(:allocation_state).and_return("unprepared")
expect(vm_host).not_to receive(:update)
expect(vm_host).not_to receive(:vms_dataset)
expect { nx.prep_graceful_reboot }.to raise_error(RuntimeError)
end
it "prep_graceful_reboot transitions to prep_reboot if there are no VMs" do
expect(vm_host).to receive(:allocation_state).and_return("draining")
expect(vm_host).to receive(:vms_dataset).and_return([])
expect { nx.prep_graceful_reboot }.to hop("prep_reboot")
end
it "prep_graceful_reboot transitions to nap if there are VMs" do
expect(vm_host).to receive(:allocation_state).and_return("draining")
expect(vm_host).to receive(:vms_dataset).and_return([true])
expect { nx.prep_graceful_reboot }.to nap(30)
end
end
describe "configure metrics" do
it "configures the metrics and hops to wait" do
metrics_config = {
metrics_dir: "/home/rhizome/host/metrics"
}
allow(vm_host).to receive(:metrics_config).and_return(metrics_config)
expect(sshable).to receive(:cmd).with("mkdir -p /home/rhizome/host/metrics")
expect(sshable).to receive(:cmd).with("tee /home/rhizome/host/metrics/config.json > /dev/null", stdin: metrics_config.to_json)
expect(sshable).to receive(:cmd).with("sudo tee /etc/systemd/system/vmhost-metrics.service > /dev/null", stdin: "[Unit]\nDescription=VmHost Metrics Collection\nAfter=network-online.target\n\n[Service]\nType=oneshot\nUser=rhizome\nExecStart=/home/rhizome/common/bin/metrics-collector /home/rhizome/host/metrics\nStandardOutput=journal\nStandardError=journal\n")
expect(sshable).to receive(:cmd).with("sudo tee /etc/systemd/system/vmhost-metrics.timer > /dev/null", stdin: "[Unit]\nDescription=Run VmHost Metrics Collection Periodically\n\n[Timer]\nOnBootSec=30s\nOnUnitActiveSec=15s\nAccuracySec=1s\n\n[Install]\nWantedBy=timers.target\n")
expect(sshable).to receive(:cmd).with("sudo systemctl daemon-reload")
expect(sshable).to receive(:cmd).with("sudo systemctl enable --now vmhost-metrics.timer")
expect { nx.configure_metrics }.to hop("wait")
end
end
describe "host reboot" do
it "prep_reboot transitions to reboot" do
expect(nx).to receive(:get_boot_id).and_return("xyz")
expect(vm_host).to receive(:update).with(last_boot_id: "xyz")
expect(vms).to all receive(:update).with(display_state: "rebooting")
expect(nx).to receive(:decr_reboot)
expect { nx.prep_reboot }.to hop("reboot")
end
it "reboot naps if host sshable is not available" do
expect(sshable).to receive(:available?).and_return(false)
expect { nx.reboot }.to nap(30)
end
it "reboot naps if reboot-host returns empty string" do
expect(sshable).to receive(:available?).and_return(true)
expect(vm_host).to receive(:last_boot_id).and_return("xyz")
expect(sshable).to receive(:cmd).with("sudo host/bin/reboot-host xyz").and_return ""
expect { nx.reboot }.to nap(30)
end
it "reboot updates last_boot_id and hops to verify_spdk" do
expect(sshable).to receive(:available?).and_return(true)
expect(vm_host).to receive(:last_boot_id).and_return("xyz")
expect(sshable).to receive(:cmd).with("sudo host/bin/reboot-host xyz").and_return "pqr\n"
expect(vm_host).to receive(:update).with(last_boot_id: "pqr")
expect { nx.reboot }.to hop("verify_spdk")
end
it "verify_spdk hops to verify_hugepages if spdk started" do
expect(vm_host).to receive(:spdk_installations).and_return([
SpdkInstallation.new(version: "v1.0"),
SpdkInstallation.new(version: "v3.0")
])
expect(sshable).to receive(:cmd).with("sudo host/bin/setup-spdk verify v1.0")
expect(sshable).to receive(:cmd).with("sudo host/bin/setup-spdk verify v3.0")
expect { nx.verify_spdk }.to hop("verify_hugepages")
end
it "start_slices starts slices" do
slice1 = instance_double(VmHostSlice)
slice2 = instance_double(VmHostSlice)
expect(vm_host).to receive(:slices).and_return([slice1, slice2])
expect(slice1).to receive(:incr_start_after_host_reboot)
expect(slice2).to receive(:incr_start_after_host_reboot)
expect { nx.start_slices }.to hop("start_vms")
end
it "start_vms starts vms & becomes accepting & hops to wait if unprepared" do
expect(vms).to all receive(:incr_start_after_host_reboot)
expect(vm_host).to receive(:allocation_state).and_return("unprepared")
expect(vm_host).to receive(:update).with(allocation_state: "accepting")
expect { nx.start_vms }.to hop("configure_metrics")
end
it "start_vms starts vms & becomes accepting & hops to wait if was draining an in graceful reboot" do
expect(vm_host).to receive(:allocation_state).twice.and_return("draining")
expect(nx).to receive(:when_graceful_reboot_set?).and_yield
expect(vms).to all receive(:incr_start_after_host_reboot)
expect(vm_host).to receive(:update).with(allocation_state: "accepting")
expect { nx.start_vms }.to hop("configure_metrics")
end
it "start_vms starts vms & raises if not in draining and in graceful reboot" do
expect(vm_host).to receive(:allocation_state).and_return("accepting")
expect(nx).to receive(:when_graceful_reboot_set?).and_yield
expect(vms).to all receive(:incr_start_after_host_reboot)
expect { nx.start_vms }.to raise_error(RuntimeError)
end
it "start_vms starts vms & hops to configure_metrics if accepting" do
expect(vms).to all receive(:incr_start_after_host_reboot)
expect(vm_host).to receive(:allocation_state).and_return("accepting")
expect { nx.start_vms }.to hop("configure_metrics")
end
it "start_vms starts vms & hops to wait if draining" do
expect(vms).to all receive(:incr_start_after_host_reboot)
expect(vm_host).to receive(:allocation_state).and_return("draining")
expect { nx.start_vms }.to hop("configure_metrics")
end
it "can get boot id" do
expect(sshable).to receive(:cmd).with("cat /proc/sys/kernel/random/boot_id").and_return("xyz\n")
expect(nx.get_boot_id).to eq("xyz")
end
end
describe "host hardware reset" do
it "prep_hardware_reset transitions to hardware_reset" do
vms_dataset = instance_double(Vm.dataset.class)
expect(vm_host).to receive(:vms_dataset).and_return(vms_dataset)
expect(vms_dataset).to receive(:update).with(display_state: "rebooting")
expect(nx).to receive(:decr_hardware_reset)
expect { nx.prep_hardware_reset }.to hop("hardware_reset")
end
it "hardware_reset transitions to reboot if is in draining state" do
expect(vm_host).to receive(:allocation_state).and_return("draining")
expect(vm_host).to receive(:hardware_reset)
expect { nx.hardware_reset }.to hop("reboot")
end
it "hardware_reset transitions to reboot if is not in draining state but has no vms" do
expect(vm_host).to receive(:allocation_state).and_return("accepting")
vms_dataset = instance_double(Vm.dataset.class)
expect(vm_host).to receive(:vms_dataset).and_return(vms_dataset)
expect(vms_dataset).to receive(:empty?).and_return(true)
expect(vm_host).to receive(:hardware_reset)
expect { nx.hardware_reset }.to hop("reboot")
end
it "hardware_reset fails if has vms and is not in draining state" do
vms_dataset = instance_double(Vm.dataset.class)
expect(vm_host).to receive(:vms_dataset).and_return(vms_dataset)
expect(vms_dataset).to receive(:empty?).and_return(false)
expect(vm_host).to receive(:allocation_state).and_return("accepting")
expect { nx.hardware_reset }.to raise_error RuntimeError, "Host has VMs and is not in draining state"
end
end
describe "#verify_hugepages" do
it "fails if hugepagesize!=1G" do
expect(sshable).to receive(:cmd).with("cat /proc/meminfo").and_return("Hugepagesize: 2048 kB\n")
expect { nx.verify_hugepages }.to raise_error RuntimeError, "Couldn't set hugepage size to 1G"
end
it "fails if total hugepages couldn't be extracted" do
expect(sshable).to receive(:cmd).with("cat /proc/meminfo").and_return("Hugepagesize: 1048576 kB\n")
expect { nx.verify_hugepages }.to raise_error RuntimeError, "Couldn't extract total hugepage count"
end
it "fails if free hugepages couldn't be extracted" do
expect(sshable).to receive(:cmd).with("cat /proc/meminfo")
.and_return("Hugepagesize: 1048576 kB\nHugePages_Total: 5")
expect { nx.verify_hugepages }.to raise_error RuntimeError, "Couldn't extract free hugepage count"
end
it "fails if not enough hugepages for VMs" do
expect(sshable).to receive(:cmd).with("cat /proc/meminfo")
.and_return("Hugepagesize: 1048576 kB\nHugePages_Total: 5\nHugePages_Free: 2")
expect { nx.verify_hugepages }.to raise_error RuntimeError, "Not enough hugepages for VMs"
end
it "fails if used hugepages exceed spdk hugepages" do
expect(sshable).to receive(:cmd).with("cat /proc/meminfo")
.and_return("Hugepagesize: 1048576 kB\nHugePages_Total: 10\nHugePages_Free: 5")
expect { nx.verify_hugepages }.to raise_error RuntimeError, "Used hugepages exceed SPDK hugepages"
end
it "updates vm_host with hugepage stats and hops" do
expect(sshable).to receive(:cmd).with("cat /proc/meminfo")
.and_return("Hugepagesize: 1048576 kB\nHugePages_Total: 10\nHugePages_Free: 8")
expect(vm_host).to receive(:update)
.with(total_hugepages_1g: 10, used_hugepages_1g: 7)
expect { nx.verify_hugepages }.to hop("start_slices")
end
end
describe "#available?" do
it "returns the available status when disks are healthy" do
expect(sshable).to receive(:connect).and_return(nil)
expect(vm_host).to receive(:perform_health_checks).and_return(true)
expect(nx.available?).to be true
end
it "returns the available status when disks are not healthy" do
expect(sshable).to receive(:connect).and_return(nil)
allow(vm_host).to receive(:perform_health_checks).and_return(false)
expect(nx.available?).to be false
end
it "returns an error trying to connect to VmHost" do
expect(sshable).to receive(:connect).and_raise Sshable::SshError.new("ssh failed", "", "", nil, nil)
expect(nx.available?).to be false
end
end
end