Reviewing leaf usage in progs, it always occurs right after reap, so combining the leaf check into reap avoids a redundant query for the strand's children. It is also typical for nap or donate to be called after the leaf check that follows reap. Build this into reap as well: call donate by default, or nap if a nap keyword argument is given.

There are a few cases where reap was called without leaf/donate. Add a fallthrough keyword argument to support these, so that if there are no children, reap calls neither nap nor donate.

Vm::HostNexus#wait_prep and Kubernetes::UpgradeKubernetesNode#wait_new_node both need the return value of the reapable child(ren). Add a reaper keyword argument for this, which is called once for each reaped child.

The most common pattern for using reap/leaf/donate was:

```ruby
reap
hop_download_lb_cert if leaf?
donate
```

This turns into:

```ruby
reap(:download_lb_cert)
```

The second most common pattern was:

```ruby
reap
donate unless leaf?
pop "upgrade cancelled" # or other code
```

This turns into:

```ruby
reap { pop "upgrade cancelled" }
```

In a few places, I changed operations on strand.children to strand.children_dataset. Now that we are no longer using cached children by default, it's better to do these checks in the database instead of in Ruby. These places deserve careful review:

* Prog::Minio::MinioServerNexus#unavailable
* Prog::Postgres::PostgresResourceNexus#wait
* Prog::Postgres::PostgresServerNexus#unavailable

For Prog::Vnet::LoadBalancerNexus#wait_update_vm_load_balancers, I removed a check on the children completely. It was checking for an exitval using children_dataset directly after reap, which should only be true if there was still an active lease for the child. This also deserves careful review.

These changes broke many mocked tests. This commit fixes the mocked tests to use database-backed objects, ensuring that we are testing observable behavior and not implementation details.
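As a reference for reviewers, here is a minimal sketch of how the reworked reap is intended to be called, based only on the patterns described above. The exact signature lives in Prog::Base; keyword handling (for example, whether nap takes a duration) is an assumption here.

```ruby
# Illustrative only; shapes follow the commit description, not Prog::Base.

# Hop to download_lb_cert once all children have exited; donate while they run.
reap(:download_lb_cert)

# Run the block when there are no children left, instead of hopping.
reap { pop "upgrade cancelled" }

# reaper is called once per reaped child; inspect st.exitval in it.
reap(:setup_hugepages, reaper: ->(st) { st.exitval })

# nap: nap instead of donating while children are still running
# (assumed here to take a duration in seconds).
reap(:download_lb_cert, nap: 30)

# fallthrough: with no children, call neither nap nor donate and fall through
# to the code after this line.
reap(fallthrough: true)
```

The full Prog::Vm::HostNexus prog after the change follows below.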
# frozen_string_literal: true

class Prog::Vm::HostNexus < Prog::Base
  subject_is :sshable, :vm_host

  def self.assemble(sshable_hostname, location_id: Location::HETZNER_FSN1_ID, family: "standard", net6: nil, ndp_needed: false, provider_name: nil, server_identifier: nil, spdk_version: Config.spdk_version, default_boot_images: [])
    DB.transaction do
      unless Location[location_id]
        raise "No existing Location"
      end

      ubid = VmHost.generate_ubid

      Sshable.create(host: sshable_hostname) { it.id = ubid.to_uuid }
      vmh = VmHost.create(location_id:, family:, net6:, ndp_needed:) { it.id = ubid.to_uuid }

      if provider_name == HostProvider::HETZNER_PROVIDER_NAME || provider_name == HostProvider::LEASEWEB_PROVIDER_NAME
        HostProvider.create do |hp|
          hp.id = vmh.id
          hp.provider_name = provider_name
          hp.server_identifier = server_identifier
        end
      end

      if provider_name == HostProvider::HETZNER_PROVIDER_NAME
        vmh.create_addresses
        vmh.set_data_center
        # Avoid overriding custom server names for development hosts.
        vmh.set_server_name unless Config.development?
      else
        Address.create(cidr: sshable_hostname, routed_to_host_id: vmh.id) { it.id = vmh.id }
        AssignedHostAddress.create_with_id(ip: sshable_hostname, address_id: vmh.id, host_id: vmh.id)
      end

      Strand.create(
        prog: "Vm::HostNexus",
        label: "start",
        stack: [{"spdk_version" => spdk_version, "default_boot_images" => default_boot_images}]
      ) { it.id = vmh.id }
    end
  end

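  # Check the destroy semaphore before each label runs; hop to destroy unless
  # the strand is already there.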
  def before_run
    when_destroy_set? do
      if strand.label != "destroy"
        hop_destroy
      end
    end
  end

  label def start
    hop_setup_ssh_keys
  end

  label def setup_ssh_keys
    # Generate a new SSH key if one is not set.
    sshable.update(raw_private_key_1: SshKey.generate.keypair) unless sshable.raw_private_key_1

    if Config.hetzner_ssh_private_key
      root_key = Net::SSH::Authentication::ED25519::PrivKey.read(Config.hetzner_ssh_private_key, Config.hetzner_ssh_private_key_passphrase).sign_key
      root_ssh_key = SshKey.from_binary(root_key.keypair)

      public_keys = sshable.keys.first.public_key
      public_keys += "\n#{Config.operator_ssh_public_keys}" if Config.operator_ssh_public_keys

      Util.rootish_ssh(sshable.host, "root", root_ssh_key.private_key, "echo '#{public_keys}' > ~/.ssh/authorized_keys")
    end

    hop_bootstrap_rhizome
  end

  label def bootstrap_rhizome
    register_deadline("download_boot_images", 10 * 60)
    hop_prep if retval&.dig("msg") == "rhizome user bootstrapped and source installed"

    push Prog::BootstrapRhizome, {"target_folder" => "host"}
  end

  label def prep
    bud Prog::Vm::PrepHost
    bud Prog::LearnNetwork unless vm_host.net6
    bud Prog::LearnMemory
    bud Prog::LearnOs
    bud Prog::LearnCpu
    bud Prog::LearnStorage
    bud Prog::LearnPci
    bud Prog::InstallDnsmasq
    bud Prog::SetupSysstat
    bud Prog::SetupNftables
    bud Prog::SetupNodeExporter
    hop_wait_prep
  end

  def os_supports_slices?(os_version)
    os_version == "ubuntu-24.04"
  end

  label def wait_prep
    reaper = lambda do |st|
      case st.prog
      when "LearnOs"
        os_version = st.exitval.fetch("os_version")
        vm_host.update(os_version: os_version, accepts_slices: os_supports_slices?(os_version))
      when "LearnMemory"
        mem_gib = st.exitval.fetch("mem_gib")
        vm_host.update(total_mem_gib: mem_gib)
      when "LearnCpu"
        arch = st.exitval.fetch("arch")
        total_cores = st.exitval.fetch("total_cores")
        total_cpus = st.exitval.fetch("total_cpus")
        kwargs = {
          arch: arch,
          total_sockets: st.exitval.fetch("total_sockets"),
          total_dies: st.exitval.fetch("total_dies"),
          total_cores: total_cores,
          total_cpus: total_cpus
        }
        vm_host.update(**kwargs)
        (0..total_cpus - 1).each do |cpu|
          VmHostCpu.create(
            vm_host_id: vm_host.id,
            cpu_number: cpu,
            spdk: cpu < vm_host.spdk_cpu_count
          )
        end
      end
    end

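    # reaper is invoked once for each reaped child, so the host facts learned
    # by the Learn* progs are recorded from their exit values. Once every
    # child has exited, reap hops to setup_hugepages; until then it donates.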
    reap(:setup_hugepages, reaper:)
  end

  label def setup_hugepages
    hop_setup_spdk if retval&.dig("msg") == "hugepages installed"

    push Prog::SetupHugepages
  end

  label def setup_spdk
    if retval&.dig("msg") == "SPDK was setup"
      spdk_installation = vm_host.spdk_installations.first
      spdk_cores = (spdk_installation.cpu_count * vm_host.total_cores) / vm_host.total_cpus
      vm_host.update(used_cores: spdk_cores)

      hop_download_boot_images
    end

    push Prog::Storage::SetupSpdk, {
      "version" => frame["spdk_version"],
      "start_service" => false,
      "allocation_weight" => 100
    }
  end

  label def download_boot_images
    register_deadline("prep_reboot", 4 * 60 * 60)
    frame["default_boot_images"].each { |image_name|
      bud Prog::DownloadBootImage, {
        "image_name" => image_name
      }
    }

    hop_wait_download_boot_images
  end

  label def wait_download_boot_images
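    # Hop to prep_reboot once all DownloadBootImage children have exited;
    # donate while they are still running.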
    reap(:prep_reboot)
  end

  label def prep_reboot
    register_deadline("wait", 15 * 60)
    boot_id = get_boot_id
    vm_host.update(last_boot_id: boot_id)

    vm_host.vms.each { |vm|
      vm.update(display_state: "rebooting")
    }

    decr_reboot

    hop_reboot
  end

  label def reboot
    nap 30 unless sshable.available?

    q_last_boot_id = vm_host.last_boot_id.shellescape
    new_boot_id = sshable.cmd("sudo host/bin/reboot-host #{q_last_boot_id}").strip

    # If we didn't get a valid new boot id, nap. This can happen if reboot-host
    # issues a reboot and returns without closing the ssh connection.
    nap 30 if new_boot_id.length == 0

    vm_host.update(last_boot_id: new_boot_id)
    hop_verify_spdk
  end

  label def prep_hardware_reset
    register_deadline("wait", 20 * 60)
    vm_host.vms_dataset.update(display_state: "rebooting")
    decr_hardware_reset
    hop_hardware_reset
  end

  # Cuts power to a Server and starts it again. This forcefully stops it
  # without giving the Server operating system time to gracefully stop. This
  # may lead to data loss, it’s equivalent to pulling the power cord and
  # plugging it in again. Reset should only be used when reboot does not work.
  label def hardware_reset
    unless vm_host.allocation_state == "draining" || vm_host.vms_dataset.empty?
      fail "Host has VMs and is not in draining state"
    end

    vm_host.hardware_reset

    # Attempt to hop to reboot immediately after sending the hardware reset
    # signal.
    # The reboot may:
    # 1. Fail, if the host is unreachable, which is a typical reason for a
    #    hardware reset.
    # 2. Succeed, if the host is reachable; however, the hardware reset may
    #    interrupt the reboot.
    # Regardless, the hardware reset will proceed, and upon completion, the
    # host will receive a new boot id, allowing the sequence to continue
    # without an additional reboot.
    hop_reboot
  end

  label def verify_spdk
    vm_host.spdk_installations.each { |installation|
      q_version = installation.version.shellescape
      sshable.cmd("sudo host/bin/setup-spdk verify #{q_version}")
    }

    hop_verify_hugepages
  end

  label def verify_hugepages
    host_meminfo = sshable.cmd("cat /proc/meminfo")
    fail "Couldn't set hugepage size to 1G" unless host_meminfo.match?(/^Hugepagesize:\s+1048576 kB$/)

    total_hugepages_match = host_meminfo.match(/^HugePages_Total:\s+(\d+)$/)
    fail "Couldn't extract total hugepage count" unless total_hugepages_match

    free_hugepages_match = host_meminfo.match(/^HugePages_Free:\s+(\d+)$/)
    fail "Couldn't extract free hugepage count" unless free_hugepages_match

    total_hugepages = Integer(total_hugepages_match.captures.first)
    free_hugepages = Integer(free_hugepages_match.captures.first)

    spdk_hugepages = vm_host.spdk_installations.sum { |i| i.hugepages }
    fail "Used hugepages exceed SPDK hugepages" unless total_hugepages - free_hugepages <= spdk_hugepages

    total_vm_mem_gib = vm_host.vms.sum { |vm| vm.memory_gib }
    fail "Not enough hugepages for VMs" unless total_hugepages - spdk_hugepages >= total_vm_mem_gib

    vm_host.update(
      total_hugepages_1g: total_hugepages,
      used_hugepages_1g: spdk_hugepages + total_vm_mem_gib
    )

    hop_start_slices
  end

  label def start_slices
    vm_host.slices.each { |slice|
      slice.incr_start_after_host_reboot
    }

    hop_start_vms
  end

  label def start_vms
    vm_host.vms.each { |vm|
      vm.incr_start_after_host_reboot
    }

    when_graceful_reboot_set? do
      fail "BUG: VmHost not in draining state" unless vm_host.allocation_state == "draining"
      vm_host.update(allocation_state: "accepting")
      decr_graceful_reboot
    end

    vm_host.update(allocation_state: "accepting") if vm_host.allocation_state == "unprepared"

    hop_configure_metrics
  end

  label def prep_graceful_reboot
    case vm_host.allocation_state
    when "accepting"
      vm_host.update(allocation_state: "draining")
    when "draining"
      # nothing
    else
      fail "BUG: VmHost not in accepting or draining state"
    end

    if vm_host.vms_dataset.empty?
      hop_prep_reboot
    end

    nap 30
  end

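  # Write the metrics collector config and a systemd service/timer pair to the
  # host, then enable the timer so metrics are collected periodically.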
  label def configure_metrics
    metrics_dir = vm_host.metrics_config[:metrics_dir]
    sshable.cmd("mkdir -p #{metrics_dir}")
    sshable.cmd("tee #{metrics_dir}/config.json > /dev/null", stdin: vm_host.metrics_config.to_json)

    metrics_service = <<SERVICE
[Unit]
Description=VmHost Metrics Collection
After=network-online.target

[Service]
Type=oneshot
User=rhizome
ExecStart=/home/rhizome/common/bin/metrics-collector #{metrics_dir}
StandardOutput=journal
StandardError=journal
SERVICE
    sshable.cmd("sudo tee /etc/systemd/system/vmhost-metrics.service > /dev/null", stdin: metrics_service)

    metrics_interval = vm_host.metrics_config[:interval] || "15s"

    metrics_timer = <<TIMER
[Unit]
Description=Run VmHost Metrics Collection Periodically

[Timer]
OnBootSec=30s
OnUnitActiveSec=#{metrics_interval}
AccuracySec=1s

[Install]
WantedBy=timers.target
TIMER
    sshable.cmd("sudo tee /etc/systemd/system/vmhost-metrics.timer > /dev/null", stdin: metrics_timer)

    sshable.cmd("sudo systemctl daemon-reload")
    sshable.cmd("sudo systemctl enable --now vmhost-metrics.timer")

    hop_wait
  end

  label def wait
    when_graceful_reboot_set? do
      hop_prep_graceful_reboot
    end

    when_reboot_set? do
      hop_prep_reboot
    end

    when_hardware_reset_set? do
      hop_prep_hardware_reset
    end

    when_checkup_set? do
      hop_unavailable if !available?
      decr_checkup
    end

    when_configure_metrics_set? do
      decr_configure_metrics
      hop_configure_metrics
    end

    nap 6 * 60 * 60
  end

  label def unavailable
    if available?
      decr_checkup
      hop_wait
    end

    register_deadline("wait", 0)
    nap 30
  end

  label def destroy
    decr_destroy

    unless vm_host.allocation_state == "draining"
      vm_host.update(allocation_state: "draining")
      nap 5
    end

    unless vm_host.vms.empty?
      Clog.emit("Cannot destroy the vm host with active virtual machines, first clean them up") { vm_host }
      nap 15
    end

    vm_host.destroy
    sshable.destroy

    pop "vm host deleted"
  end

  def get_boot_id
    sshable.cmd("cat /proc/sys/kernel/random/boot_id").strip
  end

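  # Treat any failure to connect or to complete the health checks as the host
  # being unavailable.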
  def available?
    vm_host.perform_health_checks(sshable.connect)
  rescue
    false
  end
end