Files
ubicloud/bin/ci
Hadi Moshayedi abe5fe1a25 E2E: Test host reboot for slices.
We had issues with host reboot for slices in prod, so it's worthwhile to
try this scenario in E2E.

To do this, we combine the encrypted VM scenario (which already tested
reboots) and sliced VM scenario.
2025-03-06 09:06:39 -08:00

99 lines
3.7 KiB
Ruby
Executable File

#!/usr/bin/env ruby
# frozen_string_literal: true
require_relative "../loader"
require "time"
require "optparse"
# Script start time; log subtracts it from the current time to print elapsed minutes.
@started_at = Time.now
# Runs the selected E2E test cases against a test host and waits for all of
# them to finish before asking the host strand to verify cleanup and destroy.
#
# options - Hash with:
#   :test_cases - Array of test case names, matched against the "name"
#                 entries loaded from config/e2e_test_cases.yml.
#   :vm_host_id - optional value forwarded to
#                 Prog::Test::HetznerServer.assemble.
#
# Waiting is delegated to wait_until, which terminates the process when an
# awaited strand is seen in the "failed" label.
def main(options)
  all_test_cases = YAML.load_file("config/e2e_test_cases.yml").to_h { [_1["name"], _1] }
  boot_images = all_test_cases.values_at(*options[:test_cases]).flat_map { _1["images"] }.uniq

  hetzner_server_st = Prog::Test::HetznerServer.assemble(vm_host_id: options[:vm_host_id], default_boot_images: boot_images)
  wait_until(hetzner_server_st, "wait")

  tests_to_wait = []

  if options[:test_cases].include?("vm") && (test_case = all_test_cases["vm"])
    # Testing slices is not parallel with other tests as it requires a specific host state
    # YYY: Make this default when all hosts in prod support slices.
    Semaphore.incr(hetzner_server_st.id, "allow_slices")
    wait_until(hetzner_server_st, "wait")

    # This tests encrypted VMs with slices, which will create both standard and
    # burstable VMs. The test will also test host reboot.
    encrypted_vms_st = Prog::Test::VmGroup.assemble(boot_images: test_case["images"], storage_encrypted: true, test_reboot: true, test_slices: true)
    log(encrypted_vms_st, "storage_encrypted: true")

    # Wait here instead of running in parallel, since the host is rebooted
    # during this test. A test running concurrently would effectively be
    # exercising the reboot as well, depending on when the host is rebooted,
    # which can make the github runner tests flaky.
    wait_until(encrypted_vms_st)

    Semaphore.incr(hetzner_server_st.id, "disallow_slices")
    wait_until(hetzner_server_st, "wait")

    unencrypted_vms_st = Prog::Test::VmGroup.assemble(boot_images: test_case["images"], storage_encrypted: false, test_reboot: false, verify_host_capacity: false)
    log(unencrypted_vms_st, "storage_encrypted: false")
    tests_to_wait << unencrypted_vms_st
  end

  # values_at always returns an Array, and an empty Array is truthy, so the
  # emptiness check must be explicit; otherwise the runner test would be
  # assembled with no test cases whenever none were requested.
  gh_test_cases = all_test_cases.values_at(*options[:test_cases].select { _1.include?("github_runner") })
  unless gh_test_cases.empty?
    tests_to_wait << Prog::Test::GithubRunner.assemble(gh_test_cases)
  end

  if options[:test_cases].include?("postgres_standard")
    tests_to_wait << Prog::Test::PostgresResource.assemble
  end

  if options[:test_cases].include?("postgres_ha")
    tests_to_wait << Prog::Test::HaPostgresResource.assemble
  end

  # Although wait_until will be blocked while checking the first one
  # it won't affect the total time as other strands will continue in parallel.
  # No need to make it parallel.
  tests_to_wait.each { |st| wait_until(st) }

  Semaphore.incr(hetzner_server_st.id, "verify_cleanup_and_destroy")
  wait_until(hetzner_server_st)
end
# Polls the strand identified by st.id until it reaches +label+ or, when
# +label+ is nil, until the strand row can no longer be found.
#
# st    - strand handle; only its id is used for lookups.
# label - label to wait for, or nil to wait for the strand to disappear.
#
# Sleeps 10 seconds between polls. If the strand is observed in the "failed"
# label, logs the "msg" entry of its exitval, destroys the strand and exits
# the process with status 1.
def wait_until(st, label = nil)
  last_seen = st

  while (loaded_st = Strand[st.id]) && loaded_st.label != label
    # Log from the snapshot that was just fetched rather than calling
    # st.reload: a reload is a second query and raises if the row was
    # deleted in between, which happens when the strand exits.
    last_seen = loaded_st

    if loaded_st.label == "failed"
      log(loaded_st, "FAILED: #{loaded_st.exitval.fetch("msg")}")
      st.destroy
      exit 1
    end

    log(loaded_st, "waiting for #{label || "exit"}")
    sleep 10
  end

  # loaded_st is nil when the strand is gone; fall back to the last snapshot.
  log(loaded_st || last_seen, "reached")
end
# Writes one status line for a strand to stdout, in the form
#
#   <elapsed minutes>m | <strand id> | <prog>.<label> | <msg> | <resources>
#
# st  - strand being reported on.
# msg - free-form text placed in the fourth column.
#
# For the known test progs, <resources> shows the labels of the strands
# referenced from the first stack frame; for anything else it repeats
# "<prog>.<label>". A referenced strand that cannot be found yields an empty
# label.
def log(st, msg)
  label_of = ->(strand_id) { Strand[strand_id]&.label }
  prog = st.prog

  resources =
    if prog == "Test::HetznerServer"
      "VmHost.#{label_of.call(st.stack.first["vm_host_id"])}"
    elsif prog == "Test::VmGroup"
      vm_labels = st.stack.first["vms"].map { |vm_id| "Vm.#{label_of.call(vm_id)}" }
      vm_labels.join(", ")
    elsif prog == "Test::Vm"
      "Vm.#{label_of.call(st.stack.first["subject_id"])}"
    else
      "#{prog}.#{st.label}"
    end

  elapsed_minutes = ((Time.now - @started_at) / 60).round(2)
  $stdout.write "#{elapsed_minutes}m | #{st.id} | #{st.prog}.#{st.label} | #{msg} | #{resources}\n"
end
# Command-line entry point: parse flags, then run the selected test cases.
# Without --test-cases only the "vm" test case is run.
options = {test_cases: ["vm"]}

parser = OptionParser.new
parser.on("--vm-host-id VM_HOST_ID", "Use existing vm host") do |value|
  # A 26-character value is looked up through VmHost.from_ubid and replaced
  # by that record's id; any other value is passed through unchanged.
  options[:vm_host_id] = if value.length == 26
    VmHost.from_ubid(value).id
  else
    value
  end
end
parser.on("--test-cases TEST_CASES", Array, "List of test cases to run separated by comma") do |value|
  options[:test_cases] = value
end
parser.parse!

clover_freeze
# Flush every write immediately so progress lines appear as they are produced.
$stdout.sync = true
main(options)