This patch switches the VM allocation from Cores to VCpus when selecting a host. There are two use cases motivating this change:
- We have x64 hosts that have a threads-per-core ratio of 1 (GEX44). That breaks the assumption encoded in VmSizes, per architecture type.
- We are going to introduce the Burstable family, where the relation between the number of CPUs allocated for a VM and the number of cores allocated to the slice hosting that VM may vary per VM instance, regardless of the architecture.

With this change, the number of cores is computed during allocation, based on the actual architecture of the candidate host, and then written back to the VM. When the VM is allocated in a slice, the number of cores is left as 0 on the VM; instead, the number of cores is saved in the VmHostSlice, and that is subtracted from the host. At any point in time this should be true:

vm_host.used_cores == SUM(vm_host_slice.cores) + SUM(vm.cores if vm.vm_host_slice_id.nil?)

This logic also helps us indicate who is really controlling the cores: it is either the VmHostSlice or a Vm running without a slice. Vms inside a slice do not control the cores and rely on the slice instead. The special case for vcpus==1 in cloud_hypervisor_cpu_topology is needed for Burstables, where we will have a Burstable-1 size. I wanted to include this in the review together with this patch for completeness.
671 lines
34 KiB
Ruby
671 lines
34 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require_relative "../../model/spec_helper"
|
|
require "netaddr"
|
|
require "octokit"
|
|
|
|
RSpec.describe Prog::Vm::GithubRunner do
|
|
subject(:nx) {
|
|
described_class.new(Strand.new).tap {
|
|
_1.instance_variable_set(:@github_runner, github_runner)
|
|
}
|
|
}
|
|
|
|
let(:github_runner) {
|
|
GithubRunner.new(installation_id: "", repository_name: "test-repo", label: "ubicloud-standard-4", ready_at: Time.now, created_at: Time.now).tap {
|
|
_1.id = GithubRunner.generate_uuid
|
|
}
|
|
}
|
|
|
|
let(:vm) {
|
|
Vm.new(family: "standard", cores: 1, name: "dummy-vm", location: "github-runners").tap {
|
|
_1.id = "788525ed-d6f0-4937-a844-323d4fd91946"
|
|
}
|
|
}
|
|
let(:sshable) { instance_double(Sshable) }
|
|
let(:client) { instance_double(Octokit::Client) }
|
|
|
|
before do
|
|
allow(Github).to receive(:installation_client).and_return(client)
|
|
allow(github_runner).to receive_messages(vm: vm, installation: instance_double(GithubInstallation, installation_id: 123))
|
|
allow(vm).to receive_messages(sshable: sshable, vm_host: instance_double(VmHost, ubid: "vhfdmbbtdz3j3h8hccf8s9wz94", data_center: "FSN1-DC1"))
|
|
end
|
|
|
|
describe ".assemble" do
  # Records shared by the examples that assemble against a real installation.
  # Both are lazy, so the invalid-label example (which passes its own double)
  # never creates them.
  let(:project) { Project.create_with_id(name: "default") }
  let(:installation) {
    GithubInstallation.create_with_id(installation_id: 123, project_id: project.id, name: "test-user", type: "User")
  }

  it "creates github runner and vm with sshable" do
    strand = described_class.assemble(installation, repository_name: "test-repo", label: "ubicloud")

    # The runner is looked up by the id of the strand returned from assemble.
    runner = GithubRunner[strand.id]
    expect(runner).not_to be_nil
    expect(runner.repository_name).to eq("test-repo")
    expect(runner.label).to eq("ubicloud")
  end

  it "creates github runner with custom size" do
    strand = described_class.assemble(installation, repository_name: "test-repo", label: "ubicloud-standard-8")

    runner = GithubRunner[strand.id]
    expect(runner).not_to be_nil
    expect(runner.repository_name).to eq("test-repo")
    expect(runner.label).to eq("ubicloud-standard-8")
  end

  it "fails if label is not valid" do
    invalid_assemble = -> {
      described_class.assemble(instance_double(GithubInstallation), repository_name: "test-repo", label: "ubicloud-standard-1")
    }

    expect(&invalid_assemble).to raise_error RuntimeError, "Invalid GitHub runner label: ubicloud-standard-1"
  end
end
|
|
|
|
describe ".pick_vm" do
  # Point the runner service project config at a real project record; the first
  # example asserts that the picked VM ends up in exactly this project.
  before do
    runner_project = Project.create_with_id(name: "default")
    allow(Config).to receive(:github_runner_service_project_id).and_return(runner_project.id)
  end

  it "provisions a VM if the pool is not existing" do
    expect(VmPool).to receive(:where).and_return([])
    expect(Prog::Vnet::SubnetNexus).to receive(:assemble).and_call_original
    expect(Prog::Vm::Nexus).to receive(:assemble).and_call_original
    expect(FirewallRule).to receive(:create_with_id).and_call_original.at_least(:once)

    # Named picked_vm so the result does not shadow the outer let(:vm).
    picked_vm = nx.pick_vm

    expect(picked_vm).not_to be_nil
    expect(picked_vm.sshable.unix_user).to eq("runneradmin")
    expect(picked_vm.family).to eq("standard")
    expect(picked_vm.vcpus).to eq(4)
    expect(picked_vm.project_id).to eq(Config.github_runner_service_project_id)
  end

  it "provisions a new vm if pool is valid but there is no vm" do
    git_runner_pool = VmPool.create_with_id(size: 2, vm_size: "standard-4", boot_image: "github-ubuntu-2204", location: "github-runners", storage_size_gib: 150, arch: "x64")
    expect(VmPool).to receive(:where).with(
      vm_size: "standard-4", boot_image: "github-ubuntu-2204", location: "github-runners",
      storage_size_gib: 150, storage_encrypted: true,
      storage_skip_sync: true, arch: "x64"
    ).and_return([git_runner_pool])
    # The pool exists but has nothing to hand out, so a fresh VM must be assembled.
    expect(git_runner_pool).to receive(:pick_vm).and_return(nil)
    expect(Prog::Vm::Nexus).to receive(:assemble).and_call_original
    expect(FirewallRule).to receive(:create_with_id).and_call_original.at_least(:once)

    picked_vm = nx.pick_vm

    expect(picked_vm).not_to be_nil
    expect(picked_vm.sshable.unix_user).to eq("runneradmin")
    expect(picked_vm.family).to eq("standard")
    expect(picked_vm.vcpus).to eq(4)
  end

  it "uses the existing vm if pool can pick one" do
    git_runner_pool = VmPool.create_with_id(size: 2, vm_size: "standard-4", boot_image: "github-ubuntu-2204", location: "github-runners", storage_size_gib: 150, arch: "arm64")
    expect(VmPool).to receive(:where).with(
      vm_size: "standard-4", boot_image: "github-ubuntu-2204", location: "github-runners",
      storage_size_gib: 150, storage_encrypted: true,
      storage_skip_sync: true, arch: "arm64"
    ).and_return([git_runner_pool])
    # `vm` here is the outer let(:vm) ("dummy-vm"); the pool hands it back as-is.
    expect(git_runner_pool).to receive(:pick_vm).and_return(vm)
    expect(github_runner).to receive(:label).and_return("ubicloud-standard-4-arm").at_least(:once)

    picked_vm = nx.pick_vm

    expect(picked_vm).not_to be_nil
    expect(picked_vm.name).to eq("dummy-vm")
  end
end
|
|
|
|
describe ".update_billing_record" do
|
|
let(:project) { Project.create_with_id(name: "default") }
|
|
|
|
# Default stubs for the billing examples: the runner's installation resolves to
# the persisted project, and the runner has picked up a workflow job. Individual
# examples override these with their own expectations where needed.
# No count constraint is attached: these are plain `allow` stubs, which are
# never verified, so a constraint such as at_least(:once) would be misleading.
before do
  allow(github_runner).to receive_messages(
    installation: instance_double(GithubInstallation, project: project),
    workflow_job: {"id" => 123}
  )
end
|
|
|
|
it "not updates billing record if the runner is destroyed before it's ready" do
|
|
expect(github_runner).to receive(:ready_at).and_return(nil)
|
|
|
|
expect(nx.update_billing_record).to be_nil
|
|
expect(BillingRecord.count).to eq(0)
|
|
end
|
|
|
|
it "not updates billing record if the runner does not pick a job" do
|
|
expect(github_runner).to receive(:ready_at).and_return(Time.now)
|
|
expect(github_runner).to receive(:workflow_job).and_return(nil)
|
|
|
|
expect(nx.update_billing_record).to be_nil
|
|
expect(BillingRecord.count).to eq(0)
|
|
end
|
|
|
|
it "creates new billing record when no daily record" do
|
|
time = Time.now
|
|
expect(Time).to receive(:now).and_return(time).at_least(:once)
|
|
expect(github_runner).to receive(:ready_at).and_return(time - 5 * 60).at_least(:once)
|
|
expect(BillingRecord).to receive(:create_with_id).and_call_original
|
|
nx.update_billing_record
|
|
|
|
br = BillingRecord[resource_id: project.id]
|
|
expect(br.amount).to eq(5)
|
|
expect(br.duration(time, time)).to eq(1)
|
|
end
|
|
|
|
it "uses separate billing rate for arm64 runners" do
|
|
time = Time.now
|
|
expect(Time).to receive(:now).and_return(time).at_least(:once)
|
|
expect(github_runner).to receive(:label).and_return("ubicloud-arm").at_least(:once)
|
|
expect(github_runner).to receive(:ready_at).and_return(time - 5 * 60).at_least(:once)
|
|
expect(BillingRecord).to receive(:create_with_id).and_call_original
|
|
nx.update_billing_record
|
|
|
|
br = BillingRecord[resource_id: project.id]
|
|
expect(br.amount).to eq(5)
|
|
expect(br.duration(time, time)).to eq(1)
|
|
expect(br.billing_rate["resource_family"]).to eq("standard-2-arm")
|
|
end
|
|
|
|
it "uses separate billing rate for gpu runners" do
|
|
time = Time.now
|
|
expect(Time).to receive(:now).and_return(time).at_least(:once)
|
|
expect(github_runner).to receive(:label).and_return("ubicloud-gpu").at_least(:once)
|
|
expect(github_runner).to receive(:ready_at).and_return(time - 5 * 60).at_least(:once)
|
|
expect(BillingRecord).to receive(:create_with_id).and_call_original
|
|
nx.update_billing_record
|
|
|
|
br = BillingRecord[resource_id: project.id]
|
|
expect(br.amount).to eq(5)
|
|
expect(br.duration(time, time)).to eq(1)
|
|
expect(br.billing_rate["resource_family"]).to eq("standard-gpu-6")
|
|
end
|
|
|
|
it "updates the amount of existing billing record" do
|
|
time = Time.now
|
|
expect(Time).to receive(:now).and_return(time).at_least(:once)
|
|
expect(github_runner).to receive(:ready_at).and_return(time - 5 * 60).at_least(:once)
|
|
expect(BillingRecord).to receive(:create_with_id).and_call_original
|
|
# Create a record
|
|
nx.update_billing_record
|
|
|
|
expect { nx.update_billing_record }
|
|
.to change { BillingRecord[resource_id: project.id].amount }.from(5).to(10)
|
|
end
|
|
|
|
it "create a new record for a new day" do
  today = Time.now
  tomorrow = today + 24 * 60 * 60
  # The exact call count is intentional: once this expectation has been used up,
  # later Time.now calls presumably fall through to the "tomorrow" expectation
  # registered further down.
  expect(Time).to receive(:now).and_return(today).exactly(5).times
  expect(github_runner).to receive(:ready_at).and_return(today - 5 * 60).twice
  expect(BillingRecord).to receive(:create_with_id).and_call_original
  # Create today record
  nx.update_billing_record

  expect(Time).to receive(:now).and_return(tomorrow).at_least(:once)
  expect(github_runner).to receive(:ready_at).and_return(tomorrow - 5 * 60).at_least(:once)
  expect(BillingRecord).to receive(:create_with_id).and_call_original
  # Create tomorrow record
  expect { nx.update_billing_record }
    .to change { BillingRecord.where(resource_id: project.id).count }.from(1).to(2)

  # One record per day, each covering the same five-minute usage.
  expect(BillingRecord.where(resource_id: project.id).map(&:amount)).to eq([5, 5])
end
|
|
|
|
it "tries 3 times and creates single billing record" do
  time = Time.now
  expect(Time).to receive(:now).and_return(time).at_least(:once)
  expect(github_runner).to receive(:ready_at).and_return(time - 5 * 60).at_least(:once)
  # The first three create attempts hit the exclusion constraint; the next one
  # is allowed through to the real implementation.
  expect(BillingRecord).to receive(:create_with_id).and_raise(Sequel::Postgres::ExclusionConstraintViolation).exactly(3).times
  expect(BillingRecord).to receive(:create_with_id).and_call_original

  expect {
    3.times { nx.update_billing_record }
  }.to change { BillingRecord.where(resource_id: project.id).count }.from(0).to(1)
end
|
|
|
|
it "tries 4 times and fails" do
|
|
time = Time.now
|
|
expect(Time).to receive(:now).and_return(time).at_least(:once)
|
|
expect(github_runner).to receive(:ready_at).and_return(time - 5 * 60).at_least(:once)
|
|
expect(BillingRecord).to receive(:create_with_id).and_raise(Sequel::Postgres::ExclusionConstraintViolation).at_least(:once)
|
|
|
|
expect {
|
|
4.times { nx.update_billing_record }
|
|
}.to raise_error(Sequel::Postgres::ExclusionConstraintViolation)
|
|
end
|
|
end
|
|
|
|
describe "#before_run" do
|
|
it "hops to destroy when needed" do
|
|
expect(nx).to receive(:when_destroy_set?).and_yield
|
|
expect(nx).to receive(:register_deadline)
|
|
expect(nx).to receive(:update_billing_record)
|
|
expect { nx.before_run }.to hop("destroy")
|
|
end
|
|
|
|
it "does not hop to destroy if already in the destroy state" do
|
|
expect(nx).to receive(:when_destroy_set?).and_yield
|
|
expect(nx.strand).to receive(:label).and_return("destroy")
|
|
expect { nx.before_run }.not_to hop("destroy")
|
|
end
|
|
|
|
it "does not hop to destroy if already in the wait_vm_destroy state" do
|
|
expect(nx).to receive(:when_destroy_set?).and_yield
|
|
expect(nx.strand).to receive(:label).and_return("wait_vm_destroy")
|
|
expect { nx.before_run }.not_to hop("destroy")
|
|
end
|
|
end
|
|
|
|
describe "#start" do
|
|
it "hops to wait_concurrency_limit if there is no capacity" do
|
|
dataset = instance_double(Sequel::Dataset, for_update: instance_double(Sequel::Dataset, all: []))
|
|
|
|
installation = instance_double(GithubInstallation)
|
|
project = instance_double(Project, quota_available?: false, github_installations: [installation], active?: true)
|
|
|
|
expect(github_runner).to receive(:installation).and_return(installation).at_least(:once)
|
|
expect(github_runner.installation).to receive(:project_dataset).and_return(dataset)
|
|
expect(github_runner.installation).to receive(:project).and_return(project).at_least(:once)
|
|
|
|
expect { nx.start }.to hop("wait_concurrency_limit")
|
|
end
|
|
|
|
it "hops to allocate_vm if there is capacity" do
|
|
dataset = instance_double(Sequel::Dataset, for_update: instance_double(Sequel::Dataset, all: []))
|
|
|
|
installation = instance_double(GithubInstallation)
|
|
project = instance_double(Project, quota_available?: true, github_installations: [installation], active?: true)
|
|
|
|
expect(github_runner).to receive(:installation).and_return(installation).at_least(:once)
|
|
expect(github_runner.installation).to receive(:project_dataset).and_return(dataset)
|
|
expect(github_runner.installation).to receive(:project).and_return(project).at_least(:once)
|
|
|
|
expect { nx.start }.to hop("allocate_vm")
|
|
end
|
|
|
|
it "pops if the project is not active" do
|
|
installation = instance_double(GithubInstallation, project: instance_double(Project, active?: false))
|
|
expect(github_runner).to receive(:installation).and_return(installation).at_least(:once)
|
|
|
|
expect { nx.start }.to exit({"msg" => "Could not provision a runner for inactive project"})
|
|
end
|
|
end
|
|
|
|
describe "#wait_concurrency_limit" do
|
|
# Create one VM host per (location, arch) pair with all 16 cores in use.
# Examples that need spare capacity lower used_cores on a host themselves.
before do
  [["hetzner-fsn1", "x64"], ["github-runners", "x64"], ["github-runners", "arm64"]].each do |location, arch|
    create_vm_host(location:, arch:, total_cores: 16, used_cores: 16)
  end
end
|
|
|
|
it "waits until customer concurrency limit frees up" do
|
|
dataset = instance_double(Sequel::Dataset, for_update: instance_double(Sequel::Dataset, all: []))
|
|
|
|
installation = instance_double(GithubInstallation)
|
|
project = instance_double(Project, quota_available?: false, github_installations: [installation])
|
|
|
|
expect(github_runner).to receive(:installation).and_return(installation).at_least(:once)
|
|
expect(github_runner.installation).to receive(:project_dataset).and_return(dataset)
|
|
expect(github_runner.installation).to receive(:project).and_return(project).at_least(:once)
|
|
|
|
expect { nx.wait_concurrency_limit }.to nap
|
|
end
|
|
|
|
it "hops to allocate_vm when customer concurrency limit frees up" do
|
|
dataset = instance_double(Sequel::Dataset, for_update: instance_double(Sequel::Dataset, all: []))
|
|
|
|
installation = instance_double(GithubInstallation)
|
|
project = instance_double(Project, quota_available?: true, github_installations: [installation])
|
|
|
|
expect(github_runner).to receive(:installation).and_return(installation).at_least(:once)
|
|
expect(github_runner.installation).to receive(:project_dataset).and_return(dataset)
|
|
expect(github_runner.installation).to receive(:project).and_return(project).at_least(:once)
|
|
|
|
expect { nx.wait_concurrency_limit }.to hop("allocate_vm")
|
|
end
|
|
|
|
it "hops to allocate_vm when customer concurrency limit is full but the overall utilization is low" do
|
|
dataset = instance_double(Sequel::Dataset, for_update: instance_double(Sequel::Dataset, all: []))
|
|
|
|
installation = instance_double(GithubInstallation)
|
|
project = instance_double(Project, quota_available?: false, github_installations: [installation])
|
|
|
|
expect(github_runner).to receive(:installation).and_return(installation).at_least(:once)
|
|
expect(github_runner.installation).to receive(:project_dataset).and_return(dataset)
|
|
expect(github_runner.installation).to receive(:project).and_return(project).at_least(:once)
|
|
VmHost[arch: "x64"].update(used_cores: 4)
|
|
expect { nx.wait_concurrency_limit }.to hop("allocate_vm")
|
|
end
|
|
end
|
|
|
|
describe "#allocate_vm" do
|
|
it "picks vm and hops" do
|
|
expect(nx).to receive(:pick_vm).and_return(vm)
|
|
expect(github_runner).to receive(:update).with(vm_id: vm.id)
|
|
expect(vm).to receive(:update).with(name: github_runner.ubid)
|
|
expect(github_runner).to receive(:reload).and_return(github_runner)
|
|
expect(Clog).to receive(:emit).with("runner_allocated").and_call_original
|
|
expect { nx.allocate_vm }.to hop("wait_vm")
|
|
end
|
|
end
|
|
|
|
describe "#wait_vm" do
|
|
it "naps 13 seconds if vm is not allocated yet" do
|
|
expect(vm).to receive(:allocated_at).and_return(nil)
|
|
expect { nx.wait_vm }.to nap(13)
|
|
end
|
|
|
|
it "naps a second if vm is allocated but not provisioned yet" do
|
|
expect(vm).to receive(:allocated_at).and_return(Time.now)
|
|
expect { nx.wait_vm }.to nap(1)
|
|
end
|
|
|
|
it "hops if vm is ready" do
|
|
expect(vm).to receive_messages(allocated_at: Time.now, provisioned_at: Time.now)
|
|
expect { nx.wait_vm }.to hop("setup_environment")
|
|
end
|
|
end
|
|
|
|
describe ".setup_info" do
|
|
it "returns setup info with vm pool ubid" do
|
|
expect(vm).to receive(:pool_id).and_return("ccd51c1e-2c78-8f76-b182-467e6cdc51f0").at_least(:once)
|
|
expect(vm).to receive(:vm_host).and_return(instance_double(VmHost, ubid: "vhfdmbbtdz3j3h8hccf8s9wz94", location: "hetzner-fsn1", data_center: "FSN1-DC8")).at_least(:once)
|
|
expect(github_runner.installation).to receive(:project).and_return(instance_double(Project, ubid: "pjwnadpt27b21p81d7334f11rx", path: "/project/pjwnadpt27b21p81d7334f11rx")).at_least(:once)
|
|
|
|
expect(nx.setup_info[:detail]).to eq("Name: #{github_runner.ubid}\nLabel: ubicloud-standard-4\nArch: \nImage: \nVM Host: vhfdmbbtdz3j3h8hccf8s9wz94\nVM Pool: vpskahr7hcf26p614czkcvh8z1\nLocation: hetzner-fsn1\nDatacenter: FSN1-DC8\nProject: pjwnadpt27b21p81d7334f11rx\nConsole URL: http://localhost:9292/project/pjwnadpt27b21p81d7334f11rx/github")
|
|
end
|
|
end
|
|
|
|
describe "#setup_environment" do
|
|
it "hops to register_runner" do
|
|
expect(Config).to receive(:docker_mirror_server_vm_id).and_return(vm.id).at_least(:once)
|
|
expect(Vm).to receive(:[]).with(vm.id).and_return(vm).at_least(:once)
|
|
expect(vm).to receive(:vm_host).and_return(instance_double(VmHost, ubid: "vhfdmbbtdz3j3h8hccf8s9wz94", location: "hetzner-fsn1", data_center: "FSN1-DC8", id: "788525ed-d6f0-4937-a844-323d4fd91946")).at_least(:once)
|
|
expect(vm).to receive(:runtime_token).and_return("my_token")
|
|
expect(vm).to receive(:load_balancer).and_return(instance_double(LoadBalancer, hostname: "test.lb.ubicloud.com"))
|
|
expect(github_runner.installation).to receive(:project).and_return(instance_double(Project, ubid: "pjwnadpt27b21p81d7334f11rx", path: "/project/pjwnadpt27b21p81d7334f11rx")).at_least(:once)
|
|
expect(github_runner.installation).to receive(:cache_enabled).and_return(false)
|
|
expect(sshable).to receive(:cmd).with(<<~COMMAND)
|
|
set -ueo pipefail
|
|
echo "image version: $ImageVersion"
|
|
sudo usermod -a -G sudo,adm runneradmin
|
|
jq '. += [{"group":"Ubicloud Managed Runner","detail":"Name: #{github_runner.ubid}\\nLabel: ubicloud-standard-4\\nArch: \\nImage: \\nVM Host: vhfdmbbtdz3j3h8hccf8s9wz94\\nVM Pool: \\nLocation: hetzner-fsn1\\nDatacenter: FSN1-DC8\\nProject: pjwnadpt27b21p81d7334f11rx\\nConsole URL: http://localhost:9292/project/pjwnadpt27b21p81d7334f11rx/github"}]' /imagegeneration/imagedata.json | sudo -u runner tee /home/runner/actions-runner/.setup_info
|
|
echo "UBICLOUD_RUNTIME_TOKEN=my_token
|
|
UBICLOUD_CACHE_URL=http://localhost:9292/runtime/github/" | sudo tee -a /etc/environment
|
|
if [ -f /etc/docker/daemon.json ] && [ -s /etc/docker/daemon.json ]; then
|
|
sudo jq '. + {"registry-mirrors": ["https://test.lb.ubicloud.com:5000"]}' /etc/docker/daemon.json | sudo tee /etc/docker/daemon.json.tmp
|
|
sudo mv /etc/docker/daemon.json.tmp /etc/docker/daemon.json
|
|
else
|
|
echo '{"registry-mirrors": ["https://test.lb.ubicloud.com:5000"]}' | sudo tee /etc/docker/daemon.json
|
|
fi
|
|
sudo mkdir -p /etc/buildkit
|
|
echo '
|
|
[registry."docker.io"]
|
|
mirrors = ["test.lb.ubicloud.com:5000"]
|
|
[registry."test.lb.ubicloud.com:5000"]
|
|
http = false
|
|
insecure = false' | sudo tee -a /etc/buildkit/buildkitd.toml
|
|
sudo systemctl daemon-reload
|
|
sudo systemctl restart docker
|
|
COMMAND
|
|
|
|
expect { nx.setup_environment }.to hop("register_runner")
|
|
end
|
|
|
|
it "hops to register_runner without setting up registry mirror" do
|
|
expect(Config).to receive(:docker_mirror_server_vm_id).and_return(nil)
|
|
expect(vm).to receive(:vm_host).and_return(instance_double(VmHost, ubid: "vhfdmbbtdz3j3h8hccf8s9wz94", location: "hetzner-fsn1", data_center: "FSN1-DC8", id: "788525ed-d6f0-4937-a844-323d4fd91946")).at_least(:once)
|
|
expect(vm).to receive(:runtime_token).and_return("my_token")
|
|
expect(github_runner.installation).to receive(:project).and_return(instance_double(Project, ubid: "pjwnadpt27b21p81d7334f11rx", path: "/project/pjwnadpt27b21p81d7334f11rx")).at_least(:once)
|
|
expect(github_runner.installation).to receive(:cache_enabled).and_return(false)
|
|
expect(sshable).to receive(:cmd).with(<<~COMMAND)
|
|
set -ueo pipefail
|
|
echo "image version: $ImageVersion"
|
|
sudo usermod -a -G sudo,adm runneradmin
|
|
jq '. += [{"group":"Ubicloud Managed Runner","detail":"Name: #{github_runner.ubid}\\nLabel: ubicloud-standard-4\\nArch: \\nImage: \\nVM Host: vhfdmbbtdz3j3h8hccf8s9wz94\\nVM Pool: \\nLocation: hetzner-fsn1\\nDatacenter: FSN1-DC8\\nProject: pjwnadpt27b21p81d7334f11rx\\nConsole URL: http://localhost:9292/project/pjwnadpt27b21p81d7334f11rx/github"}]' /imagegeneration/imagedata.json | sudo -u runner tee /home/runner/actions-runner/.setup_info
|
|
echo "UBICLOUD_RUNTIME_TOKEN=my_token
|
|
UBICLOUD_CACHE_URL=http://localhost:9292/runtime/github/" | sudo tee -a /etc/environment
|
|
COMMAND
|
|
|
|
expect { nx.setup_environment }.to hop("register_runner")
|
|
end
|
|
|
|
it "hops to register_runner with after enabling transparent cache" do
|
|
expect(vm).to receive(:vm_host).and_return(instance_double(VmHost, ubid: "vhfdmbbtdz3j3h8hccf8s9wz94", location: "hetzner-fsn1", data_center: "FSN1-DC8", id: "788525ed-d6f0-4937-a844-323d4fd91946")).at_least(:once)
|
|
expect(vm).to receive(:runtime_token).and_return("my_token")
|
|
expect(github_runner.installation).to receive(:project).and_return(instance_double(Project, ubid: "pjwnadpt27b21p81d7334f11rx", path: "/project/pjwnadpt27b21p81d7334f11rx")).at_least(:once)
|
|
expect(github_runner.installation).to receive(:cache_enabled).and_return(true)
|
|
expect(vm).to receive(:nics).and_return([instance_double(Nic, private_ipv4: NetAddr::IPv4Net.parse("10.0.0.1/32"))]).at_least(:once)
|
|
expect(sshable).to receive(:cmd).with(<<~COMMAND)
|
|
set -ueo pipefail
|
|
echo "image version: $ImageVersion"
|
|
sudo usermod -a -G sudo,adm runneradmin
|
|
jq '. += [{"group":"Ubicloud Managed Runner","detail":"Name: #{github_runner.ubid}\\nLabel: ubicloud-standard-4\\nArch: \\nImage: \\nVM Host: vhfdmbbtdz3j3h8hccf8s9wz94\\nVM Pool: \\nLocation: hetzner-fsn1\\nDatacenter: FSN1-DC8\\nProject: pjwnadpt27b21p81d7334f11rx\\nConsole URL: http://localhost:9292/project/pjwnadpt27b21p81d7334f11rx/github"}]' /imagegeneration/imagedata.json | sudo -u runner tee /home/runner/actions-runner/.setup_info
|
|
echo "UBICLOUD_RUNTIME_TOKEN=my_token
|
|
UBICLOUD_CACHE_URL=http://localhost:9292/runtime/github/" | sudo tee -a /etc/environment
|
|
echo "CUSTOM_ACTIONS_CACHE_URL=http://10.0.0.1:51123/random_token/" | sudo tee -a /etc/environment
|
|
COMMAND
|
|
|
|
expect { nx.setup_environment }.to hop("register_runner")
|
|
end
|
|
end
|
|
|
|
describe "#register_runner" do
|
|
it "registers runner hops" do
|
|
expect(client).to receive(:post).with(/.*generate-jitconfig/, hash_including(name: github_runner.ubid.to_s, labels: [github_runner.label])).and_return({runner: {id: 123}, encoded_jit_config: "AABBCC"})
|
|
expect(sshable).to receive(:cmd).with("sudo -- xargs -I{} -- systemd-run --uid runner --gid runner --working-directory '/home/runner' --unit runner-script --remain-after-exit -- /home/runner/actions-runner/run-withenv.sh {}",
|
|
stdin: "AABBCC")
|
|
expect(github_runner).to receive(:update).with(runner_id: 123, ready_at: anything)
|
|
|
|
expect { nx.register_runner }.to hop("wait")
|
|
end
|
|
|
|
it "deletes the runner if the generate request fails due to 'already exists with the same name' error and the runner script does not start yet." do
|
|
expect(client).to receive(:post)
|
|
.with(/.*generate-jitconfig/, hash_including(name: github_runner.ubid.to_s, labels: [github_runner.label]))
|
|
.and_raise(Octokit::Conflict.new({body: "409 - Already exists - A runner with the name *** already exists."}))
|
|
expect(client).to receive(:paginate)
|
|
.and_yield({runners: [{name: github_runner.ubid.to_s, id: 123}]}, instance_double(Sawyer::Response, data: {runners: []}))
|
|
.and_return({runners: [{name: github_runner.ubid.to_s, id: 123}]})
|
|
expect(sshable).to receive(:cmd).with("systemctl show -p SubState --value runner-script").and_return("dead")
|
|
expect(client).to receive(:delete).with("/repos/#{github_runner.repository_name}/actions/runners/123")
|
|
expect(Clog).to receive(:emit).with("Deregistering runner because it already exists").and_call_original
|
|
expect { nx.register_runner }.to nap(5)
|
|
end
|
|
|
|
it "hops to wait if the generate request fails due to 'already exists with the same name' error and the runner script is running" do
|
|
expect(client).to receive(:post)
|
|
.with(/.*generate-jitconfig/, hash_including(name: github_runner.ubid.to_s, labels: [github_runner.label]))
|
|
.and_raise(Octokit::Conflict.new({body: "409 - Already exists - A runner with the name *** already exists."}))
|
|
expect(client).to receive(:paginate)
|
|
.and_yield({runners: [{name: github_runner.ubid.to_s, id: 123}]}, instance_double(Sawyer::Response, data: {runners: []}))
|
|
.and_return({runners: [{name: github_runner.ubid.to_s, id: 123}]})
|
|
expect(sshable).to receive(:cmd).with("systemctl show -p SubState --value runner-script").and_return("running")
|
|
expect(github_runner).to receive(:update).with(runner_id: 123, ready_at: anything)
|
|
expect { nx.register_runner }.to hop("wait")
|
|
end
|
|
|
|
it "fails if the generate request fails due to 'already exists with the same name' error but couldn't find the runner" do
|
|
expect(client).to receive(:post)
|
|
.with(/.*generate-jitconfig/, hash_including(name: github_runner.ubid.to_s, labels: [github_runner.label]))
|
|
.and_raise(Octokit::Conflict.new({body: "409 - Already exists - A runner with the name *** already exists."}))
|
|
expect(client).to receive(:paginate).and_return({runners: []})
|
|
expect(client).not_to receive(:delete)
|
|
expect { nx.register_runner }.to raise_error RuntimeError, "BUG: Failed with runner already exists error but couldn't find it"
|
|
end
|
|
|
|
it "fails if the generate request fails due to 'Octokit::Conflict' but it's not already exists error" do
|
|
expect(client).to receive(:post)
|
|
.with(/.*generate-jitconfig/, hash_including(name: github_runner.ubid.to_s, labels: [github_runner.label]))
|
|
.and_raise(Octokit::Conflict.new({body: "409 - Another issue"}))
|
|
expect { nx.register_runner }.to raise_error Octokit::Conflict
|
|
end
|
|
end
|
|
|
|
describe "#wait" do
|
|
it "does not destroy runner if it does not pick a job in five minutes, and busy" do
|
|
expect(Time).to receive(:now).and_return(github_runner.ready_at + 6 * 60)
|
|
expect(client).to receive(:get).and_return({busy: true})
|
|
expect(sshable).to receive(:cmd).with("systemctl show -p SubState --value runner-script").and_return("running")
|
|
expect(github_runner).not_to receive(:incr_destroy)
|
|
|
|
expect { nx.wait }.to nap(60)
|
|
end
|
|
|
|
it "destroys runner if it does not pick a job in five minutes and not busy" do
|
|
expect(github_runner).to receive(:workflow_job).and_return(nil)
|
|
expect(Time).to receive(:now).and_return(github_runner.ready_at + 6 * 60)
|
|
expect(client).to receive(:get).and_return({busy: false})
|
|
expect(sshable).to receive(:cmd).with("systemctl show -p SubState --value runner-script").and_return("running")
|
|
expect(github_runner).to receive(:incr_destroy)
|
|
expect(Clog).to receive(:emit).with("The runner does not pick a job").and_call_original
|
|
|
|
expect { nx.wait }.to nap(0)
|
|
end
|
|
|
|
it "does not destroy runner if it does not pick a job but five minutes have not passed yet" do
  expect(github_runner).to receive(:workflow_job).and_return(nil)
  # Only one minute after ready_at; the sibling examples use six minutes to
  # cross the idle window.
  expect(Time).to receive(:now).and_return(github_runner.ready_at + 1 * 60)
  expect(sshable).to receive(:cmd).with("systemctl show -p SubState --value runner-script").and_return("running")
  expect(github_runner).not_to receive(:incr_destroy)

  expect { nx.wait }.to nap(60)
end
|
|
|
|
it "destroys the runner if the runner-script is succeeded" do
|
|
expect(sshable).to receive(:cmd).with("systemctl show -p SubState --value runner-script").and_return("exited")
|
|
expect(github_runner).to receive(:incr_destroy)
|
|
|
|
expect { nx.wait }.to nap(15)
|
|
end
|
|
|
|
it "provisions a spare runner and destroys the current one if the runner-script is failed" do
|
|
expect(sshable).to receive(:cmd).with("systemctl show -p SubState --value runner-script").and_return("failed")
|
|
expect(github_runner).to receive(:provision_spare_runner)
|
|
expect(github_runner).to receive(:incr_destroy)
|
|
expect { nx.wait }.to nap(0)
|
|
end
|
|
|
|
it "naps if the runner-script is running" do
|
|
expect(github_runner).to receive(:workflow_job).and_return({"id" => 123})
|
|
expect(sshable).to receive(:cmd).with("systemctl show -p SubState --value runner-script").and_return("running")
|
|
|
|
expect { nx.wait }.to nap(60)
|
|
end
|
|
end
|
|
|
|
describe "#destroy" do
|
|
it "naps if runner not deregistered yet" do
|
|
expect(client).to receive(:get).and_return(busy: false)
|
|
expect(client).to receive(:delete)
|
|
|
|
expect { nx.destroy }.to nap(5)
|
|
end
|
|
|
|
it "naps if runner still running a job" do
|
|
expect(client).to receive(:get).and_return(busy: true)
|
|
|
|
expect { nx.destroy }.to nap(15)
|
|
end
|
|
|
|
it "destroys resources and hops if runner deregistered" do
|
|
expect(nx).to receive(:decr_destroy)
|
|
expect(client).to receive(:get).and_raise(Octokit::NotFound)
|
|
expect(client).not_to receive(:delete)
|
|
|
|
expect(github_runner).to receive(:workflow_job).and_return({"conclusion" => "failure"}).at_least(:once)
|
|
vm_host = instance_double(VmHost, sshable: sshable)
|
|
fws = [instance_double(Firewall)]
|
|
ps = instance_double(PrivateSubnet, firewalls: fws)
|
|
expect(fws.first).to receive(:destroy)
|
|
expect(ps).to receive(:incr_destroy)
|
|
expect(vm).to receive(:private_subnets).and_return([ps])
|
|
expect(vm).to receive(:vm_host).and_return(vm_host).at_least(:once)
|
|
expect(sshable).to receive(:cmd).with("sudo ln /vm/9qf22jbv/serial.log /var/log/ubicloud/serials/#{github_runner.ubid}_serial.log")
|
|
expect(sshable).to receive(:cmd).with("journalctl -u runner-script -t 'run-withenv.sh' -t 'systemd' --no-pager | grep -Fv Started")
|
|
expect(vm).to receive(:incr_destroy)
|
|
|
|
expect { nx.destroy }.to hop("wait_vm_destroy")
|
|
end
|
|
|
|
it "skip deregistration and destroy vm immediately" do
|
|
expect(nx).to receive(:decr_destroy)
|
|
expect(github_runner).to receive(:skip_deregistration_set?).and_return(true)
|
|
expect(github_runner).to receive(:workflow_job).and_return({"conclusion" => "success"}).at_least(:once)
|
|
expect(vm).to receive(:incr_destroy)
|
|
|
|
expect { nx.destroy }.to hop("wait_vm_destroy")
|
|
end
|
|
|
|
it "destroys resources and hops if runner deregistered, also, copies serial log if workflow_job is nil" do
|
|
expect(nx).to receive(:decr_destroy)
|
|
expect(client).to receive(:get).and_raise(Octokit::NotFound)
|
|
expect(client).not_to receive(:delete)
|
|
|
|
expect(github_runner).to receive(:workflow_job).and_return(nil)
|
|
vm_host = instance_double(VmHost, sshable: sshable)
|
|
expect(vm).to receive(:vm_host).and_return(vm_host).at_least(:once)
|
|
expect(sshable).to receive(:cmd).with("sudo ln /vm/9qf22jbv/serial.log /var/log/ubicloud/serials/#{github_runner.ubid}_serial.log")
|
|
expect(sshable).to receive(:cmd).with("journalctl -u runner-script -t 'run-withenv.sh' -t 'systemd' --no-pager | grep -Fv Started")
|
|
expect(vm).to receive(:incr_destroy)
|
|
|
|
expect { nx.destroy }.to hop("wait_vm_destroy")
|
|
end
|
|
|
|
it "destroys resources and hops if runner deregistered, also, emits log if it couldn't move the serial.log" do
|
|
expect(nx).to receive(:decr_destroy)
|
|
expect(client).to receive(:get).and_raise(Octokit::NotFound)
|
|
expect(client).not_to receive(:delete)
|
|
|
|
expect(github_runner).to receive(:workflow_job).and_return({"conclusion" => "failure"}).at_least(:once)
|
|
vm_host = instance_double(VmHost, sshable: sshable)
|
|
expect(vm).to receive(:vm_host).and_return(vm_host).at_least(:once)
|
|
expect(sshable).to receive(:cmd).and_raise Sshable::SshError.new("bogus", "", "", nil, nil)
|
|
expect(Clog).to receive(:emit).with("Failed to move serial.log or running journalctl").and_call_original
|
|
expect(vm).to receive(:incr_destroy)
|
|
|
|
expect { nx.destroy }.to hop("wait_vm_destroy")
|
|
end
|
|
|
|
it "simply destroys the VM if the workflow_job is there and the conclusion is success" do
  expect(nx).to receive(:decr_destroy)
  # A NotFound from GitHub means the runner is already deregistered.
  expect(client).to receive(:get).and_raise(Octokit::NotFound)
  expect(client).not_to receive(:delete)

  expect(github_runner).to receive(:workflow_job).and_return({"conclusion" => "success"}).at_least(:once)
  # For a successful job no log collection may run over SSH at all. Asserting on
  # the message alone (instead of exact command strings) keeps this check from
  # going stale when the serial-log or journalctl commands change.
  expect(sshable).not_to receive(:cmd)
  expect(vm).to receive(:incr_destroy)

  expect { nx.destroy }.to hop("wait_vm_destroy")
end
|
|
|
|
it "does not destroy vm if it's already destroyed" do
|
|
expect(nx).to receive(:decr_destroy)
|
|
expect(client).to receive(:get).and_raise(Octokit::NotFound)
|
|
expect(client).not_to receive(:delete)
|
|
expect(github_runner).to receive(:vm).and_return(nil)
|
|
expect(vm).not_to receive(:incr_destroy)
|
|
|
|
expect { nx.destroy }.to hop("wait_vm_destroy")
|
|
end
|
|
end
|
|
|
|
describe "#wait_vm_destroy" do
|
|
it "naps if vm not destroyed yet" do
|
|
expect { nx.wait_vm_destroy }.to nap(10)
|
|
end
|
|
|
|
it "extends deadline if vm prevents destroy" do
|
|
expect(vm).to receive(:prevent_destroy_set?).and_return(true)
|
|
expect(nx).to receive(:register_deadline).with(nil, 15 * 60, allow_extension: true)
|
|
expect { nx.wait_vm_destroy }.to nap(10)
|
|
end
|
|
|
|
it "pops if vm destroyed" do
|
|
expect(nx).to receive(:vm).and_return(nil).twice
|
|
expect(github_runner).to receive(:destroy)
|
|
|
|
expect { nx.wait_vm_destroy }.to exit({"msg" => "github runner deleted"})
|
|
end
|
|
end
|
|
end
|