`systemd` has its own environment-variable expansion. In this case we never want the input interpreted that way, so we escape it. We still support some GitHub Actions images whose systemd is too old to accept `--expand-environment=no`, so I read the manual for signs of any other bytes that would need escaping. Finding that only `$` requires doubling, I implement that here.
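A minimal sketch of that escaping, assuming a hypothetical `escape_systemd_expansion` helper (the real code may simply inline the `gsub`); the `register_runner` test below exercises the same behavior by expecting the `AABBCC$` JIT config to arrive as `AABBCC$$` on stdin:

```ruby
# Double each "$" so systemd-run's environment expansion yields a literal "$".
# Per the systemd documentation, "$" is the only byte needing this treatment.
def escape_systemd_expansion(encoded_jit_config)
  encoded_jit_config.gsub("$", "$$")
end

escape_systemd_expansion("AABBCC$") # => "AABBCC$$"
```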
# frozen_string_literal: true

require_relative "../../model/spec_helper"
require "netaddr"
require "octokit"

RSpec.describe Prog::Vm::GithubRunner do
  subject(:nx) {
    described_class.new(Strand.new).tap {
      it.instance_variable_set(:@github_runner, runner)
    }
  }

  let(:runner) do
    customer_project = Project.create(name: "customer")
    runner_project = Project.create(name: "runner-service")
    installation_id = GithubInstallation.create(installation_id: 123, project_id: customer_project.id, name: "ubicloud", type: "Organization").id
    vm_id = create_vm(location_id: Location::GITHUB_RUNNERS_ID, project_id: runner_project.id, boot_image: "github-ubuntu-2204").id
    Sshable.create { it.id = vm_id }
    GithubRunner.create(installation_id:, vm_id:, repository_name: "test-repo", label: "ubicloud-standard-4", created_at: now, allocated_at: now + 10, ready_at: now + 20, workflow_job: {"id" => 123})
  end

  let(:vm) { runner.vm }
  let(:installation) { runner.installation }
  let(:project) { installation.project }
  let(:client) { instance_double(Octokit::Client) }
  let(:now) { Time.utc(2025, 5, 19, 19, 0) }

  before do
    allow(Config).to receive(:github_runner_service_project_id).and_return(vm.project_id)
    allow(Github).to receive(:installation_client).and_return(client)
    allow(Time).to receive(:now).and_return(now)
  end

  describe ".assemble" do
    it "creates github runner and vm with sshable" do
      runner = described_class.assemble(installation, repository_name: "test-repo", label: "ubicloud").subject

      expect(runner).not_to be_nil
      expect(runner.repository_name).to eq("test-repo")
      expect(runner.label).to eq("ubicloud")
    end

    it "creates github runner with custom size" do
      runner = described_class.assemble(installation, repository_name: "test-repo", label: "ubicloud-standard-8").subject

      expect(runner).not_to be_nil
      expect(runner.repository_name).to eq("test-repo")
      expect(runner.label).to eq("ubicloud-standard-8")
    end

    it "fails if label is not valid" do
      expect {
        described_class.assemble(installation, repository_name: "test-repo", label: "ubicloud-standard-1")
      }.to raise_error RuntimeError, "Invalid GitHub runner label: ubicloud-standard-1"
    end
  end

  describe ".pick_vm" do
    it "provisions a VM if the pool does not exist" do
      vm = nx.pick_vm
      expect(vm.pool_id).to be_nil
      expect(vm.sshable.unix_user).to eq("runneradmin")
      expect(vm.unix_user).to eq("runneradmin")
      expect(vm.family).to eq("standard")
      expect(vm.vcpus).to eq(4)
      expect(vm.project_id).to eq(Config.github_runner_service_project_id)
    end

    it "provisions a new vm if the pool is valid but has no vm" do
      VmPool.create(size: 2, vm_size: "standard-4", boot_image: "github-ubuntu-2204", location_id: Location::GITHUB_RUNNERS_ID, storage_size_gib: 150, arch: "x64")
      vm = nx.pick_vm
      expect(vm.pool_id).to be_nil
      expect(vm.sshable.unix_user).to eq("runneradmin")
      expect(vm.family).to eq("standard")
      expect(vm.vcpus).to eq(4)
    end

    it "uses the existing vm if the pool can pick one" do
      pool = VmPool.create(size: 2, vm_size: "standard-4", boot_image: "github-ubuntu-2204", location_id: Location::GITHUB_RUNNERS_ID, storage_size_gib: 150, arch: "x64", storage_skip_sync: true)
      vm = create_vm(pool_id: pool.id, display_state: "running")
      picked_vm = nx.pick_vm
      expect(vm.id).to eq(picked_vm.id)
    end

    it "uses the premium vm pool if the installation prefers premium runners" do
      pool = VmPool.create(size: 2, vm_size: "premium-4", boot_image: "github-ubuntu-2204", location_id: Location::GITHUB_RUNNERS_ID, storage_size_gib: 150, arch: "x64", storage_skip_sync: true)
      vm = create_vm(pool_id: pool.id, display_state: "running", family: "premium")
      expect(installation).to receive(:premium_runner_enabled?).and_return(true)
      picked_vm = nx.pick_vm
      expect(vm.id).to eq(picked_vm.id)
      expect(picked_vm.family).to eq("premium")
    end

    it "uses the premium vm pool if a free premium upgrade is enabled" do
      pool = VmPool.create(size: 2, vm_size: "premium-4", boot_image: "github-ubuntu-2204", location_id: Location::GITHUB_RUNNERS_ID, storage_size_gib: 150, arch: "x64", storage_skip_sync: true)
      vm = create_vm(pool_id: pool.id, display_state: "running", family: "premium")
      expect(installation).to receive(:premium_runner_enabled?).and_return(false)
      expect(installation).to receive(:free_runner_upgrade?).and_return(true)
      picked_vm = nx.pick_vm
      expect(vm.id).to eq(picked_vm.id)
      expect(picked_vm.family).to eq("premium")
    end
  end

  describe ".update_billing_record" do
    it "does not update billing record if the runner is destroyed before it's ready" do
      runner.update(ready_at: nil)
      expect(nx.update_billing_record).to be_nil
      expect(BillingRecord.count).to eq(0)
    end

    it "does not update billing record if the runner does not pick a job" do
      runner.update(ready_at: now, workflow_job: nil)
      expect(nx.update_billing_record).to be_nil
      expect(BillingRecord.count).to eq(0)
    end

    it "creates new billing record when no daily record" do
      runner.update(ready_at: now - 5 * 60)
      expect(BillingRecord).to receive(:create_with_id).and_call_original
      nx.update_billing_record

      br = BillingRecord[resource_id: project.id]
      expect(br.amount).to eq(5)
      expect(br.duration(now, now)).to eq(1)
    end

    it "uses separate billing rate for arm64 runners" do
      runner.update(label: "ubicloud-arm", ready_at: now - 5 * 60)
      expect(BillingRecord).to receive(:create_with_id).and_call_original
      nx.update_billing_record

      br = BillingRecord[resource_id: project.id]
      expect(br.amount).to eq(5)
      expect(br.duration(now, now)).to eq(1)
      expect(br.billing_rate["resource_family"]).to eq("standard-2-arm")
      expect(runner.billed_vm_size).to eq("standard-2-arm")
    end

    it "uses separate billing rate for gpu runners" do
      vm.update(family: "standard-gpu", vcpus: 6)
      runner.update(label: "ubicloud-gpu", ready_at: now - 5 * 60)

      expect(BillingRecord).to receive(:create_with_id).and_call_original
      nx.update_billing_record

      br = BillingRecord[resource_id: project.id]
      expect(br.amount).to eq(5)
      expect(br.duration(now, now)).to eq(1)
      expect(br.billing_rate["resource_family"]).to eq("standard-gpu-6")
      expect(runner.billed_vm_size).to eq("standard-gpu-6")
    end

    it "uses the premium billing rate for upgraded runners" do
      vm.update(family: "premium")
      runner.update(label: "ubicloud-standard-2", ready_at: now - 5 * 60)

      expect(BillingRecord).to receive(:create_with_id).and_call_original
      nx.update_billing_record

      br = BillingRecord[resource_id: project.id]
      expect(br.amount).to eq(5)
      expect(br.duration(now, now)).to eq(1)
      expect(br.billing_rate["resource_family"]).to eq("premium-2")
      expect(runner.billed_vm_size).to eq("premium-2")
    end

    it "uses the original billing rate for runners who were upgraded for free" do
      vm.update(family: "premium")
      runner.update(label: "ubicloud-standard-2", ready_at: now - 5 * 60)

      expect(runner.installation).to receive(:free_runner_upgrade?).and_return(true)
      expect(BillingRecord).to receive(:create_with_id).and_call_original
      nx.update_billing_record

      br = BillingRecord[resource_id: project.id]
      expect(br.amount).to eq(5)
      expect(br.duration(now, now)).to eq(1)
      expect(br.billing_rate["resource_family"]).to eq("standard-2")
      expect(runner.billed_vm_size).to eq("standard-2")
    end

    it "updates the amount of the existing billing record" do
      runner.update(ready_at: now - 5 * 60)

      expect(BillingRecord).to receive(:create_with_id).and_call_original
      # Create a record
      nx.update_billing_record

      expect { nx.update_billing_record }
        .to change { BillingRecord[resource_id: project.id].amount }.from(5).to(10)
    end

    it "creates a new record for a new day" do
      today = Time.now
      tomorrow = today + 24 * 60 * 60
      expect(Time).to receive(:now).and_return(today).exactly(6)
      expect(runner).to receive(:ready_at).and_return(today - 5 * 60).twice
      expect(BillingRecord).to receive(:create_with_id).and_call_original
      # Create today's record
      nx.update_billing_record

      expect(Time).to receive(:now).and_return(tomorrow).at_least(:once)
      expect(runner).to receive(:ready_at).and_return(tomorrow - 5 * 60).at_least(:once)
      expect(BillingRecord).to receive(:create_with_id).and_call_original
      # Create tomorrow's record
      expect { nx.update_billing_record }
        .to change { BillingRecord.where(resource_id: project.id).count }.from(1).to(2)

      expect(BillingRecord.where(resource_id: project.id).map(&:amount)).to eq([5, 5])
    end

    it "tries 3 times and creates a single billing record" do
      runner.update(ready_at: now - 5 * 60)
      expect(BillingRecord).to receive(:create_with_id).and_raise(Sequel::Postgres::ExclusionConstraintViolation).exactly(3)
      expect(BillingRecord).to receive(:create_with_id).and_call_original

      expect {
        3.times { nx.update_billing_record }
      }.to change { BillingRecord.where(resource_id: project.id).count }.from(0).to(1)
    end

    it "tries 4 times and fails" do
      runner.update(ready_at: now - 5 * 60)
      expect(BillingRecord).to receive(:create_with_id).and_raise(Sequel::Postgres::ExclusionConstraintViolation).at_least(:once)

      expect {
        4.times { nx.update_billing_record }
      }.to raise_error(Sequel::Postgres::ExclusionConstraintViolation)
    end
  end

  describe "#before_run" do
    it "hops to destroy when needed" do
      expect(nx).to receive(:when_destroy_set?).and_yield
      expect(nx).to receive(:register_deadline)
      expect(nx).to receive(:update_billing_record)
      expect { nx.before_run }.to hop("destroy")
    end

    it "does not hop to destroy if already in the destroy state" do
      expect(nx).to receive(:when_destroy_set?).and_yield
      expect(nx.strand).to receive(:label).and_return("destroy")
      expect { nx.before_run }.not_to hop("destroy")
    end

    it "does not hop to destroy if already in the wait_vm_destroy state" do
      expect(nx).to receive(:when_destroy_set?).and_yield
      expect(nx.strand).to receive(:label).and_return("wait_vm_destroy")
      expect { nx.before_run }.not_to hop("destroy")
    end
  end

  describe "#start" do
    it "hops to wait_concurrency_limit if there is no capacity" do
      expect(project).to receive(:quota_available?).with("GithubRunnerVCpu", 0).and_return(false)
      expect(project).to receive(:active?).and_return(true)

      expect { nx.start }.to hop("wait_concurrency_limit")
    end

    it "hops to allocate_vm if there is capacity" do
      expect(project).to receive(:quota_available?).with("GithubRunnerVCpu", 0).and_return(true)
      expect(project).to receive(:active?).and_return(true)

      expect { nx.start }.to hop("allocate_vm")
    end

    it "pops if the project is not active" do
      expect(project).to receive(:active?).and_return(false)

      expect { nx.start }.to exit({"msg" => "Could not provision a runner for inactive project"})
    end
  end

  describe "#wait_concurrency_limit" do
    before do
      [[Location::HETZNER_FSN1_ID, "x64"], [Location::GITHUB_RUNNERS_ID, "x64"], [Location::GITHUB_RUNNERS_ID, "arm64"]].each do |location_id, arch|
        create_vm_host(location_id:, arch:, total_cores: 16, used_cores: 16)
      end
    end

    it "waits until customer concurrency limit frees up" do
      expect(project).to receive(:quota_available?).with("GithubRunnerVCpu", 0).and_return(false)

      expect { nx.wait_concurrency_limit }.to nap
    end

    it "hops to allocate_vm when customer concurrency limit frees up" do
      expect(project).to receive(:quota_available?).with("GithubRunnerVCpu", 0).and_return(true)

      expect { nx.wait_concurrency_limit }.to hop("allocate_vm")
    end

    it "hops to allocate_vm when customer concurrency limit is full but the overall utilization is low" do
      expect(project).to receive(:quota_available?).with("GithubRunnerVCpu", 0).and_return(false)

      VmHost[arch: "x64"].update(used_cores: 1)
      expect { nx.wait_concurrency_limit }.to hop("allocate_vm")
    end
  end

  describe "#allocate_vm" do
    it "picks vm and hops" do
      picked_vm = create_vm(name: "picked-vm")
      expect(nx).to receive(:pick_vm).and_return(picked_vm)
      expect(Clog).to receive(:emit).with("runner_allocated").and_call_original
      expect { nx.allocate_vm }.to hop("wait_vm")
      expect(runner.vm_id).to eq(picked_vm.id)
      expect(runner.allocated_at).to eq(now)
      expect(picked_vm.name).to eq(runner.ubid)
    end
  end

  describe "#wait_vm" do
    it "naps 13 seconds if vm is not allocated yet" do
      vm.update(allocated_at: nil)
      expect { nx.wait_vm }.to nap(13)
    end

    it "naps a second if vm is allocated but not provisioned yet" do
      vm.update(allocated_at: now)
      expect { nx.wait_vm }.to nap(1)
    end

    it "hops if vm is ready" do
      vm.update(allocated_at: now, provisioned_at: now)
      expect { nx.wait_vm }.to hop("setup_environment")
    end
  end

  describe ".setup_info" do
    it "returns setup info with vm pool ubid" do
      vm_host = create_vm_host(total_cores: 4, used_cores: 4, data_center: "FSN1-DC8")
      pool = VmPool.create(size: 1, vm_size: "standard-2", location_id: Location::GITHUB_RUNNERS_ID, boot_image: "github-ubuntu-2204", storage_size_gib: 86)
      vm.update(pool_id: pool.id, vm_host_id: vm_host.id)

      expect(nx.setup_info[:detail]).to eq("Name: #{runner.ubid}\nLabel: ubicloud-standard-4\nVM Family: standard\nArch: x64\nImage: github-ubuntu-2204\nVM Host: #{vm_host.ubid}\nVM Pool: #{pool.ubid}\nLocation: hetzner-fsn1\nDatacenter: FSN1-DC8\nProject: #{project.ubid}\nConsole URL: http://localhost:9292/project/#{project.ubid}/github")
    end
  end

  describe "#setup_environment" do
    before do
      vm.update(vm_host_id: create_vm_host(data_center: "FSN1-DC8").id)
    end

    it "hops to register_runner" do
      expect(vm).to receive(:runtime_token).and_return("my_token")
      installation.update(use_docker_mirror: false, cache_enabled: false)
      expect(vm.sshable).to receive(:cmd).with(<<~COMMAND)
        set -ueo pipefail
        echo "image version: $ImageVersion"
        sudo usermod -a -G sudo,adm runneradmin
        jq '. += [{"group":"Ubicloud Managed Runner","detail":"Name: #{runner.ubid}\\nLabel: ubicloud-standard-4\\nVM Family: standard\\nArch: x64\\nImage: github-ubuntu-2204\\nVM Host: #{vm.vm_host.ubid}\\nVM Pool: \\nLocation: hetzner-fsn1\\nDatacenter: FSN1-DC8\\nProject: #{project.ubid}\\nConsole URL: http://localhost:9292/project/#{project.ubid}/github"}]' /imagegeneration/imagedata.json | sudo -u runner tee /home/runner/actions-runner/.setup_info
        echo "UBICLOUD_RUNTIME_TOKEN=my_token
        UBICLOUD_CACHE_URL=http://localhost:9292/runtime/github/" | sudo tee -a /etc/environment
      COMMAND

      expect { nx.setup_environment }.to hop("register_runner")
    end

    it "hops to register_runner after enabling transparent cache" do
      expect(vm).to receive(:runtime_token).and_return("my_token")
      installation.update(use_docker_mirror: false, cache_enabled: true)
      expect(vm).to receive(:nics).and_return([instance_double(Nic, private_ipv4: NetAddr::IPv4Net.parse("10.0.0.1/32"))]).at_least(:once)
      expect(vm.sshable).to receive(:cmd).with(<<~COMMAND)
        set -ueo pipefail
        echo "image version: $ImageVersion"
        sudo usermod -a -G sudo,adm runneradmin
        jq '. += [{"group":"Ubicloud Managed Runner","detail":"Name: #{runner.ubid}\\nLabel: ubicloud-standard-4\\nVM Family: standard\\nArch: x64\\nImage: github-ubuntu-2204\\nVM Host: #{vm.vm_host.ubid}\\nVM Pool: \\nLocation: hetzner-fsn1\\nDatacenter: FSN1-DC8\\nProject: #{project.ubid}\\nConsole URL: http://localhost:9292/project/#{project.ubid}/github"}]' /imagegeneration/imagedata.json | sudo -u runner tee /home/runner/actions-runner/.setup_info
        echo "UBICLOUD_RUNTIME_TOKEN=my_token
        UBICLOUD_CACHE_URL=http://localhost:9292/runtime/github/" | sudo tee -a /etc/environment
        echo "CUSTOM_ACTIONS_CACHE_URL=http://10.0.0.1:51123/random_token/" | sudo tee -a /etc/environment
      COMMAND

      expect { nx.setup_environment }.to hop("register_runner")
    end
  end

  describe "#register_runner" do
    it "registers the runner and hops" do
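      # Per the note above: systemd performs its own "$"-expansion that cannot
      # be disabled on older images, so the JIT config escapes "$" by doubling
      # it; the "AABBCC$" returned by GitHub is sent as "AABBCC$$" on stdin.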
      expect(client).to receive(:post).with(/.*generate-jitconfig/, hash_including(name: runner.ubid.to_s, labels: [runner.label])).and_return({runner: {id: 123}, encoded_jit_config: "AABBCC$"})
      expect(vm.sshable).to receive(:cmd).with("sudo -- xargs -0 -- systemd-run --uid runner --gid runner --working-directory '/home/runner' --unit runner-script --remain-after-exit -- /home/runner/actions-runner/run-withenv.sh",
        stdin: "AABBCC$$")
      expect { nx.register_runner }.to hop("wait")
      expect(runner.runner_id).to eq(123)
      expect(runner.ready_at).to eq(now)
    end

    it "deletes the runner if the generate request fails with an 'already exists with the same name' error and the runner script has not started yet" do
      expect(client).to receive(:post)
        .with(/.*generate-jitconfig/, hash_including(name: runner.ubid.to_s, labels: [runner.label]))
        .and_raise(Octokit::Conflict.new({body: "409 - Already exists - A runner with the name *** already exists."}))
      expect(client).to receive(:paginate)
        .and_yield({runners: [{name: runner.ubid.to_s, id: 123}]}, instance_double(Sawyer::Response, data: {runners: []}))
        .and_return({runners: [{name: runner.ubid.to_s, id: 123}]})
      expect(vm.sshable).to receive(:cmd).with("systemctl show -p SubState --value runner-script").and_return("dead")
      expect(client).to receive(:delete).with("/repos/#{runner.repository_name}/actions/runners/123")
      expect(Clog).to receive(:emit).with("Deregistering runner because it already exists").and_call_original
      expect { nx.register_runner }.to nap(5)
    end

    it "hops to wait if the generate request fails with an 'already exists with the same name' error and the runner script is running" do
      expect(client).to receive(:post)
        .with(/.*generate-jitconfig/, hash_including(name: runner.ubid.to_s, labels: [runner.label]))
        .and_raise(Octokit::Conflict.new({body: "409 - Already exists - A runner with the name *** already exists."}))
      expect(client).to receive(:paginate)
        .and_yield({runners: [{name: runner.ubid.to_s, id: 123}]}, instance_double(Sawyer::Response, data: {runners: []}))
        .and_return({runners: [{name: runner.ubid.to_s, id: 123}]})
      expect(vm.sshable).to receive(:cmd).with("systemctl show -p SubState --value runner-script").and_return("running")
      expect { nx.register_runner }.to hop("wait")
      expect(runner.runner_id).to eq(123)
      expect(runner.ready_at).to eq(now)
    end

    it "fails if the generate request fails with an 'already exists with the same name' error but the runner couldn't be found" do
      expect(client).to receive(:post)
        .with(/.*generate-jitconfig/, hash_including(name: runner.ubid.to_s, labels: [runner.label]))
        .and_raise(Octokit::Conflict.new({body: "409 - Already exists - A runner with the name *** already exists."}))
      expect(client).to receive(:paginate).and_return({runners: []})
      expect(client).not_to receive(:delete)
      expect { nx.register_runner }.to raise_error RuntimeError, "BUG: Failed with runner already exists error but couldn't find it"
    end

    it "fails if the generate request fails with an Octokit::Conflict that is not an 'already exists' error" do
      expect(client).to receive(:post)
        .with(/.*generate-jitconfig/, hash_including(name: runner.ubid.to_s, labels: [runner.label]))
        .and_raise(Octokit::Conflict.new({body: "409 - Another issue"}))
      expect { nx.register_runner }.to raise_error Octokit::Conflict
    end
  end

  describe "#wait" do
    it "does not destroy the runner if it has not picked a job in five minutes but is busy" do
      runner.update(ready_at: now - 6 * 60, workflow_job: nil)
      expect(client).to receive(:get).and_return({busy: true})
      expect(vm.sshable).to receive(:cmd).with("systemctl show -p SubState --value runner-script").and_return("running")
      expect(runner).not_to receive(:incr_destroy)

      expect { nx.wait }.to nap(60)
    end

    it "destroys the runner if it has not picked a job in five minutes and is not busy" do
      runner.update(ready_at: now - 6 * 60, workflow_job: nil)
      expect(client).to receive(:get).and_return({busy: false})
      expect(vm.sshable).to receive(:cmd).with("systemctl show -p SubState --value runner-script").and_return("running")
      expect(runner).to receive(:incr_destroy)
      expect(Clog).to receive(:emit).with("The runner does not pick a job").and_call_original

      expect { nx.wait }.to nap(0)
    end

    it "destroys the runner if it has not picked a job in five minutes and is already deleted" do
      runner.update(ready_at: now - 6 * 60, workflow_job: nil)
      expect(client).to receive(:get).and_raise(Octokit::NotFound)
      expect(vm.sshable).to receive(:cmd).with("systemctl show -p SubState --value runner-script").and_return("running")
      expect(runner).to receive(:incr_destroy)
      expect(Clog).to receive(:emit).with("The runner does not pick a job").and_call_original

      expect { nx.wait }.to nap(0)
    end

    it "does not destroy the runner if it has not picked a job but two minutes have not passed yet" do
      runner.update(ready_at: now - 60, workflow_job: nil)
      expect(vm.sshable).to receive(:cmd).with("systemctl show -p SubState --value runner-script").and_return("running")
      expect(runner).not_to receive(:incr_destroy)

      expect { nx.wait }.to nap(60)
    end

    it "destroys the runner if the runner-script has succeeded" do
      expect(vm.sshable).to receive(:cmd).with("systemctl show -p SubState --value runner-script").and_return("exited")
      expect(runner).to receive(:incr_destroy)

      expect { nx.wait }.to nap(15)
    end

    it "provisions a spare runner and destroys the current one if the runner-script has failed" do
      expect(vm.sshable).to receive(:cmd).with("systemctl show -p SubState --value runner-script").and_return("failed")
      expect(runner).to receive(:provision_spare_runner)
      expect(runner).to receive(:incr_destroy)
      expect { nx.wait }.to nap(0)
    end

    it "naps if the runner-script is running" do
      expect(vm.sshable).to receive(:cmd).with("systemctl show -p SubState --value runner-script").and_return("running")

      expect { nx.wait }.to nap(60)
    end
  end

  describe ".collect_final_telemetry" do
    before do
      vm.update(vm_host_id: create_vm_host(data_center: "FSN1-DC8").id)
    end

    it "logs journalctl and docker limits if workflow_job is not successful" do
      runner.update(workflow_job: {"conclusion" => "failure"})
      expect(vm.vm_host.sshable).to receive(:cmd).with("sudo ln /vm/#{vm.inhost_name}/serial.log /var/log/ubicloud/serials/#{runner.ubid}_serial.log")
      expect(vm.sshable).to receive(:cmd).with("journalctl -u runner-script -t 'run-withenv.sh' -t 'systemd' --no-pager | grep -Fv Started")
      expect(vm.sshable).to receive(:cmd).with(<<~COMMAND, log: false)
        TOKEN=$(curl -m 10 -s "https://auth.docker.io/token?service=registry.docker.io&scope=repository:ratelimitpreview/test:pull" | jq -r .token)
        curl -m 10 -s --head -H "Authorization: Bearer $TOKEN" https://registry-1.docker.io/v2/ratelimitpreview/test/manifests/latest | grep ratelimit
      COMMAND

      nx.collect_final_telemetry
    end

    it "logs journalctl and docker limits if workflow_job is nil" do
      runner.update(workflow_job: nil)
      expect(vm.vm_host.sshable).to receive(:cmd).with("sudo ln /vm/#{vm.inhost_name}/serial.log /var/log/ubicloud/serials/#{runner.ubid}_serial.log")
      expect(vm.sshable).to receive(:cmd).with("journalctl -u runner-script -t 'run-withenv.sh' -t 'systemd' --no-pager | grep -Fv Started")
      expect(vm.sshable).to receive(:cmd).with(<<~COMMAND, log: false)
        TOKEN=$(curl -m 10 -s "https://auth.docker.io/token?service=registry.docker.io&scope=repository:ratelimitpreview/test:pull" | jq -r .token)
        curl -m 10 -s --head -H "Authorization: Bearer $TOKEN" https://registry-1.docker.io/v2/ratelimitpreview/test/manifests/latest | grep ratelimit
      COMMAND

      nx.collect_final_telemetry
    end

    it "logs only docker limits if workflow_job is successful" do
      runner.update(workflow_job: {"conclusion" => "success"})
      expect(vm.sshable).to receive(:cmd).with(<<~COMMAND, log: false).and_return("ratelimit-limit: 100;w=21600\nratelimit-remaining: 98;w=21600\ndocker-ratelimit-source: 192.168.1.1\n")
        TOKEN=$(curl -m 10 -s "https://auth.docker.io/token?service=registry.docker.io&scope=repository:ratelimitpreview/test:pull" | jq -r .token)
        curl -m 10 -s --head -H "Authorization: Bearer $TOKEN" https://registry-1.docker.io/v2/ratelimitpreview/test/manifests/latest | grep ratelimit
      COMMAND
      expect(Clog).to receive(:emit).with("Remaining DockerHub rate limits") do |&blk|
        expect(blk.call).to eq(dockerhub_rate_limits: {limit: 100, limit_window: 21600, remaining: 98, remaining_window: 21600, source: "192.168.1.1"})
      end

      nx.collect_final_telemetry
    end

    it "doesn't fail if it failed due to Sshable::SshError" do
      runner.update(workflow_job: {"conclusion" => "success"})
      expect(vm.sshable).to receive(:cmd).and_raise Sshable::SshError.new("bogus", "", "", nil, nil)
      expect(Clog).to receive(:emit).with("Failed to collect final telemetry").and_call_original

      nx.collect_final_telemetry
    end

    it "doesn't fail if it failed due to Net::SSH::ConnectionTimeout" do
      runner.update(workflow_job: {"conclusion" => "success"})
      expect(vm.sshable).to receive(:cmd).and_raise Net::SSH::ConnectionTimeout
      expect(Clog).to receive(:emit).with("Failed to collect final telemetry").and_call_original

      nx.collect_final_telemetry
    end
  end

  describe "#destroy" do
    it "naps if the runner is not deregistered yet" do
      expect(client).to receive(:get).and_return(busy: false)
      expect(client).to receive(:delete)

      expect { nx.destroy }.to nap(5)
    end

    it "naps if the runner is still running a job" do
      expect(client).to receive(:get).and_return(busy: true)

      expect { nx.destroy }.to nap(15)
    end

    it "destroys resources and hops if the runner is deregistered" do
      vm.update(vm_host_id: create_vm_host.id)
      expect(nx).to receive(:decr_destroy)
      expect(client).to receive(:get).and_raise(Octokit::NotFound)
      expect(client).not_to receive(:delete)
      expect(nx).to receive(:collect_final_telemetry)
      fw = instance_double(Firewall)
      ps = instance_double(PrivateSubnet, firewalls: [fw])
      expect(fw).to receive(:destroy)
      expect(ps).to receive(:incr_destroy)
      expect(vm).to receive(:private_subnets).and_return([ps])
      expect(vm).to receive(:incr_destroy)

      expect { nx.destroy }.to hop("wait_vm_destroy")
    end

    it "skips deregistration and destroys the vm immediately" do
      vm.update(vm_host_id: create_vm_host.id)
      expect(nx).to receive(:decr_destroy)
      expect(runner).to receive(:skip_deregistration_set?).and_return(true)
      expect(nx).to receive(:collect_final_telemetry)
      expect(vm).to receive(:incr_destroy)

      expect { nx.destroy }.to hop("wait_vm_destroy")
    end

    it "does not collect telemetry if the vm is not allocated" do
      vm.update(vm_host_id: nil)
      expect(nx).to receive(:decr_destroy)
      expect(client).to receive(:get).and_raise(Octokit::NotFound)
      expect(nx).not_to receive(:collect_final_telemetry)
      expect(vm).to receive(:incr_destroy)

      expect { nx.destroy }.to hop("wait_vm_destroy")
    end

    it "does not destroy the vm if it's already destroyed" do
      runner.update(vm_id: nil)
      expect(nx).to receive(:vm).and_return(nil).at_least(:once)
      expect(nx).to receive(:decr_destroy)
      expect(client).to receive(:get).and_raise(Octokit::NotFound)
      expect(client).not_to receive(:delete)

      expect { nx.destroy }.to hop("wait_vm_destroy")
    end
  end

  describe "#wait_vm_destroy" do
    it "naps if the vm is not destroyed yet" do
      expect { nx.wait_vm_destroy }.to nap(10)
    end

    it "extends the deadline if the vm prevents destroy" do
      expect(runner.vm).to receive(:prevent_destroy_set?).and_return(true)
      expect(nx).to receive(:register_deadline).with(nil, 15 * 60, allow_extension: true)
      expect { nx.wait_vm_destroy }.to nap(10)
    end

    it "pops if the vm is destroyed" do
      expect(nx).to receive(:vm).and_return(nil).twice
      expect(runner).to receive(:destroy)

      expect { nx.wait_vm_destroy }.to exit({"msg" => "github runner deleted"})
    end
  end
end