ubicloud/spec/prog/kubernetes/kubernetes_cluster_nexus_spec.rb
Commit 4b819d3cb2 by Jeremy Evans: Change all create_with_id to create

It hasn't been necessary to use create_with_id since
ebc79622df, in December 2024.

I have plans to introduce:

```ruby
def create_with_id(id, values)
  obj = new(values)
  obj.id = id
  obj.save_changes
end
```

This will make it easier to use the same id when creating
multiple objects.  The first step is removing the existing
uses of create_with_id.
2025-08-06 01:55:51 +09:00
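
As an illustration only (the model names below are hypothetical and not part of this change), sharing one id across related rows with the planned method might look something like:

```ruby
# Minimal sketch of the intended usage, assuming hypothetical Sequel models
# Disk and DiskMetadata whose rows are meant to share a primary key, and
# assuming the models expose a UUID generator.
id = Disk.generate_uuid
disk = Disk.create_with_id(id, name: "example", size_gib: 100)
meta = DiskMetadata.create_with_id(id, encrypted: true)
```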


# frozen_string_literal: true

require_relative "../../model/spec_helper"

RSpec.describe Prog::Kubernetes::KubernetesClusterNexus do
  subject(:nx) { described_class.new(st) }

  let(:st) { Strand.new(id: "8148ebdf-66b8-8ed0-9c2f-8cfe93f5aa77") }
  let(:customer_project) { Project.create(name: "default") }
  let(:subnet) { PrivateSubnet.create(net6: "0::0", net4: "127.0.0.1", name: "x", location_id: Location::HETZNER_FSN1_ID, project_id: Config.kubernetes_service_project_id) }

  let(:kubernetes_cluster) {
    kc = KubernetesCluster.create(
      name: "k8scluster",
      version: "v1.32",
      cp_node_count: 3,
      private_subnet_id: subnet.id,
      location_id: Location::HETZNER_FSN1_ID,
      project_id: customer_project.id,
      target_node_size: "standard-2"
    )
    KubernetesNodepool.create(name: "k8stest-np", node_count: 2, kubernetes_cluster_id: kc.id, target_node_size: "standard-2")

    dns_zone = DnsZone.create(project_id: Project.first.id, name: "somezone", last_purged_at: Time.now)

    apiserver_lb = LoadBalancer.create(private_subnet_id: subnet.id, name: "somelb", health_check_endpoint: "/foo", project_id: Config.kubernetes_service_project_id)
    LoadBalancerPort.create(load_balancer_id: apiserver_lb.id, src_port: 123, dst_port: 456)
    kc.add_cp_vm(create_vm)
    kc.add_cp_vm(create_vm)
    kc.update(api_server_lb_id: apiserver_lb.id)

    services_lb = Prog::Vnet::LoadBalancerNexus.assemble(
      subnet.id,
      name: "somelb2",
      algorithm: "hash_based",
      # TODO: change the api to support LBs without ports
      # The next two fields will be later modified by the sync_kubernetes_services label
      # These are just set for passing the creation validations
      src_port: 443,
      dst_port: 6443,
      health_check_endpoint: "/",
      health_check_protocol: "tcp",
      custom_hostname_dns_zone_id: dns_zone.id,
      custom_hostname_prefix: "someprefix",
      stack: LoadBalancer::Stack::IPV4
    ).subject
    kc.update(services_lb_id: services_lb.id)

    kc
  }

  before do
    allow(Config).to receive(:kubernetes_service_project_id).and_return(Project.create(name: "UbicloudKubernetesService").id)
    allow(nx).to receive(:kubernetes_cluster).and_return(kubernetes_cluster)
  end
describe ".assemble" do
it "validates input" do
expect {
described_class.assemble(project_id: "88c8beda-0718-82d2-9948-7569acc26b80", name: "k8stest", version: "v1.32", location_id: Location::HETZNER_FSN1_ID, cp_node_count: 3, private_subnet_id: subnet.id)
}.to raise_error RuntimeError, "No existing project"
expect {
described_class.assemble(version: "v1.30", project_id: customer_project.id, name: "k8stest", location_id: Location::HETZNER_FSN1_ID, cp_node_count: 3, private_subnet_id: subnet.id)
}.to raise_error RuntimeError, "Invalid Kubernetes Version"
expect {
described_class.assemble(name: "Uppercase", version: "v1.32", project_id: customer_project.id, location_id: Location::HETZNER_FSN1_ID, cp_node_count: 3, private_subnet_id: subnet.id)
}.to raise_error Validation::ValidationFailed, "Validation failed for following fields: name"
expect {
described_class.assemble(name: "hyph_en", version: "v1.32", project_id: customer_project.id, location_id: Location::HETZNER_FSN1_ID, cp_node_count: 3, private_subnet_id: subnet.id)
}.to raise_error Validation::ValidationFailed, "Validation failed for following fields: name"
expect {
described_class.assemble(name: "onetoolongnameforatestkubernetesclustername", version: "v1.32", project_id: customer_project.id, location_id: Location::HETZNER_FSN1_ID, cp_node_count: 3, private_subnet_id: subnet.id)
}.to raise_error Validation::ValidationFailed, "Validation failed for following fields: name"
expect {
described_class.assemble(name: "somename", version: "v1.32", project_id: customer_project.id, location_id: Location::HETZNER_FSN1_ID, cp_node_count: 2, private_subnet_id: subnet.id)
}.to raise_error Validation::ValidationFailed, "Validation failed for following fields: control_plane_node_count"
p = Project.create(name: "another")
subnet.update(project_id: p.id)
expect {
described_class.assemble(name: "normalname", project_id: customer_project.id, location_id: Location::HETZNER_FSN1_ID, cp_node_count: 3, private_subnet_id: subnet.id)
}.to raise_error RuntimeError, "Given subnet is not available in the k8s project"
end
it "creates a kubernetes cluster" do
st = described_class.assemble(name: "k8stest", version: "v1.33", private_subnet_id: subnet.id, project_id: customer_project.id, location_id: Location::HETZNER_FSN1_ID, cp_node_count: 3, target_node_size: "standard-8", target_node_storage_size_gib: 100)
kc = st.subject
expect(kc.name).to eq "k8stest"
expect(kc.ubid).to start_with("kc")
expect(kc.version).to eq "v1.33"
expect(kc.location_id).to eq Location::HETZNER_FSN1_ID
expect(kc.cp_node_count).to eq 3
expect(kc.private_subnet.id).to eq subnet.id
expect(kc.project.id).to eq customer_project.id
expect(kc.strand.label).to eq "start"
expect(kc.target_node_size).to eq "standard-8"
expect(kc.target_node_storage_size_gib).to eq 100
end
it "has defaults for node size, storage size, version and subnet" do
st = described_class.assemble(name: "k8stest", project_id: customer_project.id, location_id: Location::HETZNER_FSN1_ID, cp_node_count: 3)
kc = st.subject
expect(kc.version).to eq "v1.32"
expect(kc.private_subnet.net4.to_s[-3..]).to eq "/18"
expect(kc.private_subnet.name).to eq kc.ubid.to_s + "-subnet"
expect(kc.target_node_size).to eq "standard-2"
expect(kc.target_node_storage_size_gib).to be_nil
end
end
describe "#before_run" do
it "hops to destroy" do
expect { nx.create_billing_records }.to hop("wait")
expect(nx).to receive(:when_destroy_set?).and_yield
expect(kubernetes_cluster.active_billing_records).not_to be_empty
expect(kubernetes_cluster.active_billing_records).to all(receive(:finalize))
expect { nx.before_run }.to hop("destroy")
end
it "does not hop to destroy if already in the destroy state" do
expect(nx).to receive(:when_destroy_set?).and_yield
expect(nx.strand).to receive(:label).and_return("destroy")
expect { nx.before_run }.not_to hop("destroy")
end
end
describe "#start" do
it "registers deadline and hops" do
expect(nx).to receive(:register_deadline)
expect(nx).to receive(:incr_install_metrics_server)
expect(nx).to receive(:incr_sync_worker_mesh)
expect { nx.start }.to hop("create_load_balancers")
end
end
describe "#create_load_balancers" do
it "creates api server and services load balancers with the right dns zone on prod and hops" do
kubernetes_cluster.update(api_server_lb_id: nil, services_lb_id: nil)
allow(Config).to receive(:kubernetes_service_hostname).and_return("k8s.ubicloud.com")
dns_zone = DnsZone.create(project_id: Project.first.id, name: "k8s.ubicloud.com", last_purged_at: Time.now)
expect { nx.create_load_balancers }.to hop("bootstrap_control_plane_vms")
expect(kubernetes_cluster.api_server_lb.name).to eq "#{kubernetes_cluster.ubid}-apiserver"
expect(kubernetes_cluster.api_server_lb.ports.first.src_port).to eq 443
expect(kubernetes_cluster.api_server_lb.ports.first.dst_port).to eq 6443
expect(kubernetes_cluster.api_server_lb.health_check_endpoint).to eq "/healthz"
expect(kubernetes_cluster.api_server_lb.health_check_protocol).to eq "tcp"
expect(kubernetes_cluster.api_server_lb.stack).to eq LoadBalancer::Stack::DUAL
expect(kubernetes_cluster.api_server_lb.private_subnet_id).to eq subnet.id
expect(kubernetes_cluster.api_server_lb.custom_hostname_dns_zone_id).to eq dns_zone.id
expect(kubernetes_cluster.api_server_lb.custom_hostname).to eq "k8scluster-apiserver-#{kubernetes_cluster.ubid[-5...]}.k8s.ubicloud.com"
expect(kubernetes_cluster.services_lb.name).to eq "#{kubernetes_cluster.ubid}-services"
expect(kubernetes_cluster.services_lb.stack).to eq LoadBalancer::Stack::IPV4
expect(kubernetes_cluster.services_lb.private_subnet_id).to eq subnet.id
expect(kubernetes_cluster.services_lb.custom_hostname_dns_zone_id).to eq dns_zone.id
expect(kubernetes_cluster.services_lb.custom_hostname).to eq "k8scluster-services-#{kubernetes_cluster.ubid[-5...]}.k8s.ubicloud.com"
end
it "creates load balancers with dns zone id on development for api server and services, then hops" do
expect { nx.create_load_balancers }.to hop("bootstrap_control_plane_vms")
expect(kubernetes_cluster.api_server_lb.name).to eq "#{kubernetes_cluster.ubid}-apiserver"
expect(kubernetes_cluster.api_server_lb.ports.first.src_port).to eq 443
expect(kubernetes_cluster.api_server_lb.ports.first.dst_port).to eq 6443
expect(kubernetes_cluster.api_server_lb.health_check_endpoint).to eq "/healthz"
expect(kubernetes_cluster.api_server_lb.health_check_protocol).to eq "tcp"
expect(kubernetes_cluster.api_server_lb.stack).to eq LoadBalancer::Stack::DUAL
expect(kubernetes_cluster.api_server_lb.private_subnet_id).to eq subnet.id
expect(kubernetes_cluster.api_server_lb.custom_hostname).to be_nil
expect(kubernetes_cluster.services_lb.name).to eq "#{kubernetes_cluster.ubid}-services"
expect(kubernetes_cluster.services_lb.private_subnet_id).to eq subnet.id
expect(kubernetes_cluster.services_lb.custom_hostname).to be_nil
end
end
describe "#bootstrap_control_plane_vms" do
it "waits until the load balancer endpoint is set" do
expect(kubernetes_cluster.api_server_lb).to receive(:hostname).and_return nil
expect { nx.bootstrap_control_plane_vms }.to nap(5)
end
it "incrs start_bootstrapping on KubernetesNodepool on 3 node control plane setup" do
expect(kubernetes_cluster).to receive(:cp_vms).and_return([create_vm, create_vm, create_vm]).twice
expect(kubernetes_cluster.nodepools.first).to receive(:incr_start_bootstrapping)
expect { nx.bootstrap_control_plane_vms }.to hop("wait_nodes")
end
it "incrs start_bootstrapping on KubernetesNodepool on 1 node control plane setup" do
kubernetes_cluster.update(cp_node_count: 1)
expect(kubernetes_cluster).to receive(:cp_vms).and_return([create_vm]).twice
expect(kubernetes_cluster.nodepools.first).to receive(:incr_start_bootstrapping)
expect { nx.bootstrap_control_plane_vms }.to hop("wait_nodes")
end
it "hops wait_nodes if the target number of CP vms is reached" do
expect(kubernetes_cluster.api_server_lb).to receive(:hostname).and_return "endpoint"
expect(kubernetes_cluster).to receive(:cp_vms).and_return([1, 2, 3]).twice
expect { nx.bootstrap_control_plane_vms }.to hop("wait_nodes")
end
it "buds ProvisionKubernetesNode prog to create VMs" do
expect(kubernetes_cluster).to receive(:endpoint).and_return "endpoint"
expect(nx).to receive(:bud).with(Prog::Kubernetes::ProvisionKubernetesNode, {"subject_id" => kubernetes_cluster.id})
expect { nx.bootstrap_control_plane_vms }.to hop("wait_control_plane_node")
end
end
describe "#wait_control_plane_node" do
it "hops back to bootstrap_control_plane_vms if there are no sub-programs running" do
st.update(prog: "Kubernetes::KubernetesClusterNexus", label: "wait_control_plane_node", stack: [{}])
expect { nx.wait_control_plane_node }.to hop("bootstrap_control_plane_vms")
end
it "donates if there are sub-programs running" do
st.update(prog: "Kubernetes::KubernetesClusterNexus", label: "wait_control_plane_node", stack: [{}])
Strand.create(parent_id: st.id, prog: "Kubernetes::ProvisionKubernetesNode", label: "start", stack: [{}], lease: Time.now + 10)
expect { nx.wait_control_plane_node }.to nap(120)
end
end
describe "#wait_nodes" do
it "naps until all nodepools are ready" do
expect(kubernetes_cluster.nodepools.first).to receive(:strand).and_return(instance_double(Strand, label: "not_wait"))
expect { nx.wait_nodes }.to nap(10)
end
it "hops to create_billing_records when all nodepools are ready" do
expect(kubernetes_cluster.nodepools.first).to receive(:strand).and_return(instance_double(Strand, label: "wait"))
expect { nx.wait_nodes }.to hop("create_billing_records")
end
end
describe "#create_billing_records" do
it "creates billing records for all cp vms and nodepools" do
kubernetes_cluster.nodepools.first.add_vm(create_vm)
expect { nx.create_billing_records }.to hop("wait")
expect(kubernetes_cluster.active_billing_records.length).to eq 4
expect(kubernetes_cluster.active_billing_records.map { it.billing_rate["resource_type"] }).to eq ["KubernetesControlPlaneVCpu", "KubernetesControlPlaneVCpu", "KubernetesWorkerVCpu", "KubernetesWorkerStorage"]
end
end
describe "#sync_kubernetes_services" do
it "calls the sync_kubernetes_services function" do
client = instance_double(Kubernetes::Client)
expect(nx).to receive(:decr_sync_kubernetes_services)
expect(kubernetes_cluster).to receive(:client).and_return(client)
expect(client).to receive(:sync_kubernetes_services)
expect { nx.sync_kubernetes_services }.to hop("wait")
end
end
describe "#wait" do
it "hops to the right sync_kubernetes_service when its semaphore is set" do
expect(nx).to receive(:when_sync_kubernetes_services_set?).and_yield
expect { nx.wait }.to hop("sync_kubernetes_services")
end
it "hops to upgrade when semaphore is set" do
expect(nx).to receive(:when_upgrade_set?).and_yield
expect { nx.wait }.to hop("upgrade")
end
it "hops to install_metrics_server when semaphore is set" do
expect(nx).to receive(:when_install_metrics_server_set?).and_yield
expect { nx.wait }.to hop("install_metrics_server")
end
it "hops to sync_worker_mesh when semaphore is set" do
expect(nx).to receive(:when_sync_worker_mesh_set?).and_yield
expect { nx.wait }.to hop("sync_worker_mesh")
end
it "naps until sync_kubernetes_service or upgrade is set" do
expect { nx.wait }.to nap(6 * 60 * 60)
end
end
describe "#upgrade" do
let(:first_vm) { create_vm }
let(:second_vm) { create_vm }
let(:client) { instance_double(Kubernetes::Client) }
before do
sshable0, sshable1 = instance_double(Sshable), instance_double(Sshable)
expect(kubernetes_cluster).to receive(:cp_vms).and_return([first_vm, second_vm])
allow(first_vm).to receive(:sshable).and_return(sshable0)
allow(second_vm).to receive(:sshable).and_return(sshable1)
allow(sshable0).to receive(:connect)
allow(sshable1).to receive(:connect)
expect(kubernetes_cluster).to receive(:client).and_return(client).at_least(:once)
end
it "selects a VM with minor version one less than the cluster's version" do
expect(kubernetes_cluster).to receive(:version).and_return("v1.32").twice
expect(client).to receive(:version).and_return("v1.32", "v1.31")
expect(nx).to receive(:bud).with(Prog::Kubernetes::UpgradeKubernetesNode, {"old_vm_id" => second_vm.id})
expect { nx.upgrade }.to hop("wait_upgrade")
end
it "hops to wait when all VMs are at the cluster's version" do
expect(kubernetes_cluster).to receive(:version).and_return("v1.32").twice
expect(client).to receive(:version).and_return("v1.32", "v1.32")
expect { nx.upgrade }.to hop("wait")
end
it "does not select a VM with minor version more than one less than the cluster's version" do
expect(kubernetes_cluster).to receive(:version).and_return("v1.32").twice
expect(client).to receive(:version).and_return("v1.30", "v1.32")
expect { nx.upgrade }.to hop("wait")
end
it "skips VMs with invalid version formats" do
expect(kubernetes_cluster).to receive(:version).and_return("v1.32").twice
expect(client).to receive(:version).and_return("invalid", "v1.32")
expect { nx.upgrade }.to hop("wait")
end
it "selects the first VM that is one minor version behind" do
expect(kubernetes_cluster).to receive(:version).and_return("v1.32")
expect(client).to receive(:version).and_return("v1.31")
expect(nx).to receive(:bud).with(Prog::Kubernetes::UpgradeKubernetesNode, {"old_vm_id" => first_vm.id})
expect { nx.upgrade }.to hop("wait_upgrade")
end
it "hops to wait if cluster version is invalid" do
expect(kubernetes_cluster).to receive(:version).and_return("invalid").twice
expect(client).to receive(:version).and_return("v1.31", "v1.31")
expect { nx.upgrade }.to hop("wait")
end
it "does not select a VM with a higher minor version than the cluster" do
expect(kubernetes_cluster).to receive(:version).and_return("v1.32").twice
expect(client).to receive(:version).and_return("v1.33", "v1.32")
expect { nx.upgrade }.to hop("wait")
end
end
describe "#wait_upgrade" do
it "hops back to upgrade if there are no sub-programs running" do
st.update(prog: "Kubernetes::KubernetesClusterNexus", label: "destroy", stack: [{}])
expect { nx.wait_upgrade }.to hop("upgrade")
end
it "donates if there are sub-programs running" do
st.update(prog: "Kubernetes::KubernetesClusterNexus", label: "destroy", stack: [{}])
Strand.create(parent_id: st.id, prog: "Kubernetes::ProvisionKubernetesNode", label: "start", stack: [{}], lease: Time.now + 10)
expect { nx.wait_upgrade }.to nap(120)
end
end
describe "#install_metrics_server" do
let(:sshable) { instance_double(Sshable) }
let(:vm) { create_vm }
before do
allow(vm).to receive(:sshable).and_return(sshable)
allow(kubernetes_cluster).to receive(:cp_vms).and_return([vm])
end
it "runs install_metrics_server and naps when not started" do
expect(sshable).to receive(:d_check).with("install_metrics_server").and_return("NotStarted")
expect(sshable).to receive(:d_run).with("install_metrics_server", "kubernetes/bin/install-metrics-server")
expect { nx.install_metrics_server }.to nap(30)
end
it "hops when metrics server install succeeds" do
expect(sshable).to receive(:d_check).with("install_metrics_server").and_return("Succeeded")
expect { nx.install_metrics_server }.to hop("wait")
end
it "naps when install_metrics_server is in progress" do
expect(sshable).to receive(:d_check).with("install_metrics_server").and_return("InProgress")
expect { nx.install_metrics_server }.to nap(10)
end
it "naps forever when install_metrics_server fails" do
expect(sshable).to receive(:d_check).with("install_metrics_server").and_return("Failed")
expect { nx.install_metrics_server }.to nap(65536)
end
it "naps forever when daemonizer2 returns something unknown" do
expect(sshable).to receive(:d_check).with("install_metrics_server").and_return("SomethingElse")
expect { nx.install_metrics_server }.to nap(65536)
end
end
describe "#sync_worker_mesh" do
let(:first_vm) { Prog::Vm::Nexus.assemble_with_sshable(customer_project.id).subject }
let(:first_ssh_key) { SshKey.generate }
let(:second_vm) { Prog::Vm::Nexus.assemble_with_sshable(customer_project.id).subject }
let(:second_ssh_key) { SshKey.generate }
before do
expect(kubernetes_cluster).to receive(:worker_vms).and_return([first_vm, second_vm])
expect(SshKey).to receive(:generate).and_return(first_ssh_key, second_ssh_key)
end
it "creates full mesh connectivity on cluster worker nodes" do
expect(first_vm.sshable).to receive(:cmd).with("tee ~/.ssh/id_ed25519 > /dev/null && chmod 0600 ~/.ssh/id_ed25519", stdin: first_ssh_key.private_key)
expect(first_vm.sshable).to receive(:cmd).with("tee ~/.ssh/authorized_keys > /dev/null && chmod 0600 ~/.ssh/authorized_keys", stdin: [first_vm.sshable.keys.first.public_key, first_ssh_key.public_key, second_ssh_key.public_key].join("\n"))
expect(second_vm.sshable).to receive(:cmd).with("tee ~/.ssh/id_ed25519 > /dev/null && chmod 0600 ~/.ssh/id_ed25519", stdin: second_ssh_key.private_key)
expect(second_vm.sshable).to receive(:cmd).with("tee ~/.ssh/authorized_keys > /dev/null && chmod 0600 ~/.ssh/authorized_keys", stdin: [second_vm.sshable.keys.first.public_key, first_ssh_key.public_key, second_ssh_key.public_key].join("\n"))
expect { nx.sync_worker_mesh }.to hop("wait")
end
end
describe "#destroy" do
it "donates if there are sub-programs running (Provision...)" do
st.update(prog: "Kubernetes::KubernetesClusterNexus", label: "destroy", stack: [{}])
Strand.create(parent_id: st.id, prog: "Kubernetes::ProvisionKubernetesNode", label: "start", stack: [{}], lease: Time.now + 10)
expect { nx.destroy }.to nap(120)
end
it "triggers deletion of associated resources and naps until all nodepools are gone" do
st.update(prog: "Kubernetes::KubernetesClusterNexus", label: "destroy", stack: [{}])
expect(kubernetes_cluster.api_server_lb).to receive(:incr_destroy)
expect(kubernetes_cluster.services_lb).to receive(:incr_destroy)
expect(kubernetes_cluster.cp_vms).to all(receive(:incr_destroy))
expect(kubernetes_cluster.nodepools).to all(receive(:incr_destroy))
expect(kubernetes_cluster.private_subnet).to receive(:incr_destroy)
expect(kubernetes_cluster).not_to receive(:destroy)
expect { nx.destroy }.to nap(5)
end
it "completes destroy when nodepools are gone" do
st.update(prog: "Kubernetes::KubernetesClusterNexus", label: "destroy", stack: [{}])
kubernetes_cluster.nodepools.first.destroy
kubernetes_cluster.reload
expect(kubernetes_cluster.api_server_lb).to receive(:incr_destroy)
expect(kubernetes_cluster.services_lb).to receive(:incr_destroy)
expect(kubernetes_cluster.cp_vms).to all(receive(:incr_destroy))
expect(kubernetes_cluster.nodepools).to be_empty
expect { nx.destroy }.to exit({"msg" => "kubernetes cluster is deleted"})
end
it "deletes the sub-subdomain DNS record if the DNS zone exists" do
dns_zone = DnsZone.create(project_id: Project.first.id, name: "k8s.ubicloud.com", last_purged_at: Time.now)
kubernetes_cluster.services_lb.update(custom_hostname_dns_zone_id: dns_zone.id)
dns_zone.insert_record(record_name: "*.#{kubernetes_cluster.services_lb.hostname}.", type: "CNAME", ttl: 123, data: "whatever.")
expect(DnsRecord[name: "*.#{kubernetes_cluster.services_lb.hostname}.", tombstoned: false]).not_to be_nil
expect { nx.destroy }.to nap(5)
expect(DnsRecord[name: "*.#{kubernetes_cluster.services_lb.hostname}.", tombstoned: true]).not_to be_nil
end
it "completes the destroy process even if the load balancers do not exist" do
kubernetes_cluster.update(api_server_lb_id: nil, services_lb_id: nil)
kubernetes_cluster.nodepools.first.destroy
kubernetes_cluster.reload
expect { nx.destroy }.to exit({"msg" => "kubernetes cluster is deleted"})
end
end
end