A recent bug report showed that when multiple VMs are provisioned, sometimes, the private subnet gets stuck in wait_inbound_setup. The main reason is that, when we start rekeying nics, we include all of the nics in wait_setup label. The problem with that is, sometimes the VM is not allocated just yet. If it's not allocated, its public ipv6 address is not set. Therefore, for tunnels that target this nic, RekeyNicTunnel prog errors out in subdivide_network method. To solve this, we add a new state to NicNexus to wait VM allocation, we jump to the next label only after the allocation is completed.
118 lines
2.7 KiB
Ruby
118 lines
2.7 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
class Prog::Vnet::NicNexus < Prog::Base
|
|
subject_is :nic
|
|
|
|
def self.assemble(private_subnet_id, name: nil, ipv6_addr: nil, ipv4_addr: nil)
|
|
unless (subnet = PrivateSubnet[private_subnet_id])
|
|
fail "Given subnet doesn't exist with the id #{private_subnet_id}"
|
|
end
|
|
|
|
ubid = Nic.generate_ubid
|
|
name ||= Nic.ubid_to_name(ubid)
|
|
|
|
ipv6_addr ||= subnet.random_private_ipv6.to_s
|
|
ipv4_addr ||= subnet.random_private_ipv4.to_s
|
|
|
|
DB.transaction do
|
|
nic = Nic.create(private_ipv6: ipv6_addr, private_ipv4: ipv4_addr, mac: gen_mac, name: name, private_subnet_id: private_subnet_id) { _1.id = ubid.to_uuid }
|
|
Strand.create(prog: "Vnet::NicNexus", label: "wait_allocation") { _1.id = nic.id }
|
|
end
|
|
end
|
|
|
|
def before_run
|
|
when_destroy_set? do
|
|
hop_destroy if strand.label != "destroy"
|
|
end
|
|
end
|
|
|
|
label def wait_allocation
|
|
if nic.vm.allocated_at
|
|
hop_wait_setup
|
|
end
|
|
nap 5
|
|
end
|
|
|
|
label def wait_setup
|
|
when_start_rekey_set? do
|
|
decr_setup_nic
|
|
hop_start_rekey
|
|
end
|
|
nap 5
|
|
end
|
|
|
|
label def wait
|
|
when_repopulate_set? do
|
|
nic.private_subnet.incr_refresh_keys
|
|
decr_repopulate
|
|
end
|
|
|
|
when_start_rekey_set? do
|
|
hop_start_rekey
|
|
end
|
|
|
|
nap 30
|
|
end
|
|
|
|
label def start_rekey
|
|
decr_start_rekey
|
|
|
|
if retval&.dig("msg") == "inbound_setup is complete"
|
|
hop_wait_rekey_outbound_trigger
|
|
end
|
|
|
|
push Prog::Vnet::RekeyNicTunnel, {}, :setup_inbound
|
|
end
|
|
|
|
label def wait_rekey_outbound_trigger
|
|
if retval&.dig("msg") == "outbound_setup is complete"
|
|
hop_wait_rekey_old_state_drop_trigger
|
|
end
|
|
|
|
when_trigger_outbound_update_set? do
|
|
decr_trigger_outbound_update
|
|
push Prog::Vnet::RekeyNicTunnel, {}, :setup_outbound
|
|
end
|
|
|
|
nap 5
|
|
end
|
|
|
|
label def wait_rekey_old_state_drop_trigger
|
|
if retval&.dig("msg")&.include?("drop_old_state is complete")
|
|
hop_wait
|
|
end
|
|
|
|
when_old_state_drop_trigger_set? do
|
|
decr_old_state_drop_trigger
|
|
push Prog::Vnet::RekeyNicTunnel, {}, :drop_old_state
|
|
end
|
|
|
|
nap 5
|
|
end
|
|
|
|
label def destroy
|
|
if nic.vm
|
|
Clog.emit("Cannot destroy nic with active vm, first clean up the attached resources") { nic }
|
|
nap 5
|
|
end
|
|
|
|
decr_destroy
|
|
|
|
nic.private_subnet.incr_refresh_keys
|
|
nic.destroy
|
|
|
|
pop "nic deleted"
|
|
end
|
|
|
|
# Generate a MAC with the "local" (generated, non-manufacturer) bit
|
|
# set and the multicast bit cleared in the first octet.
|
|
#
|
|
# Accuracy here is not a formality: otherwise assigning a ipv6 link
|
|
# local address errors out.
|
|
def self.gen_mac
|
|
([rand(256) & 0xFE | 0x02] + Array.new(5) { rand(256) }).map {
|
|
"%0.2X" % _1
|
|
}.join(":").downcase
|
|
end
|
|
end
|