ubicloud/prog/vnet/aws/nic_nexus.rb
Furkan Sahin dd3c5f7c8f Fix Subnet AZ pickup and controlplane records mixup
We used to call az_to_provision_subnet, which picks one of the 3 available
subnet AZs ("a", "b" or "c") when no specific one is requested. After using
the picked AZ to provision the subnet, we called the function again while
updating the nic_aws_resource entity. The second call could pick a different
AZ, which resulted in a mismatch.
2025-11-19 14:20:54 +01:00

202 lines
7 KiB
Ruby

# frozen_string_literal: true
require "aws-sdk-ec2"
class Prog::Vnet::Aws::NicNexus < Prog::Base
subject_is :nic
# Runs before every label. When the destroy semaphore is set, redirects the
# strand to the "destroy" label unless it is already at one of the teardown
# labels listed below.
def before_run
  when_destroy_set? do
    teardown_labels = ["destroy", "release_eip", "delete_subnet", "destroy_entities"]
    next if teardown_labels.include?(strand.label)

    hop_destroy
  end
end
# Entry label: creates the NicAwsResource record using the nic's own id, then
# hops to subnet provisioning.
label def start
  resource_id = nic.id
  NicAwsResource.create_with_id(resource_id)

  hop_create_subnet
end
# Finds or creates the AWS subnet backing this nic, then records the subnet id
# and its availability-zone suffix on the nic_aws_resource.
#
# Three cases:
# * the private subnet is an "old" AWS subnet (old_aws_subnet?): reuse the
#   first subnet found in the VPC;
# * no subnet tagged with the nic's name exists: create one (IPv4 /24 plus a
#   random IPv6 /64 out of the VPC block) and enable IPv6 auto-assignment;
# * a subnet tagged with the nic's name already exists: reuse it, so the label
#   can be re-run without creating a duplicate.
#
# subnet_az is always stored as the suffix that is appended to the location
# name (e.g. "a"), never as the full AZ name. AWS reports the full name for
# existing subnets, so the location-name prefix is stripped in the reuse cases.
label def create_subnet
  # Do nothing until the private subnet's own strand has reached "wait".
  nap 2 unless private_subnet.strand.label == "wait"

  # NOTE(review): 3 * 60 is presumably seconds -- confirm against register_deadline.
  register_deadline("attach_eip_network_interface", 3 * 60)

  vpc_response = client.describe_vpcs({filters: [{name: "vpc-id", values: [vpc_id]}]}).vpcs[0]
  # AWS VPCs use /56 prefix length for IPv6 CIDR blocks by default. We use /64
  # subnets for consistency with Ubicloud's private subnet sizing, giving us
  # 2^8 = 256 possible subnets to choose from.
  #
  # We randomly select a subnet rather than tracking allocations sequentially
  # because:
  # 1. Postgres on AWS typically provisions only up to 4 subnets concurrently
  # 2. The collision probability is very low (4 out of 256)
  # 3. AWS will fail the call if there's a conflict, and we can simply retry
  ipv_6_cidr_block = NetAddr::IPv6Net.parse(vpc_response.ipv_6_cidr_block_association_set[0].ipv_6_cidr_block).nth_subnet(64, SecureRandom.random_number(2**8))
  subnet_response = client.describe_subnets({filters: [{name: "tag:Name", values: [nic.name]}]})
  location_name = private_subnet.location.name

  subnet_id, subnet_az = if private_subnet.old_aws_subnet?
    subnet = client.describe_subnets({filters: [{name: "vpc-id", values: [vpc_id]}]}).subnets[0]
    [subnet.subnet_id, subnet.availability_zone&.delete_prefix(location_name)]
  elsif subnet_response.subnets.empty?
    # Pick the AZ exactly once: the helper may return a different value on
    # every call, and the same value must be used for AWS and for our record.
    az_suffix = az_to_provision_subnet
    created_subnet_id = client.create_subnet({
      vpc_id:,
      cidr_block: NetAddr::IPv4Net.new(nic.private_ipv4.network, NetAddr::Mask32.new(24)).to_s,
      ipv_6_cidr_block: ipv_6_cidr_block.to_s,
      availability_zone: location_name + az_suffix,
      tag_specifications: Util.aws_tag_specifications("subnet", nic.name)
    }).subnet.subnet_id
    client.modify_subnet_attribute({
      subnet_id: created_subnet_id,
      assign_ipv_6_address_on_creation: {value: true}
    })
    [created_subnet_id, az_suffix]
  else
    subnet = subnet_response.subnets[0]
    [subnet.subnet_id, subnet.availability_zone&.delete_prefix(location_name)]
  end

  nic.nic_aws_resource.update(subnet_id:, subnet_az:)
  hop_wait_subnet_created
end
# Waits for the nic's subnet to become "available", makes sure the VPC's first
# route table has an association, and then hops to network-interface creation.
#
# Old AWS subnets (old_aws_subnet?) skip the check and the route-table step
# and hop straight to network-interface creation.
label def wait_subnet_created
  hop_create_network_interface if private_subnet.old_aws_subnet?

  subnet = client.describe_subnets({filters: [{name: "tag:Name", values: [nic.name]}]}).subnets[0]
  # The lookup can come back empty (subnets[0] is nil); treat that the same as
  # "not available yet" and poll again instead of failing on nil.state.
  nap 1 unless subnet&.state == "available"

  # The filtered response already carries each route table's associations, so
  # no second describe call by id is needed.
  route_table = client.describe_route_tables({filters: [{name: "vpc-id", values: [vpc_id]}]}).route_tables[0]
  if route_table.associations.empty?
    client.associate_route_table({
      route_table_id: route_table.route_table_id,
      subnet_id: nic.nic_aws_resource.subnet_id
    })
  end

  hop_create_network_interface
end
# Creates the network interface in the nic's subnet, using the nic's private
# IPv4 address, one IPv6 prefix and the private subnet's security group, then
# stores the resulting interface id on the nic_aws_resource.
label def create_network_interface
  aws_resource = nic.nic_aws_resource
  security_group_id = private_subnet.private_subnet_aws_resource.security_group_id

  request = {
    subnet_id: aws_resource.subnet_id,
    private_ip_address: nic.private_ipv4.network.to_s,
    ipv_6_prefix_count: 1,
    groups: [security_group_id],
    tag_specifications: Util.aws_tag_specifications("network-interface", nic.name),
    # The nic id is sent as the client token on every attempt of this label.
    client_token: nic.id
  }

  created_interface = client.create_network_interface(request).network_interface
  aws_resource.update(network_interface_id: created_interface.network_interface_id)

  hop_assign_ipv6_address
end
# Requests one IPv6 address for the network interface, but only when the
# interface currently reports none, then hops to the availability wait.
label def assign_ipv6_address
  if get_network_interface.ipv_6_addresses.empty?
    client.assign_ipv_6_addresses({
      network_interface_id: nic.nic_aws_resource.network_interface_id,
      ipv_6_address_count: 1
    })
  end

  hop_wait_network_interface_created
end
# Polls the network interface until its status is "available", then hops to
# elastic IP allocation.
label def wait_network_interface_created
  nap 1 unless get_network_interface.status == "available"

  hop_allocate_eip
end
# Allocates an elastic IP for the nic (or reuses one allocated by an earlier
# attempt of this label) and stores its allocation id on the nic_aws_resource.
#
# The elastic IP is named after the network interface id, so the lookup for an
# existing allocation must filter on that same value. Filtering on the nic's
# name never matched, which made every retry allocate a new address.
label def allocate_eip
  eip_name = nic.nic_aws_resource.network_interface_id
  existing_addresses = client.describe_addresses({filters: [{name: "tag:Name", values: [eip_name]}]}).addresses

  eip_allocation_id = if existing_addresses.empty?
    client.allocate_address(tag_specifications: Util.aws_tag_specifications("elastic-ip", eip_name)).allocation_id
  else
    existing_addresses[0].allocation_id
  end

  nic.nic_aws_resource.update(eip_allocation_id:)
  hop_attach_eip_network_interface
end
# Associates the stored elastic IP with the nic's network interface unless the
# address already reports a network interface, then hops to "wait".
label def attach_eip_network_interface
  allocation_id = nic.nic_aws_resource.eip_allocation_id
  address = client.describe_addresses({filters: [{name: "allocation-id", values: [allocation_id]}]}).addresses.first

  attached_interface = address.network_interface_id
  unless attached_interface
    client.associate_address({
      allocation_id: nic.nic_aws_resource.eip_allocation_id,
      network_interface_id: nic.nic_aws_resource.network_interface_id
    })
  end

  hop_wait
end
# Steady state after provisioning: naps for a very long interval. Nothing else
# happens in this label; the destroy check in before_run is what can move the
# strand on from here.
label def wait
  nap 1_000_000_000
end
# First teardown step: deletes the AWS network interface, then hops to elastic
# IP release.
#
# before_run can hop here from any label, including before "start" has created
# the nic_aws_resource, so both the record and the interface id may be missing.
# In that case there is nothing to delete and the call is skipped, mirroring
# how release_eip treats a missing allocation id. Not-found errors from AWS are
# logged and ignored by ignore_invalid_nic.
label def destroy
  ignore_invalid_nic do
    network_interface_id = nic.nic_aws_resource&.network_interface_id
    client.delete_network_interface({network_interface_id:}) if network_interface_id
  end

  hop_release_eip
end
# Releases the nic's elastic IP when an allocation id has been recorded, then
# hops to subnet deletion. Not-found errors are logged and ignored by
# ignore_invalid_nic.
label def release_eip
  ignore_invalid_nic do
    aws_resource = nic.nic_aws_resource
    eip_id = aws_resource&.eip_allocation_id
    eip_id && client.release_address({allocation_id: eip_id})
  end

  hop_delete_subnet
end
# Deletes the nic's AWS subnet, then hops to record cleanup.
#
# The nic_aws_resource (or its subnet id) may be missing when destroy was
# requested before provisioning got that far; the call is then skipped instead
# of failing on nil.
#
# A DependencyViolation is re-raised when this nic is the only one left in the
# private subnet. Otherwise it is only logged and teardown continues
# (presumably because the subnet is still in use by the other nics -- confirm).
label def delete_subnet
  ignore_invalid_nic do
    subnet_id = nic.nic_aws_resource&.subnet_id
    client.delete_subnet({subnet_id:}) if subnet_id
  rescue Aws::EC2::Errors::DependencyViolation => e
    raise e if private_subnet.nics.count == 1
    Clog.emit("dependency violation for aws nic") { {ignored_aws_nic_failure: {exception: Util.exception_to_hash(e, backtrace: nil)}} }
  end

  hop_destroy_entities
end
# Final teardown step: removes the nic_aws_resource record (if any) and the nic
# itself (if it still exists), then pops with "nic deleted".
label def destroy_entities
  if nic
    nic.nic_aws_resource&.destroy
    nic.destroy
  end

  pop "nic deleted"
end
# Memoized client obtained from the credential of the private subnet's
# location.
def client
  return @client if @client

  credential = private_subnet.location.location_credential
  @client = credential.client
end
# Memoized private subnet that the nic belongs to.
def private_subnet
  return @private_subnet if @private_subnet

  @private_subnet = nic.private_subnet
end
# Memoized VPC id read from the private subnet's AWS resource record.
def vpc_id
  return @vpc_id if @vpc_id

  subnet_aws_resource = private_subnet.private_subnet_aws_resource
  @vpc_id = subnet_aws_resource.vpc_id
end
# Returns the availability-zone suffix to provision the subnet in.
#
# Uses frame["availability_zone"] when present. Otherwise picks a random
# suffix among "a", "b" and "c" that is not listed in
# frame["exclude_availability_zones"], falling back to "a" when every suffix
# is excluded. Because of the random pick, repeated calls may return different
# values; callers should call this once and reuse the result.
def az_to_provision_subnet
  requested = frame["availability_zone"]
  return requested if requested

  excluded = frame["exclude_availability_zones"] || []
  candidates = ["a", "b", "c"] - excluded
  candidates.sample || "a"
end
# Looks up the nic's network interface by its recorded interface id, restricted
# to interfaces tagged Ubicloud=true. Returns the first match, or nil when the
# lookup returns nothing.
def get_network_interface
  interface_id = nic.nic_aws_resource.network_interface_id
  filters = [
    {name: "network-interface-id", values: [interface_id]},
    {name: "tag:Ubicloud", values: ["true"]}
  ]

  client.describe_network_interfaces({filters: filters}).network_interfaces.first
end
private
# Runs the given block and returns its value. ArgumentError and the EC2
# "not found" errors for network interfaces, allocations, addresses and
# subnets are logged through Clog and swallowed; any other error propagates.
def ignore_invalid_nic
  yield
rescue ArgumentError,
  Aws::EC2::Errors::InvalidNetworkInterfaceIDNotFound,
  Aws::EC2::Errors::InvalidAllocationIDNotFound,
  Aws::EC2::Errors::InvalidAddressIDNotFound,
  Aws::EC2::Errors::InvalidSubnetIDNotFound => error
  Clog.emit("ID not found for aws nic") do
    {ignored_aws_nic_failure: {exception: Util.exception_to_hash(error, backtrace: nil)}}
  end
end
end