When we destroy a VM before the load balancer has finished its metadata endpoint clean-up, the VM gets stuck during removal. With this commit, we make sure the related networking cleanup runs only after the load balancer completes its own cleanup.
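
A minimal sketch of the ordering this enforces, assuming a hypothetical destroy step (the method, label, and semaphore names below are illustrative; the actual gating lives in the VM's destroy flow, not in this model):

# Hedged sketch: networking teardown waits until the load balancer has
# detached the VM, i.e. until the `load_balancer` association below is nil.
def destroy_networking_step(vm)
  return :wait_for_load_balancer_cleanup if vm.load_balancer

  vm.nics.each(&:incr_destroy) # illustrative semaphore increment
  :networking_destroyed
end
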
# frozen_string_literal: true

require "jwt"

require_relative "../model"

class Vm < Sequel::Model
  one_to_one :strand, key: :id
  many_to_one :vm_host
  many_to_one :project
  one_to_many :nics
  many_to_many :private_subnets, join_table: :nic
  one_to_one :sshable, key: :id
  one_to_one :assigned_vm_address, key: :dst_vm_id
  one_to_many :vm_storage_volumes, order: Sequel.desc(:boot)
  one_to_many :active_billing_records, class: :BillingRecord, key: :resource_id, &:active
  one_to_many :pci_devices
  one_through_one :load_balancer
  one_to_one :load_balancer_vm
  many_to_many :load_balancer_vm_ports, join_table: :load_balancers_vms, right_key: :id, right_primary_key: :load_balancer_vm_id, read_only: true
  many_to_one :vm_host_slice
  many_to_one :location
  one_to_one :aws_instance, key: :id

  many_through_many :firewalls,
    [
      [:nic, :vm_id, :private_subnet_id],
      [:firewalls_private_subnets, :private_subnet_id, :firewall_id]
    ]

  plugin :association_dependencies, sshable: :destroy, assigned_vm_address: :destroy, vm_storage_volumes: :destroy, load_balancer_vm: :destroy

  dataset_module Pagination

  plugin ResourceMethods, redacted_columns: :public_key
  plugin SemaphoreMethods, :destroy, :start_after_host_reboot, :prevent_destroy, :update_firewall_rules,
    :checkup, :update_spdk_dependency, :waiting_for_capacity, :lb_expiry_started, :restart, :stop, :removed_from_lb
  include HealthMonitorMethods

  include ObjectTag::Cleanup

  def display_location
    location.display_name
  end

  def load_balancer_state
    load_balancer_vm_ports.first&.state
  end

  def path
    "/location/#{display_location}/vm/#{name}"
  end

  def ephemeral_net4
    assigned_vm_address&.ip&.network
  end

  def ip4
    assigned_vm_address&.ip
  end

  def ip6
    location.aws? ? ephemeral_net6&.nth(0) : ephemeral_net6&.nth(2)
  end

  def nic
    nics.first
  end

  def private_ipv4
    ipv4 = nic.private_ipv4
    (ipv4.netmask.prefix_len == 32) ? ipv4.network : ipv4.nth(1)
  end

  def private_ipv6
    nic.private_ipv6.nth(2)
  end

  def runtime_token
    JWT.encode({sub: ubid, iat: Time.now.to_i}, Config.clover_runtime_token_secret, "HS256")
  end
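
  # For example, a consumer holding the same shared secret could verify the
  # token like this (hedged sketch; verification is not part of this model):
  #
  #   payload, _header = JWT.decode(vm.runtime_token,
  #     Config.clover_runtime_token_secret, true, {algorithm: "HS256"})
  #   payload["sub"] # => vm.ubid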

  def display_state
    label = strand&.label
    return "deleting" if destroy_set? || label == "destroy"
    return "restarting" if restart_set? || label == "restart"
    return "stopped" if stop_set? || label == "stopped"
    if waiting_for_capacity_set?
      return "no capacity available" if Time.now - created_at > 15 * 60
      return "waiting for capacity"
    end
    super
  end

  # cloud-hypervisor takes topology information in this format:
  #
  # topology=<threads_per_core>:<cores_per_die>:<dies_per_package>:<packages>
  #
  # And the result of the multiplication must equal the thread/vcpu count
  # we wish to allocate:
  #
  #   let total = t.threads_per_core * t.cores_per_die * t.dies_per_package * t.packages;
  #   if total != self.cpus.max_vcpus {
  #       return Err(ValidationError::CpuTopologyCount);
  #   }
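  #
  # For example, topology=2:2:1:1 means 2 threads/core x 2 cores/die x
  # 1 die/package x 1 package = 4 vcpus.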
  CloudHypervisorCpuTopo = Struct.new(:threads_per_core, :cores_per_die, :dies_per_package, :packages) do
    def to_s
      to_a.join(":")
    end

    def max_vcpus
      @max_vcpus ||= to_a.reduce(:*)
    end
  end
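
  # For example (illustrative values):
  #
  #   topo = CloudHypervisorCpuTopo.new(2, 2, 1, 1)
  #   topo.to_s      # => "2:2:1:1"
  #   topo.max_vcpus # => 4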

  def cloud_hypervisor_cpu_topology
    threads_per_core, r = vm_host.total_cpus.divmod vm_host.total_cores
    fail "BUG" unless r.zero?

    total_dies_per_package, r = vm_host.total_dies.divmod vm_host.total_sockets
    fail "BUG" unless r.zero?

    total_packages = vm_host.total_sockets

    # We have computed whole-system statistics; now scale them down to the
    # VM's needs.
    if vcpus == 1 && threads_per_core > 1
      # special case for single-threaded VMs
      cores_from_cpus = 1r
      threads_per_core = 1
    else
      cores_from_cpus = Rational(vcpus) / threads_per_core
    end
    proportion = cores_from_cpus / vm_host.total_cores
    packages = (total_packages * proportion).ceil
    dies_per_package = (total_dies_per_package * proportion).ceil
    cores_per_die = cores_from_cpus / (packages * dies_per_package)
    fail "BUG: need uniform number of cores allocated per die" unless cores_per_die.denominator == 1

    topo = [threads_per_core, cores_per_die, dies_per_package, packages].map { |num|
      # :nocov:
      fail "BUG: non-integer in topology array" unless num.denominator == 1
      # :nocov:
      Integer(num)
    }

    # :nocov:
    unless topo.reduce(:*) == vcpus
      fail "BUG: arithmetic does not result in the correct number of vcpus"
    end
    # :nocov:

    CloudHypervisorCpuTopo.new(*topo)
  end
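
  # Worked example (hypothetical host): with total_cpus=16, total_cores=8,
  # total_dies=1, total_sockets=1 and vcpus=4, we get threads_per_core=2,
  # cores_from_cpus=2, proportion=2/8, hence packages=1, dies_per_package=1,
  # cores_per_die=2: topology "2:2:1:1", and 2*2*1*1 = 4 vcpus as required.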

  # Reverse look-up the vm_size instance that was used to create this VM
  # and use its name as a display name.
  def display_size
    vm_size = Option::VmSizes.find {
      it.family == family &&
        it.arch == arch &&
        it.vcpus == vcpus &&
        (cpu_percent_limit.nil? || it.cpu_percent_limit == cpu_percent_limit)
    }
    vm_size.name
  end

  # Various names in linux, like interface names, are obliged to be
  # short, so truncate the ubid. This does introduce the spectre of
  # collisions. When the time comes, we'll have to ensure it doesn't
  # happen on a single host, pushing into the allocation process.
  def self.ubid_to_name(id)
    id.to_s[0..7]
  end
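
  # For example, an id whose ubid string begins "vmqp0r8zj4..." (an
  # illustrative value) gets the inhost name "vmqp0r8z".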

  def inhost_name
    self.class.ubid_to_name(UBID.from_uuidish(id))
  end

  def storage_size_gib
    vm_storage_volumes.map { it.size_gib }.sum
  end

  def init_health_monitor_session
    {
      ssh_session: vm_host.sshable.start_fresh_session
    }
  end

  def check_pulse(session:, previous_pulse:)
    reading = begin
      session[:ssh_session].exec!("systemctl is-active #{inhost_name} #{inhost_name}-dnsmasq").split("\n").all?("active") ? "up" : "down"
    rescue
      "down"
    end
    pulse = aggregate_readings(previous_pulse: previous_pulse, reading: reading)

    # Ask for a checkup only once the VM has been "down" for more than five
    # consecutive readings spanning at least 30 seconds, and no checkup is
    # already pending.
    if pulse[:reading] == "down" && pulse[:reading_rpt] > 5 && Time.now - pulse[:reading_chg] > 30 && !reload.checkup_set?
      incr_checkup
    end

    pulse
  end

  def update_spdk_version(version)
    spdk_installation = vm_host.spdk_installations_dataset[version: version]
    fail "SPDK version #{version} not found on host" unless spdk_installation
    vm_storage_volumes_dataset.update(spdk_installation_id: spdk_installation.id)
    incr_update_spdk_dependency
  end

  def params_json(swap_size_bytes: nil, ch_version: nil, firmware_version: nil, hugepages: nil)
    topo = cloud_hypervisor_cpu_topology

    project_public_keys = project.get_ff_vm_public_ssh_keys || []

    # We don't write secrets to params_json, because they shouldn't be
    # stored on the host for security reasons.
    JSON.pretty_generate(
      vm_name: name,
      public_ipv6: ephemeral_net6.to_s,
      public_ipv4: ip4.to_s || "",
      local_ipv4: local_vetho_ip.to_s.shellescape || "",
      dns_ipv4: nic.private_subnet.net4.nth(2).to_s,
      unix_user:,
      ssh_public_keys: [public_key] + project_public_keys,
      nics: nics.map { [it.private_ipv6.to_s, it.private_ipv4.to_s, it.ubid_to_tap_name, it.mac, it.private_ipv4_gateway] },
      boot_image:,
      max_vcpus: topo.max_vcpus,
      cpu_topology: topo.to_s,
      mem_gib: memory_gib,
      ndp_needed: vm_host.ndp_needed,
      storage_volumes:,
      swap_size_bytes:,
      pci_devices: pci_devices.map { [it.slot, it.iommu_group] },
      slice_name: vm_host_slice&.inhost_name || "system.slice",
      cpu_percent_limit: cpu_percent_limit || 0,
      cpu_burst_percent_limit: cpu_burst_percent_limit || 0,
      ch_version:,
      firmware_version:,
      hugepages:
    )
  end

  def storage_volumes
    vm_storage_volumes.map { |s|
      {
        "boot" => s.boot,
        "image" => s.boot_image&.name,
        "image_version" => s.boot_image&.version,
        "size_gib" => s.size_gib,
        "device_id" => s.device_id,
        "disk_index" => s.disk_index,
        "encrypted" => !s.key_encryption_key_1.nil?,
        "spdk_version" => s.spdk_version,
        "vhost_block_backend_version" => s.vhost_block_backend_version,
        "use_bdev_ubi" => s.use_bdev_ubi,
        "skip_sync" => s.skip_sync,
        "storage_device" => s.storage_device.name,
        "read_only" => s.size_gib == 0,
        "max_read_mbytes_per_sec" => s.max_read_mbytes_per_sec,
        "max_write_mbytes_per_sec" => s.max_write_mbytes_per_sec,
        "slice_name" => vm_host_slice&.inhost_name || "system.slice",
        "num_queues" => s.num_queues,
        "queue_size" => s.queue_size,
        "copy_on_read" => false
      }
    }
  end

  def storage_secrets
    vm_storage_volumes.filter_map { |s|
      if !s.key_encryption_key_1.nil?
        [s.device_id, s.key_encryption_key_1.secret_key_material_hash]
      end
    }.to_h
  end

  ssh_public_key_line = /(([^# \r\n]|"[^"\r\n]+")+ +)? *[^# \r\n]+ +[A-Za-z0-9+\/]+=*( +[^\r\n]*)?/
  VALID_SSH_PUBLIC_KEY_LINE = /^#{ssh_public_key_line}\r?$/
  VALID_SSH_AUTHORIZED_KEYS = /\A(([ \t]*|(#[^\r\n]*)?|#{ssh_public_key_line})(\r?\n|\z))+\z/
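
  # For example (illustrative strings, not real key material):
  #
  #   "ssh-ed25519 AAAAC3Nz dev@example"  matches VALID_SSH_PUBLIC_KEY_LINE
  #   "hello"                             matches neither pattern, since it
  #                                       lacks a "<type> <base64>" pair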

  def validate
    super
    if new?
      validates_format(VALID_SSH_AUTHORIZED_KEYS, :public_key, message: "invalid SSH public key format")
      unless errors.on(:public_key)
        validates_format(VALID_SSH_PUBLIC_KEY_LINE, :public_key, message: "must contain at least one valid SSH public key")
      end
    end
  end
end

# Table: vm
# Columns:
#  id                      | uuid                     | PRIMARY KEY
#  ephemeral_net6          | cidr                     |
#  vm_host_id              | uuid                     |
#  unix_user               | text                     | NOT NULL
#  public_key              | text                     | NOT NULL
#  display_state           | vm_display_state         | NOT NULL DEFAULT 'creating'::vm_display_state
#  name                    | text                     | NOT NULL
#  boot_image              | text                     | NOT NULL
#  local_vetho_ip          | text                     |
#  ip4_enabled             | boolean                  | NOT NULL DEFAULT false
#  family                  | text                     | NOT NULL
#  cores                   | integer                  | NOT NULL
#  pool_id                 | uuid                     |
#  created_at              | timestamp with time zone | NOT NULL DEFAULT now()
#  arch                    | arch                     | NOT NULL DEFAULT 'x64'::arch
#  allocated_at            | timestamp with time zone |
#  provisioned_at          | timestamp with time zone |
#  vcpus                   | integer                  | NOT NULL
#  memory_gib              | integer                  | NOT NULL
#  vm_host_slice_id        | uuid                     |
#  project_id              | uuid                     | NOT NULL
#  cpu_percent_limit       | integer                  |
#  cpu_burst_percent_limit | integer                  |
#  location_id             | uuid                     | NOT NULL
# Indexes:
#  vm_pkey                             | PRIMARY KEY btree (id)
#  vm_ephemeral_net6_key               | UNIQUE btree (ephemeral_net6)
#  vm_project_id_location_id_name_uidx | UNIQUE btree (project_id, location_id, name)
#  vm_pool_id_index                    | btree (pool_id) WHERE pool_id IS NOT NULL
# Foreign key constraints:
#  vm_location_id_fkey      | (location_id) REFERENCES location(id)
#  vm_pool_id_fkey          | (pool_id) REFERENCES vm_pool(id)
#  vm_project_id_fkey       | (project_id) REFERENCES project(id)
#  vm_vm_host_id_fkey       | (vm_host_id) REFERENCES vm_host(id)
#  vm_vm_host_slice_id_fkey | (vm_host_slice_id) REFERENCES vm_host_slice(id)
# Referenced By:
#  assigned_vm_address        | assigned_vm_address_dst_vm_id_fkey       | (dst_vm_id) REFERENCES vm(id)
#  dns_servers_vms            | dns_servers_vms_vm_id_fkey               | (vm_id) REFERENCES vm(id)
#  inference_endpoint_replica | inference_endpoint_replica_vm_id_fkey    | (vm_id) REFERENCES vm(id)
#  inference_router_replica   | inference_router_replica_vm_id_fkey      | (vm_id) REFERENCES vm(id)
#  kubernetes_clusters_cp_vms | kubernetes_clusters_cp_vms_cp_vm_id_fkey | (cp_vm_id) REFERENCES vm(id)
#  kubernetes_nodepools_vms   | kubernetes_nodepools_vms_vm_id_fkey      | (vm_id) REFERENCES vm(id)
#  load_balancers_vms         | load_balancers_vms_vm_id_fkey            | (vm_id) REFERENCES vm(id)
#  minio_server               | minio_server_vm_id_fkey                  | (vm_id) REFERENCES vm(id)
#  nic                        | nic_vm_id_fkey                           | (vm_id) REFERENCES vm(id)
#  pci_device                 | pci_device_vm_id_fkey                    | (vm_id) REFERENCES vm(id)
#  postgres_server            | postgres_server_vm_id_fkey               | (vm_id) REFERENCES vm(id)
#  victoria_metrics_server    | victoria_metrics_server_vm_id_fkey       | (vm_id) REFERENCES vm(id)
#  vm_storage_volume          | vm_storage_volume_vm_id_fkey             | (vm_id) REFERENCES vm(id)