This allows us to remove the :load_balancer option from the VM serializer and the :vms_serialized option from the LoadBalancer serializer. Add some model methods for code that was previously in the serializers. To a better job of testing for expected values on the load balancer show page, instead of using the non-specific have_content for everything.
343 lines
13 KiB
Ruby
343 lines
13 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require "jwt"
|
|
require_relative "../model"
|
|
|
|
class Vm < Sequel::Model
|
|
one_to_one :strand, key: :id
|
|
many_to_one :vm_host
|
|
many_to_one :project
|
|
one_to_many :nics
|
|
many_to_many :private_subnets, join_table: :nic
|
|
one_to_one :sshable, key: :id
|
|
one_to_one :assigned_vm_address, key: :dst_vm_id
|
|
one_to_many :vm_storage_volumes, order: Sequel.desc(:boot)
|
|
one_to_many :active_billing_records, class: :BillingRecord, key: :resource_id, &:active
|
|
one_to_many :pci_devices
|
|
one_through_one :load_balancer
|
|
one_to_one :load_balancer_vm
|
|
many_to_many :load_balancer_vm_ports, join_table: :load_balancers_vms, right_key: :id, right_primary_key: :load_balancer_vm_id, read_only: true
|
|
many_to_one :vm_host_slice
|
|
many_to_one :location
|
|
one_to_one :aws_instance, key: :id
|
|
|
|
many_through_many :firewalls,
|
|
[
|
|
[:nic, :vm_id, :private_subnet_id],
|
|
[:firewalls_private_subnets, :private_subnet_id, :firewall_id]
|
|
]
|
|
|
|
plugin :association_dependencies, sshable: :destroy, assigned_vm_address: :destroy, vm_storage_volumes: :destroy, load_balancer_vm: :destroy
|
|
|
|
dataset_module Pagination
|
|
|
|
plugin ResourceMethods, redacted_columns: :public_key
|
|
plugin SemaphoreMethods, :destroy, :start_after_host_reboot, :prevent_destroy, :update_firewall_rules,
|
|
:checkup, :update_spdk_dependency, :waiting_for_capacity, :lb_expiry_started, :restart, :stop
|
|
include HealthMonitorMethods
|
|
|
|
include ObjectTag::Cleanup
|
|
|
|
def display_location
|
|
location.display_name
|
|
end
|
|
|
|
def load_balancer_state
|
|
load_balancer_vm_ports.first&.state
|
|
end
|
|
|
|
def path
|
|
"/location/#{display_location}/vm/#{name}"
|
|
end
|
|
|
|
def ephemeral_net4
|
|
assigned_vm_address&.ip&.network
|
|
end
|
|
|
|
def ip4
|
|
assigned_vm_address&.ip
|
|
end
|
|
|
|
def ip6
|
|
location.aws? ? ephemeral_net6&.nth(0) : ephemeral_net6&.nth(2)
|
|
end
|
|
|
|
def nic
|
|
nics.first
|
|
end
|
|
|
|
def private_ipv4
|
|
ipv4 = nic.private_ipv4
|
|
(ipv4.netmask.prefix_len == 32) ? ipv4.network : ipv4.nth(1)
|
|
end
|
|
|
|
def private_ipv6
|
|
nic.private_ipv6.nth(2)
|
|
end
|
|
|
|
def runtime_token
|
|
JWT.encode({sub: ubid, iat: Time.now.to_i}, Config.clover_runtime_token_secret, "HS256")
|
|
end
|
|
|
|
def display_state
|
|
label = strand&.label
|
|
return "deleting" if destroy_set? || label == "destroy"
|
|
return "restarting" if restart_set? || label == "restart"
|
|
return "stopped" if stop_set? || label == "stopped"
|
|
if waiting_for_capacity_set?
|
|
return "no capacity available" if Time.now - created_at > 15 * 60
|
|
return "waiting for capacity"
|
|
end
|
|
super
|
|
end
|
|
|
|
# cloud-hypervisor takes topology information in this format:
|
|
#
|
|
# topology=<threads_per_core>:<cores_per_die>:<dies_per_package>:<packages>
|
|
#
|
|
# And the result of multiplication must equal the thread/vcpu count
|
|
# we wish to allocate:
|
|
#
|
|
# let total = t.threads_per_core * t.cores_per_die * t.dies_per_package * t.packages;
|
|
# if total != self.cpus.max_vcpus {
|
|
# return Err(ValidationError::CpuTopologyCount);
|
|
# }
|
|
CloudHypervisorCpuTopo = Struct.new(:threads_per_core, :cores_per_die, :dies_per_package, :packages) do
|
|
def to_s
|
|
to_a.join(":")
|
|
end
|
|
|
|
def max_vcpus
|
|
@max_vcpus ||= to_a.reduce(:*)
|
|
end
|
|
end
|
|
|
|
def cloud_hypervisor_cpu_topology
|
|
threads_per_core, r = vm_host.total_cpus.divmod vm_host.total_cores
|
|
fail "BUG" unless r.zero?
|
|
|
|
total_dies_per_package, r = vm_host.total_dies.divmod vm_host.total_sockets
|
|
fail "BUG" unless r.zero?
|
|
|
|
total_packages = vm_host.total_sockets
|
|
|
|
# Computed all-system statistics, now scale it down to meet VM needs.
|
|
if vcpus == 1 && threads_per_core > 1
|
|
# special case for single-threaded VMs
|
|
cores_from_cpus = 1r
|
|
threads_per_core = 1
|
|
else
|
|
cores_from_cpus = Rational(vcpus) / threads_per_core
|
|
end
|
|
proportion = cores_from_cpus / vm_host.total_cores
|
|
packages = (total_packages * proportion).ceil
|
|
dies_per_package = (total_dies_per_package * proportion).ceil
|
|
cores_per_die = cores_from_cpus / (packages * dies_per_package)
|
|
fail "BUG: need uniform number of cores allocated per die" unless cores_per_die.denominator == 1
|
|
|
|
topo = [threads_per_core, cores_per_die, dies_per_package, packages].map { |num|
|
|
# :nocov:
|
|
fail "BUG: non-integer in topology array" unless num.denominator == 1
|
|
# :nocov:
|
|
Integer(num)
|
|
}
|
|
|
|
# :nocov:
|
|
unless topo.reduce(:*) == vcpus
|
|
fail "BUG: arithmetic does not result in the correct number of vcpus"
|
|
end
|
|
# :nocov:
|
|
|
|
CloudHypervisorCpuTopo.new(*topo)
|
|
end
|
|
|
|
# Reverse look-up the vm_size instance that was used to create this VM
|
|
# and use its name as a display name.
|
|
def display_size
|
|
vm_size = Option::VmSizes.find {
|
|
it.family == family &&
|
|
it.arch == arch &&
|
|
it.vcpus == vcpus &&
|
|
(cpu_percent_limit.nil? || it.cpu_percent_limit == cpu_percent_limit)
|
|
}
|
|
vm_size.name
|
|
end
|
|
|
|
# Various names in linux, like interface names, are obliged to be
|
|
# short, so truncate the ubid. This does introduce the spectre of
|
|
# collisions. When the time comes, we'll have to ensure it doesn't
|
|
# happen on a single host, pushing into the allocation process.
|
|
def self.ubid_to_name(id)
|
|
id.to_s[0..7]
|
|
end
|
|
|
|
def inhost_name
|
|
self.class.ubid_to_name(UBID.from_uuidish(id))
|
|
end
|
|
|
|
def storage_size_gib
|
|
vm_storage_volumes.map { it.size_gib }.sum
|
|
end
|
|
|
|
def init_health_monitor_session
|
|
{
|
|
ssh_session: vm_host.sshable.start_fresh_session
|
|
}
|
|
end
|
|
|
|
def check_pulse(session:, previous_pulse:)
|
|
reading = begin
|
|
session[:ssh_session].exec!("systemctl is-active #{inhost_name} #{inhost_name}-dnsmasq").split("\n").all?("active") ? "up" : "down"
|
|
rescue
|
|
"down"
|
|
end
|
|
pulse = aggregate_readings(previous_pulse: previous_pulse, reading: reading)
|
|
|
|
if pulse[:reading] == "down" && pulse[:reading_rpt] > 5 && Time.now - pulse[:reading_chg] > 30 && !reload.checkup_set?
|
|
incr_checkup
|
|
end
|
|
|
|
pulse
|
|
end
|
|
|
|
def update_spdk_version(version)
|
|
spdk_installation = vm_host.spdk_installations_dataset[version: version]
|
|
fail "SPDK version #{version} not found on host" unless spdk_installation
|
|
vm_storage_volumes_dataset.update(spdk_installation_id: spdk_installation.id)
|
|
incr_update_spdk_dependency
|
|
end
|
|
|
|
def params_json(swap_size_bytes: nil, ch_version: nil, firmware_version: nil, hugepages: nil)
|
|
topo = cloud_hypervisor_cpu_topology
|
|
|
|
project_public_keys = project.get_ff_vm_public_ssh_keys || []
|
|
|
|
# we don't write secrets to params_json, because it
|
|
# shouldn't be stored in the host for security reasons.
|
|
JSON.pretty_generate(
|
|
vm_name: name,
|
|
public_ipv6: ephemeral_net6.to_s,
|
|
public_ipv4: ip4.to_s || "",
|
|
local_ipv4: local_vetho_ip.to_s.shellescape || "",
|
|
dns_ipv4: nic.private_subnet.net4.nth(2).to_s,
|
|
unix_user:,
|
|
ssh_public_keys: [public_key] + project_public_keys,
|
|
nics: nics.map { [it.private_ipv6.to_s, it.private_ipv4.to_s, it.ubid_to_tap_name, it.mac, it.private_ipv4_gateway] },
|
|
boot_image:,
|
|
max_vcpus: topo.max_vcpus,
|
|
cpu_topology: topo.to_s,
|
|
mem_gib: memory_gib,
|
|
ndp_needed: vm_host.ndp_needed,
|
|
storage_volumes:,
|
|
swap_size_bytes:,
|
|
pci_devices: pci_devices.map { [it.slot, it.iommu_group] },
|
|
slice_name: vm_host_slice&.inhost_name || "system.slice",
|
|
cpu_percent_limit: cpu_percent_limit || 0,
|
|
cpu_burst_percent_limit: cpu_burst_percent_limit || 0,
|
|
ch_version:,
|
|
firmware_version:,
|
|
hugepages:
|
|
)
|
|
end
|
|
|
|
def storage_volumes
|
|
vm_storage_volumes.map { |s|
|
|
{
|
|
"boot" => s.boot,
|
|
"image" => s.boot_image&.name,
|
|
"image_version" => s.boot_image&.version,
|
|
"size_gib" => s.size_gib,
|
|
"device_id" => s.device_id,
|
|
"disk_index" => s.disk_index,
|
|
"encrypted" => !s.key_encryption_key_1.nil?,
|
|
"spdk_version" => s.spdk_version,
|
|
"vhost_block_backend_version" => s.vhost_block_backend_version,
|
|
"use_bdev_ubi" => s.use_bdev_ubi,
|
|
"skip_sync" => s.skip_sync,
|
|
"storage_device" => s.storage_device.name,
|
|
"read_only" => s.size_gib == 0,
|
|
"max_read_mbytes_per_sec" => s.max_read_mbytes_per_sec,
|
|
"max_write_mbytes_per_sec" => s.max_write_mbytes_per_sec,
|
|
"slice_name" => vm_host_slice&.inhost_name || "system.slice",
|
|
"num_queues" => s.num_queues,
|
|
"queue_size" => s.queue_size,
|
|
"copy_on_read" => false
|
|
}
|
|
}
|
|
end
|
|
|
|
def storage_secrets
|
|
vm_storage_volumes.filter_map { |s|
|
|
if !s.key_encryption_key_1.nil?
|
|
[s.device_id, s.key_encryption_key_1.secret_key_material_hash]
|
|
end
|
|
}.to_h
|
|
end
|
|
|
|
ssh_public_key_line = /(([^# \r\n]|"[^"\r\n]+")+ +)? *[^# \r\n]+ +[A-Za-z0-9+\/]+=*( +[^\r\n]*)?/
|
|
VALID_SSH_PUBLIC_KEY_LINE = /^#{ssh_public_key_line}\r?$/
|
|
VALID_SSH_AUTHORIZED_KEYS = /\A(([ \t]*|(#[^\r\n]*)?|#{ssh_public_key_line})(\r?\n|\z))+\z/
|
|
|
|
def validate
|
|
super
|
|
if new?
|
|
validates_format(VALID_SSH_AUTHORIZED_KEYS, :public_key, message: "invalid SSH public key format")
|
|
unless errors.on(:public_key)
|
|
validates_format(VALID_SSH_PUBLIC_KEY_LINE, :public_key, message: "must contain at least one valid SSH public key")
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
# Table: vm
|
|
# Columns:
|
|
# id | uuid | PRIMARY KEY
|
|
# ephemeral_net6 | cidr |
|
|
# vm_host_id | uuid |
|
|
# unix_user | text | NOT NULL
|
|
# public_key | text | NOT NULL
|
|
# display_state | vm_display_state | NOT NULL DEFAULT 'creating'::vm_display_state
|
|
# name | text | NOT NULL
|
|
# boot_image | text | NOT NULL
|
|
# local_vetho_ip | text |
|
|
# ip4_enabled | boolean | NOT NULL DEFAULT false
|
|
# family | text | NOT NULL
|
|
# cores | integer | NOT NULL
|
|
# pool_id | uuid |
|
|
# created_at | timestamp with time zone | NOT NULL DEFAULT now()
|
|
# arch | arch | NOT NULL DEFAULT 'x64'::arch
|
|
# allocated_at | timestamp with time zone |
|
|
# provisioned_at | timestamp with time zone |
|
|
# vcpus | integer | NOT NULL
|
|
# memory_gib | integer | NOT NULL
|
|
# vm_host_slice_id | uuid |
|
|
# project_id | uuid | NOT NULL
|
|
# cpu_percent_limit | integer |
|
|
# cpu_burst_percent_limit | integer |
|
|
# location_id | uuid | NOT NULL
|
|
# Indexes:
|
|
# vm_pkey | PRIMARY KEY btree (id)
|
|
# vm_ephemeral_net6_key | UNIQUE btree (ephemeral_net6)
|
|
# vm_project_id_location_id_name_uidx | UNIQUE btree (project_id, location_id, name)
|
|
# vm_pool_id_index | btree (pool_id) WHERE pool_id IS NOT NULL
|
|
# Foreign key constraints:
|
|
# vm_location_id_fkey | (location_id) REFERENCES location(id)
|
|
# vm_pool_id_fkey | (pool_id) REFERENCES vm_pool(id)
|
|
# vm_project_id_fkey | (project_id) REFERENCES project(id)
|
|
# vm_vm_host_id_fkey | (vm_host_id) REFERENCES vm_host(id)
|
|
# vm_vm_host_slice_id_fkey | (vm_host_slice_id) REFERENCES vm_host_slice(id)
|
|
# Referenced By:
|
|
# assigned_vm_address | assigned_vm_address_dst_vm_id_fkey | (dst_vm_id) REFERENCES vm(id)
|
|
# dns_servers_vms | dns_servers_vms_vm_id_fkey | (vm_id) REFERENCES vm(id)
|
|
# inference_endpoint_replica | inference_endpoint_replica_vm_id_fkey | (vm_id) REFERENCES vm(id)
|
|
# inference_router_replica | inference_router_replica_vm_id_fkey | (vm_id) REFERENCES vm(id)
|
|
# kubernetes_clusters_cp_vms | kubernetes_clusters_cp_vms_cp_vm_id_fkey | (cp_vm_id) REFERENCES vm(id)
|
|
# kubernetes_nodepools_vms | kubernetes_nodepools_vms_vm_id_fkey | (vm_id) REFERENCES vm(id)
|
|
# load_balancers_vms | load_balancers_vms_vm_id_fkey | (vm_id) REFERENCES vm(id)
|
|
# minio_server | minio_server_vm_id_fkey | (vm_id) REFERENCES vm(id)
|
|
# nic | nic_vm_id_fkey | (vm_id) REFERENCES vm(id)
|
|
# pci_device | pci_device_vm_id_fkey | (vm_id) REFERENCES vm(id)
|
|
# postgres_server | postgres_server_vm_id_fkey | (vm_id) REFERENCES vm(id)
|
|
# victoria_metrics_server | victoria_metrics_server_vm_id_fkey | (vm_id) REFERENCES vm(id)
|
|
# vm_storage_volume | vm_storage_volume_vm_id_fkey | (vm_id) REFERENCES vm(id)
|