ubicloud/model/vm.rb

# frozen_string_literal: true

require "jwt"
require_relative "../model"

class Vm < Sequel::Model
  one_to_one :strand, key: :id
  many_to_one :vm_host
  one_to_many :nics, key: :vm_id, class: :Nic
  many_to_many :private_subnets, join_table: :nic, left_key: :vm_id, right_key: :private_subnet_id
  one_to_one :sshable, key: :id
  one_to_one :assigned_vm_address, key: :dst_vm_id, class: :AssignedVmAddress
  one_to_many :vm_storage_volumes, key: :vm_id, order: Sequel.desc(:boot)
  one_to_many :active_billing_records, class: :BillingRecord, key: :resource_id do |ds| ds.active end
  one_to_many :pci_devices, key: :vm_id, class: :PciDevice
  one_through_one :load_balancer, left_key: :vm_id, right_key: :load_balancer_id, join_table: :load_balancers_vms
  one_to_one :load_balancers_vms, key: :vm_id, class: :LoadBalancersVms

  plugin :association_dependencies, sshable: :destroy, assigned_vm_address: :destroy, vm_storage_volumes: :destroy, load_balancers_vms: :destroy

  dataset_module Pagination
  dataset_module Authorization::Dataset

  include ResourceMethods
  include SemaphoreMethods
  include HealthMonitorMethods
  semaphore :destroy, :start_after_host_reboot, :prevent_destroy, :update_firewall_rules, :checkup, :update_spdk_dependency, :waiting_for_capacity, :lb_expiry_started

  include Authorization::HyperTagMethods

  def hyper_tag_name(project)
    "project/#{project.ubid}/location/#{display_location}/vm/#{name}"
  end

  include Authorization::TaggableMethods

  def firewalls
    private_subnets.flat_map(&:firewalls)
  end

  def display_location
    LocationNameConverter.to_display_name(location)
  end

  def path
    "/location/#{display_location}/vm/#{name}"
  end

  def ephemeral_net4
    assigned_vm_address&.ip&.network
  end

  def ip4
    assigned_vm_address&.ip
  end

  def runtime_token
    JWT.encode({sub: ubid, iat: Time.now.to_i}, Config.clover_runtime_token_secret, "HS256")
  end

  def display_state
    return "deleting" if destroy_set? || strand&.label == "destroy"
    if waiting_for_capacity_set?
      return "no capacity available" if Time.now - created_at > 15 * 60
      return "waiting for capacity"
    end
    super
  end

  def mem_gib_ratio
    return 3.2 if arch == "arm64"
    # Special case for GPUs
    return 10.68 if family == "standard-gpu"
    8
  end

  def mem_gib
    (cores * mem_gib_ratio).to_i
  end

  # cloud-hypervisor takes topology information in this format:
  #
  # topology=<threads_per_core>:<cores_per_die>:<dies_per_package>:<packages>
  #
  # And the result of multiplication must equal the thread/vcpu count
  # we wish to allocate:
  #
  #     let total = t.threads_per_core * t.cores_per_die * t.dies_per_package * t.packages;
  #     if total != self.cpus.max_vcpus {
  #         return Err(ValidationError::CpuTopologyCount);
  #     }
  CloudHypervisorCpuTopo = Struct.new(:threads_per_core, :cores_per_die, :dies_per_package, :packages) do
    def to_s
      to_a.map(&:to_s).join(":")
    end

    def max_vcpus
      @max_vcpus ||= to_a.reduce(&:*)
    end
  end

  def cloud_hypervisor_cpu_topology
    threads_per_core, r = vm_host.total_cpus.divmod vm_host.total_cores
    fail "BUG" unless r.zero?

    total_dies_per_package, r = vm_host.total_dies.divmod vm_host.total_sockets
    fail "BUG" unless r.zero?

    total_packages = vm_host.total_sockets

    # Computed all-system statistics, now scale it down to meet VM needs.
    proportion = Rational(cores) / vm_host.total_cores
    packages = (total_packages * proportion).ceil
    dies_per_package = (total_dies_per_package * proportion).ceil
    cores_per_die = Rational(cores) / (packages * dies_per_package)
    fail "BUG: need uniform number of cores allocated per die" unless cores_per_die.denominator == 1

    topo = [threads_per_core, cores_per_die, dies_per_package, packages].map { |num|
      # :nocov:
      fail "BUG: non-integer in topology array" unless num.denominator == 1
      # :nocov:
      Integer(num)
    }

    # :nocov:
    unless topo.reduce(&:*) == threads_per_core * cores
      fail "BUG: arithmetic does not result in the correct number of vcpus"
    end
    # :nocov:

    CloudHypervisorCpuTopo.new(*topo)
  end

  def display_size
    # With additional product families, it is likely that we hit a
    # case where this conversion wouldn't work. We can use map or
    # when/case block at that time.

    # Define suffix integer as 2 * numcores. This coincides with
    # SMT-enabled x86 processors, to give people the right idea if
    # they compare the product code integer to the preponderance of
    # spec sheets on the web.
    #
    # With non-SMT processors, maybe we'll keep it that way too,
    # even though it doesn't describe any attribute about the
    # processor.  But, it does allow "standard-2" is compared to
    # another "standard-2" variant regardless of SMT,
    # e.g. "standard-2-arm", instead of making people interpreting
    # the code adjust the scale factor to do the comparison
    # themselves.
    #
    # Another weakness of this approach, besides it being indirect
    # in description of non-SMT processors, is having "standard-2"
    # be the smallest unit of product is also noisier than
    # "standard-1".
    "#{family}-#{cores * 2}"
  end

  # Various names in linux, like interface names, are obliged to be
  # short, so truncate the ubid. This does introduce the spectre of
  # collisions.  When the time comes, we'll have to ensure it doesn't
  # happen on a single host, pushing into the allocation process.
  def self.ubid_to_name(id)
    id.to_s[0..7]
  end

  def inhost_name
    self.class.ubid_to_name(UBID.from_uuidish(id))
  end

  def storage_size_gib
    vm_storage_volumes.map { _1.size_gib }.sum
  end

  def init_health_monitor_session
    {
      ssh_session: vm_host.sshable.start_fresh_session
    }
  end

  def check_pulse(session:, previous_pulse:)
    reading = begin
      session[:ssh_session].exec!("systemctl is-active #{inhost_name} #{inhost_name}-dnsmasq").split("\n").all?("active") ? "up" : "down"
    rescue
      "down"
    end
    pulse = aggregate_readings(previous_pulse: previous_pulse, reading: reading)

    if pulse[:reading] == "down" && pulse[:reading_rpt] > 5 && Time.now - pulse[:reading_chg] > 30 && !reload.checkup_set?
      incr_checkup
    end

    pulse
  end

  def update_spdk_version(version)
    spdk_installation = vm_host.spdk_installations_dataset[version: version]
    fail "SPDK version #{version} not found on host" unless spdk_installation
    vm_storage_volumes_dataset.update(spdk_installation_id: spdk_installation.id)
    incr_update_spdk_dependency
  end

  def self.redacted_columns
    super + [:public_key]
  end

  def params_json(swap_size_bytes)
    topo = cloud_hypervisor_cpu_topology

    project_public_keys = projects.first.get_ff_vm_public_ssh_keys || []

    # we don't write secrets to params_json, because it
    # shouldn't be stored in the host for security reasons.
    JSON.pretty_generate({
      "vm_name" => name,
      "public_ipv6" => ephemeral_net6.to_s,
      "public_ipv4" => ip4.to_s || "",
      "local_ipv4" => local_vetho_ip.to_s.shellescape || "",
      "dns_ipv4" => nics.first.private_subnet.net4.nth(2).to_s,
      "unix_user" => unix_user,
      "ssh_public_keys" => [public_key] + project_public_keys,
      "nics" => nics.map { |nic| [nic.private_ipv6.to_s, nic.private_ipv4.to_s, nic.ubid_to_tap_name, nic.mac, nic.private_ipv4_gateway] },
      "boot_image" => boot_image,
      "max_vcpus" => topo.max_vcpus,
      "cpu_topology" => topo.to_s,
      "mem_gib" => mem_gib,
      "ndp_needed" => vm_host.ndp_needed,
      "storage_volumes" => storage_volumes,
      "swap_size_bytes" => swap_size_bytes,
      "pci_devices" => pci_devices.map { [_1.slot, _1.iommu_group] }
    })
  end

  def storage_volumes
    vm_storage_volumes.map { |s|
      {
        "boot" => s.boot,
        "image" => s.boot_image&.name,
        "image_version" => s.boot_image&.version,
        "size_gib" => s.size_gib,
        "device_id" => s.device_id,
        "disk_index" => s.disk_index,
        "encrypted" => !s.key_encryption_key_1.nil?,
        "spdk_version" => s.spdk_version,
        "use_bdev_ubi" => s.use_bdev_ubi,
        "skip_sync" => s.skip_sync,
        "storage_device" => s.storage_device.name,
        "read_only" => s.size_gib == 0,
        "max_ios_per_sec" => s.max_ios_per_sec,
        "max_read_mbytes_per_sec" => s.max_read_mbytes_per_sec,
        "max_write_mbytes_per_sec" => s.max_write_mbytes_per_sec
      }
    }
  end

  def storage_secrets
    vm_storage_volumes.filter_map { |s|
      if !s.key_encryption_key_1.nil?
        [s.device_id, s.key_encryption_key_1.secret_key_material_hash]
      end
    }.to_h
  end
end

# Table: vm
# Columns:
#  id             | uuid                     | PRIMARY KEY
#  ephemeral_net6 | cidr                     |
#  vm_host_id     | uuid                     |
#  unix_user      | text                     | NOT NULL
#  public_key     | text                     | NOT NULL
#  display_state  | vm_display_state         | NOT NULL DEFAULT 'creating'::vm_display_state
#  name           | text                     | NOT NULL
#  location       | text                     | NOT NULL
#  boot_image     | text                     | NOT NULL
#  local_vetho_ip | text                     |
#  ip4_enabled    | boolean                  | NOT NULL DEFAULT false
#  family         | text                     | NOT NULL
#  cores          | integer                  | NOT NULL
#  pool_id        | uuid                     |
#  created_at     | timestamp with time zone | NOT NULL DEFAULT now()
#  arch           | arch                     | NOT NULL DEFAULT 'x64'::arch
#  allocated_at   | timestamp with time zone |
#  provisioned_at | timestamp with time zone |
# Indexes:
#  vm_pkey               | PRIMARY KEY btree (id)
#  vm_ephemeral_net6_key | UNIQUE btree (ephemeral_net6)
# Foreign key constraints:
#  vm_pool_id_fkey    | (pool_id) REFERENCES vm_pool(id)
#  vm_vm_host_id_fkey | (vm_host_id) REFERENCES vm_host(id)
# Referenced By:
#  assigned_vm_address        | assigned_vm_address_dst_vm_id_fkey    | (dst_vm_id) REFERENCES vm(id)
#  dns_servers_vms            | dns_servers_vms_vm_id_fkey            | (vm_id) REFERENCES vm(id)
#  inference_endpoint_replica | inference_endpoint_replica_vm_id_fkey | (vm_id) REFERENCES vm(id)
#  load_balancers_vms         | load_balancers_vms_vm_id_fkey         | (vm_id) REFERENCES vm(id)
#  minio_server               | minio_server_vm_id_fkey               | (vm_id) REFERENCES vm(id)
#  nic                        | nic_vm_id_fkey                        | (vm_id) REFERENCES vm(id)
#  pci_device                 | pci_device_vm_id_fkey                 | (vm_id) REFERENCES vm(id)
#  postgres_server            | postgres_server_vm_id_fkey            | (vm_id) REFERENCES vm(id)
#  vm_storage_volume          | vm_storage_volume_vm_id_fkey          | (vm_id) REFERENCES vm(id)