This change uses the VmHostSlice introduced earlier to allocate a Standard VM. The main premise is that VMs-in-slices are hosted on separate hosts from standalone VMs (the current code). This logic is guarded by two flags: (1) `VmHost.accept_slices` - marks host instances that can accept VMs allocated in slices; hosts with this flag set to `true` will not be used for standalone VMs. (2) The project feature flag `use_slices_for_allocation` - only projects with this flag enabled will use the new logic of creating VMs in slices. With those two flags we can control the rollout of this feature. Note that if a project is marked with `use_slices_for_allocation` but there are no VmHosts that accept slices, the user will see a 'no capacity' error. The allocator is modified to use a new VmHostSliceAllocation class that wraps and replaces VmHostAllocation when slice logic is enabled. It finds capacity the same way as before, but when `update` is called it first creates a slice, together with the allocation of VmHostCpus, before starting the VM creation process. This is a building block that will be expanded upon when we introduce 'burstable' instances later, where multiple VMs can share a slice. For now, for the 'Standard' family there is a 1:1 mapping between a slice and the VM inside it, and between their lifespans. The slice is created first, then the VM. Slice creation updates the utilization of the VmHost, and VM creation updates the utilization of the slice hosting it. Again, this will become more relevant once multiple VMs share a slice.
123 lines
3.4 KiB
Ruby
123 lines
3.4 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
# Prog that manages a VmHostSlice on its VmHost: it prepares the slice on the
# host over SSH, performs availability checkups, re-creates the slice after a
# host reboot, and tears the slice down when destruction is requested.
class Prog::Vm::VmHostSliceNexus < Prog::Base
  subject_is :vm_host_slice

  # Creates a VmHostSlice row on +vm_host+ and a Strand for this prog that
  # starts at the "prep" label, both inside one DB transaction. The Strand is
  # given the same id as the slice.
  #
  # name         - name stored on the slice.
  # vm_host      - host the slice belongs to (only its id is read here).
  # family       - family value stored on the slice.
  # allowed_cpus - passed to VmHostSlice#set_allowed_cpus.
  # memory_gib   - stored as the slice's total_memory_gib.
  # is_shared    - stored on the slice; defaults to false.
  #
  # NOTE(review): presumably returns the created Strand (the block's last
  # expression) -- confirm that DB.transaction returns the block value.
  def self.assemble_with_host(name, vm_host, family:, allowed_cpus:, memory_gib:, is_shared: false)
    DB.transaction do
      # CPU-related counters start at zero; they are filled in below.
      slice_attrs = {
        name: name,
        is_shared: is_shared,
        family: family,
        cores: 0,
        total_cpu_percent: 0,
        used_cpu_percent: 0,
        total_memory_gib: memory_gib,
        used_memory_gib: 0,
        vm_host_id: vm_host.id
      }
      slice = VmHostSlice.create(slice_attrs)

      # This will update the CPU allocation as well as total_cpu_percent and cores values
      slice.set_allowed_cpus(allowed_cpus)

      Strand.create(prog: "Vm::VmHostSliceNexus", label: "prep") { |st| st.id = slice.id }
    end
  end

  # The VmHost of the subject slice, memoized for the lifetime of this prog
  # instance.
  def host
    @host ||= vm_host_slice.vm_host
  end

  # When the destroy semaphore is set, hops to the destroy label unless the
  # strand is already there.
  def before_run
    when_destroy_set? do
      hop_destroy unless strand.label == "destroy"
    end
  end

  # Drives the daemonized "setup-slice prep" command on the host: starts it
  # when it has not started or has failed, and once it reports success cleans
  # up the daemonizer unit, marks the slice enabled and hops to wait.
  label def prep
    sshable = host.sshable
    unit = "prep_#{vm_host_slice.name}"

    case sshable.cmd("common/bin/daemonizer --check #{unit}")
    when "Succeeded"
      sshable.cmd("common/bin/daemonizer --clean #{unit}")
      vm_host_slice.update(enabled: true)
      hop_wait
    when "NotStarted", "Failed"
      setup_cmd = "sudo host/bin/setup-slice prep #{vm_host_slice.inhost_name} \"#{vm_host_slice.allowed_cpus_cgroup}\""
      sshable.cmd("common/bin/daemonizer '#{setup_cmd}' #{unit}")
    end

    # Any other check result falls through to here and the label is retried.
    nap 1
  end

  # Steady state: reacts to the start_after_host_reboot and checkup
  # semaphores, otherwise naps.
  label def wait
    when_start_after_host_reboot_set? do
      register_deadline(:wait, 5 * 60)
      hop_start_after_host_reboot
    end

    when_checkup_set? do
      hop_unavailable unless available?
      decr_checkup
    rescue Sshable::SshError
      # Host is likely to be down, which will be handled by HostNexus. We still
      # go to the unavailable state for keeping track of the state.
      hop_unavailable
    end

    nap 30
  end

  # Entered when a checkup found the slice unavailable. Keeps probing the
  # slice; resolves the page and returns to wait once it is available again,
  # otherwise assembles a "VmHostSliceUnavailable" page.
  label def unavailable
    # If the slice becomes unavailable due to host unavailability, it first needs to
    # go through start_after_host_reboot state to be able to recover.
    when_start_after_host_reboot_set? do
      incr_checkup
      hop_start_after_host_reboot
    end

    begin
      if available?
        # The page may not exist, hence the safe navigation.
        Page.from_tag_parts("VmHostSliceUnavailable", vm_host_slice.ubid)&.incr_resolve
        decr_checkup
        hop_wait
      else
        Prog::PageNexus.assemble("#{vm_host_slice.inhost_name} is unavailable", ["VmHostSliceUnavailable", vm_host_slice.ubid], vm_host_slice.ubid)
      end
    rescue Net::SSH::Disconnect, Net::SSH::ConnectionTimeout, Errno::ECONNRESET, Errno::ECONNREFUSED, IOError
      # Host is likely to be down, which will be handled by HostNexus. No need
      # to create a page for this case.
    end

    nap 30
  end

  # Removes the slice from the host, gives its cores and memory back to the
  # VmHost counters, deletes the slice row and pops.
  label def destroy
    decr_destroy

    host.sshable.cmd("sudo host/bin/setup-slice delete #{vm_host_slice.inhost_name}")

    # The decrement is expressed in SQL (Sequel[...] - value) rather than
    # computed from values loaded in Ruby.
    host_row = VmHost.dataset.where(id: host.id)
    host_row.update(
      used_cores: Sequel[:used_cores] - vm_host_slice.cores,
      used_hugepages_1g: Sequel[:used_hugepages_1g] - vm_host_slice.total_memory_gib
    )

    vm_host_slice.destroy

    pop "vm_host_slice destroyed"
  end

  # Runs "setup-slice recreate-unpersisted" for the slice on the host, clears
  # the start_after_host_reboot semaphore and hops back to wait.
  label def start_after_host_reboot
    host.sshable.cmd("sudo host/bin/setup-slice recreate-unpersisted #{vm_host_slice.inhost_name}")
    decr_start_after_host_reboot

    hop_wait
  end

  # Asks the slice whether it is up, using a fresh SSH session to the host.
  # Returns false when the session block is never invoked.
  def available?
    slice_up = false
    host.sshable.start_fresh_session do |session|
      slice_up = vm_host_slice.up?(session)
    end

    slice_up
  end
end
|