ubicloud/prog/ai/inference_router_target_nexus.rb

# frozen_string_literal: true

require "digest"
require "json"
require "forwardable"
require "securerandom" # SecureRandom backs the default api_key in assemble
require_relative "../../lib/util"

class Prog::Ai::InferenceRouterTargetNexus < Prog::Base
  subject_is :inference_router_target

  extend Forwardable
  def_delegators :inference_router_target, :inference_router, :inference_router_model
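
  # Creates the InferenceRouterTarget row and the Strand that drives this
  # prog inside one transaction, so neither exists without the other.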
  def self.assemble(inference_router_id:, inference_router_model_id:, name:, priority:, inflight_limit:, type: "manual", host: "", enabled: false, config: {}, extra_configs: {}, api_key: SecureRandom.alphanumeric(32))
    DB.transaction do
      target = InferenceRouterTarget.create(
        inference_router_id:,
        inference_router_model_id:,
        name:,
        host:,
        api_key:,
        inflight_limit:,
        priority:,
        config:,
        type:,
        extra_configs:,
        enabled:
      )
      Strand.create_with_id(target.id, prog: "Ai::InferenceRouterTargetNexus", label: "start")
    end
  end
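
  # Checked before every label: once the destroy semaphore is set, hop to
  # destroy, or pop any nested operation so the destroy can proceed.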
  def before_run
    when_destroy_set? do
      if strand.label != "destroy"
        hop_destroy
      elsif strand.stack.count > 1
        pop "operation is cancelled due to the destruction of the inference router target"
      end
    end
  end
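
  # RunPod-backed targets need a pod provisioned first; all other targets
  # go straight to the long idle wait.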
  label def start
    hop_setup if inference_router_target.type == "runpod"
    hop_wait
  end
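
  # Provisions a RunPod pod, with a 30-minute deadline to reach wait. The
  # default pod spec can be overridden per-target via the config column,
  # and the pod's HTTP proxy hostname is recorded as the target's host.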
  label def setup
    register_deadline("wait", 30 * 60)
    pod_config = default_runpod_config
    overlay_config!(pod_config, inference_router_target.config)
    pod_id = runpod_client.create_pod(inference_router_target.ubid, pod_config)
    inference_router_target.update(state: {"pod_id" => pod_id}, host: "#{pod_id}-8080.proxy.runpod.net")
    hop_wait_setup
  end
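
  # Polls every 10 seconds until the pod reports a public IP, then starts
  # the long wait.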
  label def wait_setup
    pod = runpod_client.get_pod(inference_router_target.state["pod_id"])
    hop_wait unless pod["publicIp"].to_s.empty?
    nap 10
  end
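
  # Nothing to do for a running target; nap for 30 days at a time.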
  label def wait
    nap 60 * 60 * 24 * 30
  end
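
  # Deletes the RunPod pod, if one was provisioned, before removing the
  # target record itself.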
  label def destroy
    decr_destroy
    if (pod_id = inference_router_target.state["pod_id"])
      runpod_client.delete_pod(pod_id)
      inference_router_target.update(state: {})
    end
    inference_router_target.destroy
    pop "inference router target is deleted"
  end
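
  # Lazily constructed RunPod API client.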
  def runpod_client
    @runpod_client ||= RunpodClient.new
  end
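
  # Base pod specification: a secure-cloud GPU pod with a 50 GB volume
  # mounted at /model. The start command installs sshd, downloads the
  # model from Hugging Face, and serves it with vLLM on port 8080 behind
  # the target's api_key. HF_TOKEN is filled in from a RunPod secret.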
  def default_runpod_config
    {
      "name" => inference_router_target.ubid,
      "cloudType" => "SECURE",
      "computeType" => "GPU",
      "containerDiskInGb" => 50,
      "dockerEntrypoint" => ["bash", "-c"],
      "dockerStartCmd" => [
        <<~CMD.gsub(/\s+/, " ").strip
          apt update;
          DEBIAN_FRONTEND=noninteractive apt-get install openssh-server -y;
          mkdir -p ~/.ssh; cd $_; chmod 700 ~/.ssh;
          echo "$PUBLIC_KEY" >> authorized_keys; chmod 700 authorized_keys;
          service ssh start;
          huggingface-cli download ${HF_MODEL} --repo-type model --local-dir /model;
          vllm serve /model --served-model-name ${HF_MODEL}
            --api-key ${VLLM_API_KEY} --port 8080 --disable-log-requests ${VLLM_PARAMS}
        CMD
      ],
      "env" => {
        "HF_TOKEN" => "{{ RUNPOD_SECRET_HF_TOKEN }}",
        "HF_MODEL" => inference_router_model.model_name,
        "VLLM_API_KEY" => inference_router_target.api_key
      },
      "imageName" => "vllm/vllm-openai:latest",
      "ports" => ["8080/http", "22/tcp"],
      "volumeMountPath" => "/model",
      "volumeInGb" => 50
    }
  end
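
  # Deep-merges overlay_config into base_config in place: nested hashes
  # are merged recursively, everything else is overwritten.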
  def overlay_config!(base_config, overlay_config)
    overlay_config.each do |key, value|
      if base_config[key].is_a?(Hash) && value.is_a?(Hash)
        overlay_config!(base_config[key], value)
      else
        base_config[key] = value
      end
    end
    base_config
  end
end