ubicloud/model/ai/inference_endpoint.rb
Last commit: 80d75d394d "Convert SemaphoreMethods to Sequel plugin" (Jeremy Evans, 2025-07-09 00:42:44 +09:00)
Have it take the semaphores as arguments. This allows using a simple attr_reader for the semaphore names. It also makes sure the semaphore name array is frozen.

# frozen_string_literal: true

require_relative "../../model"

class InferenceEndpoint < Sequel::Model
  one_to_one :strand, key: :id
  many_to_one :project
  one_to_many :replicas, class: :InferenceEndpointReplica, key: :inference_endpoint_id
  one_to_one :load_balancer, key: :id, primary_key: :load_balancer_id
  one_to_one :private_subnet, key: :id, primary_key: :private_subnet_id
  many_to_one :location, key: :location_id, class: :Location

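  # Dataset pagination, shared resource helpers, and the destroy/maintenance
  # semaphores used to signal the endpoint's strand.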
  dataset_module Pagination
  plugin ResourceMethods
  plugin SemaphoreMethods, :destroy, :maintenance
  include ObjectTag::Cleanup

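  # Human-readable name of the location hosting this endpoint.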
  def display_location
    location.display_name
  end

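  # URL path for this endpoint, scoped to its location.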
  def path
    "/location/#{display_location}/inference-endpoint/#{name}"
  end

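  # User-facing lifecycle state derived from the strand's label and any
  # pending destroy semaphore.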
  def display_state
    label = strand.label
    return "running" if label == "wait"
    return "deleting" if destroy_set? || label == "destroy"
    "creating"
  end

  def chat_completion_request(content, hostname, api_key)
    uri = URI.parse("#{load_balancer.health_check_protocol}://#{hostname}/v1/chat/completions")
    header = {"Content-Type": "application/json", Authorization: "Bearer " + api_key}
    http = Net::HTTP.new(uri.host, uri.port)
    http.read_timeout = 30
    http.verify_mode = OpenSSL::SSL::VERIFY_NONE if Config.development?
    http.use_ssl = (uri.scheme == "https")
    req = Net::HTTP::Post.new(uri.request_uri, header)
    req.body = {model: model_name, messages: [{role: "user", content: content}]}.to_json
    http.request(req)
  end
end
# Table: inference_endpoint
# Columns:
#                 id | uuid                     | PRIMARY KEY
#         created_at | timestamp with time zone | NOT NULL DEFAULT now()
#         updated_at | timestamp with time zone | NOT NULL DEFAULT now()
#          is_public | boolean                  | NOT NULL DEFAULT false
#            visible | boolean                  | NOT NULL DEFAULT true
#         boot_image | text                     | NOT NULL
#               name | text                     | NOT NULL
#            vm_size | text                     | NOT NULL
#         model_name | text                     | NOT NULL
#    storage_volumes | jsonb                    | NOT NULL
#             engine | text                     | NOT NULL
#      engine_params | text                     | NOT NULL
#      replica_count | integer                  | NOT NULL
#         project_id | uuid                     | NOT NULL
#   load_balancer_id | uuid                     | NOT NULL
#  private_subnet_id | uuid                     | NOT NULL
#          gpu_count | integer                  | NOT NULL DEFAULT 1
#               tags | jsonb                    | NOT NULL DEFAULT '{}'::jsonb
#       max_requests | integer                  | NOT NULL DEFAULT 500
#    max_project_rps | integer                  | NOT NULL DEFAULT 100
#    max_project_tps | integer                  | NOT NULL DEFAULT 10000
#        location_id | uuid                     | NOT NULL
#    external_config | jsonb                    | NOT NULL DEFAULT '{}'::jsonb
# Indexes:
#   inference_endpoint_pkey | PRIMARY KEY btree (id)
# Foreign key constraints:
#   inference_endpoint_load_balancer_id_fkey  | (load_balancer_id) REFERENCES load_balancer(id)
#   inference_endpoint_location_id_fkey       | (location_id) REFERENCES location(id)
#   inference_endpoint_private_subnet_id_fkey | (private_subnet_id) REFERENCES private_subnet(id)
#   inference_endpoint_project_id_fkey        | (project_id) REFERENCES project(id)
# Referenced By:
#   inference_endpoint_replica | inference_endpoint_replica_inference_endpoint_id_fkey | (inference_endpoint_id) REFERENCES inference_endpoint(id)
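
# A minimal usage sketch (illustrative only, kept as a comment so the file still
# loads cleanly): how the chat completion helper might be exercised from an
# application console. The endpoint name, the load balancer hostname accessor,
# and the API key are assumptions for illustration, not values defined in this file.
#
#   ie = InferenceEndpoint.first(name: "my-endpoint")   # hypothetical endpoint name
#   response = ie.chat_completion_request(
#     "Say hello in one short sentence.",  # user message content
#     ie.load_balancer.hostname,           # assumed hostname accessor on LoadBalancer
#     "example-api-key"                    # hypothetical API key for the endpoint
#   )
#   puts response.code
#   puts JSON.parse(response.body).dig("choices", 0, "message", "content") if response.is_a?(Net::HTTPSuccess)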