InferenceEndpoints can be valid ObjectTag members. ApiKeys can be valid SubjectTag members (personal access tokens are added to Admin SubjectTag by default), and can also be valid ObjectTag members (for inference tokens).
81 lines
3.3 KiB
Ruby
81 lines
3.3 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require_relative "../../model"
|
|
|
|
class InferenceEndpoint < Sequel::Model
|
|
one_to_one :strand, key: :id
|
|
many_to_one :project
|
|
one_to_many :replicas, class: :InferenceEndpointReplica, key: :inference_endpoint_id
|
|
one_to_one :load_balancer, key: :id, primary_key: :load_balancer_id
|
|
one_to_one :private_subnet, key: :id, primary_key: :private_subnet_id
|
|
|
|
dataset_module Pagination
|
|
|
|
include ResourceMethods
|
|
include SemaphoreMethods
|
|
include Authorization::HyperTagMethods
|
|
include ObjectTag::Cleanup
|
|
|
|
semaphore :destroy, :maintenance
|
|
|
|
def display_location
|
|
LocationNameConverter.to_display_name(location)
|
|
end
|
|
|
|
def path
|
|
"/location/#{display_location}/inference-endpoint/#{name}"
|
|
end
|
|
|
|
def hyper_tag_name(project)
|
|
"project/#{project.ubid}/location/#{display_location}/inference-endpoint/#{name}"
|
|
end
|
|
|
|
def display_state
|
|
return "running" if ["wait"].include?(strand.label)
|
|
return "deleting" if destroy_set? || strand.label == "destroy"
|
|
"creating"
|
|
end
|
|
|
|
def chat_completion_request(content, hostname, api_key)
|
|
uri = URI.parse("#{load_balancer.health_check_protocol}://#{hostname}/v1/chat/completions")
|
|
header = {"Content-Type": "application/json", Authorization: "Bearer " + api_key}
|
|
http = Net::HTTP.new(uri.host, uri.port)
|
|
http.read_timeout = 30
|
|
http.verify_mode = OpenSSL::SSL::VERIFY_NONE if Config.development?
|
|
http.use_ssl = (uri.scheme == "https")
|
|
req = Net::HTTP::Post.new(uri.request_uri, header)
|
|
req.body = {model: model_name, messages: [{role: "user", content: content}]}.to_json
|
|
http.request(req)
|
|
end
|
|
end
|
|
|
|
# Table: inference_endpoint
|
|
# Columns:
|
|
# id | uuid | PRIMARY KEY
|
|
# created_at | timestamp with time zone | NOT NULL DEFAULT now()
|
|
# updated_at | timestamp with time zone | NOT NULL DEFAULT now()
|
|
# is_public | boolean | NOT NULL DEFAULT false
|
|
# visible | boolean | NOT NULL DEFAULT true
|
|
# location | text | NOT NULL
|
|
# boot_image | text | NOT NULL
|
|
# name | text | NOT NULL
|
|
# vm_size | text | NOT NULL
|
|
# model_name | text | NOT NULL
|
|
# storage_volumes | jsonb | NOT NULL
|
|
# engine | text | NOT NULL
|
|
# engine_params | text | NOT NULL
|
|
# replica_count | integer | NOT NULL
|
|
# project_id | uuid | NOT NULL
|
|
# load_balancer_id | uuid | NOT NULL
|
|
# private_subnet_id | uuid | NOT NULL
|
|
# gpu_count | integer | NOT NULL DEFAULT 1
|
|
# tags | jsonb | NOT NULL DEFAULT '{}'::jsonb
|
|
# Indexes:
|
|
# inference_endpoint_pkey | PRIMARY KEY btree (id)
|
|
# Foreign key constraints:
|
|
# inference_endpoint_load_balancer_id_fkey | (load_balancer_id) REFERENCES load_balancer(id)
|
|
# inference_endpoint_private_subnet_id_fkey | (private_subnet_id) REFERENCES private_subnet(id)
|
|
# inference_endpoint_project_id_fkey | (project_id) REFERENCES project(id)
|
|
# Referenced By:
|
|
# inference_endpoint_replica | inference_endpoint_replica_inference_endpoint_id_fkey | (inference_endpoint_id) REFERENCES inference_endpoint(id)
|