ubicloud/model/ai/inference_endpoint.rb
Jeremy Evans 30247a3800 Include model annotations at the bottom of all model files
This makes it easier for developers new to the codebase to
get important information on the model's table in the
same file as the model code.

To ensure the model annotations stay accurate, run them on
test_up/test_down.  In CI, regenerate the annotations, and
check for no changes, similar to how the linters work.
2024-11-13 09:13:30 -08:00

82 lines
3.5 KiB
Ruby

# frozen_string_literal: true
require_relative "../../model"
# Sequel model backed by the inference_endpoint table. Besides its
# associations, it provides display helpers (location, path, state) and a
# helper that sends a chat completion request to the endpoint over HTTP(S).
class InferenceEndpoint < Sequel::Model
  one_to_one :strand, key: :id
  many_to_one :project
  one_to_many :replicas, class: :InferenceEndpointReplica, key: :inference_endpoint_id
  one_to_one :load_balancer, key: :id, primary_key: :load_balancer_id
  one_to_one :private_subnet, key: :id, primary_key: :private_subnet_id
  one_to_many :api_keys, key: :owner_id, class: :ApiKey, conditions: {owner_table: "inference_endpoint", used_for: "inference_endpoint"}

  # Associated API keys are destroyed together with the endpoint.
  plugin :association_dependencies, api_keys: :destroy

  dataset_module Authorization::Dataset
  dataset_module Pagination

  include ResourceMethods
  include SemaphoreMethods
  include Authorization::HyperTagMethods
  include Authorization::TaggableMethods

  # NOTE(review): presumably this is what provides destroy_set? used in
  # display_state -- confirm against SemaphoreMethods.
  semaphore :destroy

  # Location converted to its display name via LocationNameConverter.
  def display_location
    LocationNameConverter.to_display_name(location)
  end

  # Path string for this endpoint, built from its display location and name.
  def path
    "/location/#{display_location}/inference-endpoint/#{name}"
  end

  # Hyper tag name of this endpoint within the given project. The project is
  # taken from the argument, not from the model's own project association.
  def hyper_tag_name(project)
    "project/#{project.ubid}/location/#{display_location}/inference-endpoint/#{name}"
  end

  # State string derived from the strand label and the destroy semaphore:
  # "running" when the label is "wait", "deleting" when destroy is set or the
  # label is "destroy", and "creating" otherwise. The "wait" check comes
  # first, so it takes precedence over a set destroy semaphore.
  def display_state
    label = strand.label

    if label == "wait"
      "running"
    elsif destroy_set? || label == "destroy"
      "deleting"
    else
      "creating"
    end
  end

  # POSTs a single-user-message chat completion request to
  # /v1/chat/completions on +hostname+, using the load balancer's health
  # check protocol as the URL scheme and +api_key+ as a bearer token.
  # Returns whatever Net::HTTP#request returns for the call.
  def chat_completion_request(content, hostname, api_key)
    endpoint = URI.parse("#{load_balancer.health_check_protocol}://#{hostname}/v1/chat/completions")
    headers = {"Content-Type": "application/json", Authorization: "Bearer " + api_key}

    client = Net::HTTP.new(endpoint.host, endpoint.port)
    client.read_timeout = 30
    # Certificate verification is skipped only in development.
    client.verify_mode = OpenSSL::SSL::VERIFY_NONE if Config.development?
    client.use_ssl = (endpoint.scheme == "https")

    payload = {model: model_name, messages: [{role: "user", content: content}]}
    request = Net::HTTP::Post.new(endpoint.request_uri, headers)
    request.body = payload.to_json

    client.request(request)
  end
end
# Table: inference_endpoint
# Columns:
# id | uuid | PRIMARY KEY
# created_at | timestamp with time zone | NOT NULL DEFAULT now()
# updated_at | timestamp with time zone | NOT NULL DEFAULT now()
# is_public | boolean | NOT NULL DEFAULT false
# visible | boolean | NOT NULL DEFAULT true
# location | text | NOT NULL
# boot_image | text | NOT NULL
# name | text | NOT NULL
# vm_size | text | NOT NULL
# model_name | text | NOT NULL
# storage_volumes | jsonb | NOT NULL
# engine | text | NOT NULL
# engine_params | text | NOT NULL
# replica_count | integer | NOT NULL
# project_id | uuid | NOT NULL
# load_balancer_id | uuid | NOT NULL
# private_subnet_id | uuid | NOT NULL
# gpu_count | integer | NOT NULL DEFAULT 1
# Indexes:
# inference_endpoint_pkey | PRIMARY KEY btree (id)
# Foreign key constraints:
# inference_endpoint_load_balancer_id_fkey | (load_balancer_id) REFERENCES load_balancer(id)
# inference_endpoint_private_subnet_id_fkey | (private_subnet_id) REFERENCES private_subnet(id)
# inference_endpoint_project_id_fkey | (project_id) REFERENCES project(id)
# Referenced By:
# inference_endpoint_replica | inference_endpoint_replica_inference_endpoint_id_fkey | (inference_endpoint_id) REFERENCES inference_endpoint(id)