ubicloud/model/ai/inference_endpoint.rb
Benjamin Satzger 2eb85f14cb Introduce Inference Endpoints
This introduces the concept of inference endpoints and inference endpoint
replicas. An inference endpoint describes a model and is backed by a
private network, a load balancer and multiple replicas. A replica
is basically a VM that runs the model. The replica VM is part of the
inference endpoint's private subnet and attached to its load balancer.
2024-10-01 15:36:57 +02:00

53 lines
1.8 KiB
Ruby

# frozen_string_literal: true
require_relative "../../model"
# An inference endpoint describes a model and is backed by a private subnet,
# a load balancer and a set of replicas (see the associations below).
class InferenceEndpoint < Sequel::Model
  one_to_one :strand, key: :id
  many_to_one :project
  one_to_many :replicas, class: :InferenceEndpointReplica, key: :inference_endpoint_id
  one_to_one :load_balancer, key: :id, primary_key: :load_balancer_id
  one_to_one :private_subnet, key: :id, primary_key: :private_subnet_id
  one_to_many :api_keys, key: :owner_id, class: :ApiKey, conditions: {owner_table: "inference_endpoint", used_for: "inference_endpoint"}

  # Destroying an endpoint also destroys its associated API keys.
  plugin :association_dependencies, api_keys: :destroy

  dataset_module Authorization::Dataset
  dataset_module Pagination

  include ResourceMethods
  include SemaphoreMethods
  include Authorization::HyperTagMethods
  include Authorization::TaggableMethods

  semaphore :destroy

  # Location of this endpoint converted to its display name.
  def display_location
    LocationNameConverter.to_display_name(location)
  end

  # Path of this endpoint, built from its display location and name.
  def path
    "/location/#{display_location}/inference-endpoint/#{name}"
  end

  # Hyper tag name of this endpoint within the given project.
  #
  # @param project [#ubid] project whose ubid prefixes the tag name
  # @return [String]
  def hyper_tag_name(project)
    "project/#{project.ubid}/location/#{display_location}/inference-endpoint/#{name}"
  end

  # State string derived from the destroy semaphore and the strand label.
  #
  # The deleting check comes first on purpose: an endpoint whose destroy
  # semaphore is already set must not be reported as "running" just because
  # its strand has not yet left the "wait" label.
  #
  # NOTE(review): `destroy_set?` is presumably generated by `semaphore :destroy`
  # via SemaphoreMethods -- confirm.
  #
  # @return [String] "deleting", "running" or "creating"
  def display_state
    label = strand.label
    return "deleting" if destroy_set? || label == "destroy"
    return "running" if label == "wait"

    "creating"
  end

  # Sends a single-message chat completion request for this endpoint's model.
  #
  # The URL scheme is taken from the load balancer's health check protocol;
  # the request is a POST to `/v1/chat/completions` on the given hostname.
  # No exceptions are rescued here, so connection and timeout errors raised
  # by Net::HTTP propagate to the caller.
  #
  # @param content [String] text sent as the only "user" message
  # @param hostname [String] host (optionally with port) to send the request to
  # @param api_key [String] key sent as a Bearer token in the Authorization header
  # @return [Net::HTTPResponse] the response returned by Net::HTTP#request
  def chat_completion_request(content, hostname, api_key)
    uri = URI.parse("#{load_balancer.health_check_protocol}://#{hostname}/v1/chat/completions")
    headers = {"Content-Type": "application/json", Authorization: "Bearer " + api_key}
    payload = {model: model_name, messages: [{role: "user", content: content}]}

    http = Net::HTTP.new(uri.host, uri.port)
    http.read_timeout = 30
    http.use_ssl = (uri.scheme == "https")
    # Certificate verification is switched off only in development.
    http.verify_mode = OpenSSL::SSL::VERIFY_NONE if Config.development?

    request = Net::HTTP::Post.new(uri.request_uri, headers)
    request.body = payload.to_json
    http.request(request)
  end
end