ubicloud/rhizome/inference_endpoint/bin/setup-replica
Benjamin Satzger 09cec4e2bc Set max_concurrency of inference gateway
In a previous commit we added max_concurrency to inference endpoints. In
this commit we set the inference gateway's corresponding setting,
IG_MAX_REQUESTS, to that value during replica setup.
2025-02-11 17:47:12 +01:00
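The actual propagation of max_requests happens inside ReplicaSetup#prep, which is not shown on this page. Below is a minimal sketch of one plausible way the value could reach the gateway process, assuming the gateway reads IG_MAX_REQUESTS from an environment file consumed by its service unit; the file path, helper name, and wiring are assumptions, not the real ReplicaSetup code.

```ruby
# Hypothetical sketch only: the env file path and helper name are assumptions.
require "fileutils"

# Write the gateway's environment file so its service picks up the limit,
# e.g. via an `EnvironmentFile=` line in the unit definition.
def write_gateway_env(max_requests:, env_path: "/ie/workdir/inference-gateway.env")
  FileUtils.mkdir_p(File.dirname(env_path))
  File.write(env_path, "IG_MAX_REQUESTS=#{Integer(max_requests)}\n")
end

write_gateway_env(max_requests: 64)
```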


#!/bin/env ruby
# frozen_string_literal: true

require "json" # JSON.parse is used below; harmless if util already loads it
require_relative "../../common/lib/util"
require_relative "../lib/replica_setup"

replica_setup = ReplicaSetup.new

# Replica parameters arrive as a single JSON object on stdin.
params = JSON.parse($stdin.read)

begin
  gpu_count = params.fetch("gpu_count")
  inference_engine = params.fetch("inference_engine")
  inference_engine_params = params.fetch("inference_engine_params")
  model = params.fetch("model")
  replica_ubid = params.fetch("replica_ubid")
  ssl_crt_path = params.fetch("ssl_crt_path")
  ssl_key_path = params.fetch("ssl_key_path")
  gateway_port = params.fetch("gateway_port")
  # max_requests becomes the inference gateway's IG_MAX_REQUESTS setting.
  max_requests = params.fetch("max_requests")
rescue KeyError => e
  puts "Needed #{e.key} in parameters"
  exit 1
end

replica_setup.prep(
  gpu_count:,
  inference_engine:,
  inference_engine_params:,
  model:,
  replica_ubid:,
  ssl_crt_path:,
  ssl_key_path:,
  gateway_port:,
  max_requests:
)
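
For local testing, the script could be driven as below; every parameter value is a placeholder and the invocation path is an assumption (in production the control plane supplies these parameters when it runs the setup step).

```ruby
# Hypothetical local invocation; all values below are placeholders.
require "json"
require "open3"

params = {
  "gpu_count" => 1,
  "inference_engine" => "vllm",
  "inference_engine_params" => "--max-model-len 8192",
  "model" => "example-model",
  "replica_ubid" => "replica-ubid-placeholder",
  "ssl_crt_path" => "/ie/workdir/ssl_cert.pem",
  "ssl_key_path" => "/ie/workdir/ssl_key.pem",
  "gateway_port" => 8443,
  "max_requests" => 64
}

# The script reads the JSON object from stdin and exits non-zero if a key is missing.
out, status = Open3.capture2("inference_endpoint/bin/setup-replica", stdin_data: JSON.generate(params))
warn out unless status.success?
```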