Add three columns to the inference_endpoint table:
* max_requests to limit the number of concurrently running requests
* max_project_rps to define how many requests per second we allow a project to make
* max_project_tps to define how many tokens per second we allow a project to consume
12 lines
333 B
Ruby
12 lines
333 B
Ruby
# frozen_string_literal: true
|
|
|
|
# Adds three request-limiting columns to the inference_endpoint table.
#
# The migration is written as a `change` block containing only `add_column`
# operations inside `alter_table`. Every column is declared NOT NULL with an
# explicit default.
Sequel.migration do
  change do
    alter_table(:inference_endpoint) do
      # Upper bound on the number of requests running concurrently.
      add_column :max_requests, :integer, null: false, default: 500
      # Requests per second a single project is allowed to make.
      add_column :max_project_rps, :integer, null: false, default: 100
      # Tokens per second a single project is allowed to consume.
      add_column :max_project_tps, :integer, null: false, default: 10_000
    end
  end
end
|