Files
ubicloud/bin/monitor
Furkan Sahin d0e1269d89 Switch to using monitor and remove health probes prog
With this commit, we are removing the prog and completely switching to
monitor for health probes.
2025-01-29 16:34:15 +01:00

59 lines
1.6 KiB
Ruby
Executable File

#!/usr/bin/env ruby
# frozen_string_literal: true
require_relative "../loader"
# NOTE(review): clover_freeze is presumably provided by ../loader; its effect
# is not visible from this file — confirm before relying on it.
clover_freeze
# Registry of monitored resources: resource id => MonitorableResource.
# Filled by the resource_scanner thread and iterated/pruned by the
# pulse_checker thread.
resources = {}
# Serializes access to `resources` between the scanner and pulse-checker threads.
mutex = Mutex.new
# Background thread that discovers monitorable resources. Every 60 seconds it
# walks each source below and registers a MonitorableResource under the
# record's id in `resources`. If a StandardError escapes the loop, it is
# logged, ThreadPrinter is run, and the process exits immediately.
resource_scanner = Thread.new do
  # Enumerated in this order on every scan. The Vm entry is restricted to VMs
  # for which no Sshable with the same id exists.
  sources = [
    VmHost,
    PostgresServer,
    Vm.where(~Sshable.where(id: Sequel[:vm][:id]).exists),
    MinioServer,
    GithubRunner,
    VmHostSlice,
    LoadBalancersVms
  ]

  loop do
    mutex.synchronize do
      sources.each do |source|
        source.each do |record|
          # `||=` leaves already-registered entries untouched.
          resources[record.id] ||= MonitorableResource.new(record)
        end
      end
    end

    sleep 60
  end
rescue => error
  Clog.emit("Resource scanning has failed.") do
    {resource_scanning_failure: {exception: Util.exception_to_hash(error)}}
  end
  ThreadPrinter.run
  Kernel.exit!
end
# Background thread that drives health checks. Every 5 seconds, while holding
# the mutex, it logs the active thread count, starts one worker thread per
# registered resource, and then drops resources flagged as deleted. If a
# StandardError escapes the loop, it is logged, ThreadPrinter is run, and the
# process exits immediately.
pulse_checker = Thread.new do
  loop do
    mutex.synchronize do
      # Deduct 3 for the threads that are always running: main, resource_scanner, pulse_checker
      Clog.emit("Active threads count.") do
        {active_threads_count: Thread.list.count - 3}
      end

      resources.each_value do |resource|
        # Throttle: poll once a second until the live thread count (plus a
        # margin of 2) fits within Config.max_monitor_threads.
        sleep 1 until Thread.list.count + 2 <= Config.max_monitor_threads

        resource.force_stop_if_stuck

        # Each resource is handled on its own thread; the work runs inside
        # the resource's lock_no_wait block.
        Thread.new do
          resource.lock_no_wait do
            resource.open_resource_session
            resource.process_event_loop
            resource.check_pulse
          end
        end
      end

      # Collect the deleted entries first, then remove them from the registry.
      resources
        .select { |_id, resource| resource.deleted }
        .each_key { |id| resources.delete(id) }
    end

    sleep 5
  end
rescue => error
  Clog.emit("Pulse checking has failed.") do
    {pulse_checking_failure: {exception: Util.exception_to_hash(error)}}
  end
  ThreadPrinter.run
  Kernel.exit!
end
# Park the main thread on both workers, scanner first. Both run endless loops,
# and their failure handlers exit the process.
[resource_scanner, pulse_checker].each(&:join)