We are investigating a recurring problem where pulse checks get stuck. This commit will help us understand what is happening at the time a pulse check is stuck.
59 lines
1.5 KiB
Ruby
Executable File
59 lines
1.5 KiB
Ruby
Executable File
#!/usr/bin/env ruby
|
|
# frozen_string_literal: true
|
|
|
|
require_relative "../loader"
|
|
# NOTE(review): clover_freeze is not defined in this file; presumably it is
# provided by ../loader. Confirm what it freezes before reordering this call.
clover_freeze

# Registry of monitored resources, keyed by resource id. Both long-running
# threads in this script read and modify this hash only inside
# `mutex.synchronize`.
resources = {}
mutex = Mutex.new
|
|
|
|
# Resource scanner thread: once every 60 seconds it walks each monitorable
# source and registers every record whose id is not yet present in the shared
# `resources` hash. Already-registered ids keep their existing wrapper.
#
# Any StandardError raised inside the thread is logged through Clog, the
# thread state is dumped with ThreadPrinter, and the process is terminated
# immediately with Kernel.exit!.
resource_scanner = Thread.new do
  # Sources to enumerate. The Vm entry is a filtered query: only Vm rows for
  # which no Sshable row with the same id exists.
  sources = [
    VmHost,
    PostgresServer,
    Vm.where(~Sshable.where(id: Sequel[:vm][:id]).exists),
    MinioServer,
    GithubRunner
  ]

  loop do
    # The whole scan runs while holding the mutex, so the pulse checker never
    # observes the hash mid-update.
    mutex.synchronize do
      sources.each do |source|
        source.each do |record|
          resources[record.id] ||= MonitorableResource.new(record)
        end
      end
    end

    sleep 60
  end
rescue => error
  Clog.emit("Resource scanning has failed.") { {resource_scanning_failure: {exception: Util.exception_to_hash(error)}} }
  ThreadPrinter.run
  Kernel.exit!
end
|
|
|
|
# Pulse checker thread: every 5 seconds it walks the shared `resources` hash
# and starts one worker thread per resource that opens the resource session,
# processes its event loop and checks its pulse. After the walk, resources
# flagged as deleted are removed from the hash.
#
# Any StandardError raised in this thread's own loop is logged through Clog,
# the thread state is dumped with ThreadPrinter, and the process is terminated
# immediately with Kernel.exit!. Exceptions raised inside the per-resource
# worker threads are not handled by this rescue.
pulse_checker = Thread.new do
  loop do
    mutex.synchronize do
      # Deduct 3 for the threads that are always running: main, resource_scanner, pulse_checker
      Clog.emit("Active threads count.") { {active_threads_count: Thread.list.count - 3} }

      resources.each_value do |r|
        # Throttle: wait until the live thread count plus 2 no longer exceeds
        # Config.max_monitor_threads before starting another worker.
        # NOTE(review): this wait happens while `mutex` is held, so the
        # resource scanner cannot update the hash until the walk finishes.
        # NOTE(review): the reason for the "+ 2" margin is not evident from
        # this file; confirm against the comment about 3 permanent threads.
        sleep 1 while Thread.list.count + 2 > Config.max_monitor_threads

        # NOTE(review): presumably aborts a previous check for this resource
        # that has been running for too long; confirm in MonitorableResource.
        r.force_stop_if_stuck

        Thread.new do
          # NOTE(review): presumably skips the block when another check for
          # this resource already holds the lock; confirm in MonitorableResource.
          r.lock_no_wait do
            r.open_resource_session
            r.process_event_loop
            r.check_pulse
          end
        end
      end

      # Remove resources flagged as deleted, in place and in a single pass.
      resources.delete_if { |_r_id, r| r.deleted }
    end

    sleep 5
  end
rescue => ex
  Clog.emit("Pulse checking has failed.") { {pulse_checking_failure: {exception: Util.exception_to_hash(ex)}} }
  ThreadPrinter.run
  Kernel.exit!
end
|
|
|
|
# Keep the main thread blocked on the two long-running threads, waiting on the
# scanner first and then on the pulse checker.
[resource_scanner, pulse_checker].each(&:join)
|