ubicloud/lib/monitorable_resource.rb
Jeremy Evans 1392a022d5 Avoid unsynchronized setting of monitorable resource session to nil
Merge the process_event_loop method into the check_pulse method. Have the
pulse thread change a local variable if there was an error during the event
loop.  Have the monitor thread (not the pulse thread) clear the session,
after the pulse thread has ended, if the event loop failed.

Don't sleep until run_event_loop is true. With the merged code,
run_event_loop would be set immediately after the start of the thread, so
there is no reason to sleep. Even before this change, I don't think there
was a reason to sleep.
2025-09-05 02:21:42 +09:00

65 lines
1.9 KiB
Ruby

# frozen_string_literal: true
# Pairs a resource with the health monitor session and the pulse state used
# to check it.
#
# The wrapped resource is expected to respond to +reload+,
# +init_health_monitor_session+, +needs_event_loop_for_pulse_check?+,
# +check_pulse+ and +ubid+. The session is expected to be a hash whose
# +:ssh_session+ entry responds to +loop+, +shutdown!+ and +close+.
class MonitorableResource
  attr_reader :deleted, :resource
  # Not assigned anywhere in this class; presumably maintained by the code
  # that schedules monitor jobs -- verify against callers.
  attr_accessor :monitor_job_started_at, :monitor_job_finished_at

  # @param resource the object to monitor (see the class comment for the
  #   methods it must respond to)
  def initialize(resource)
    @resource = resource
    @session = nil
    @pulse = {}
    @pulse_check_started_at = Time.now
    @pulse_thread = nil
    @deleted = false
  end

  # Opens a health monitor session for the resource, unless one is already
  # open and the last pulse reading was "up".
  #
  # Every StandardError raised while reloading the resource or creating the
  # session is swallowed. A Sequel::NoExistingObject additionally logs the
  # deletion, clears the session and sets +deleted+ to true.
  def open_resource_session
    return if @session && @pulse[:reading] == "up"

    @session = @resource.reload.init_health_monitor_session
  rescue => ex
    if ex.is_a?(Sequel::NoExistingObject)
      Clog.emit("Resource is deleted.") { {resource_deleted: {ubid: @resource.ubid}} }
      @session = nil
      @deleted = true
    end
  end

  # Asks the resource for a new pulse and stores it. Does nothing when no
  # session is open.
  #
  # When the resource needs an event loop for its pulse check, the SSH
  # session's loop is driven on a separate thread for the duration of the
  # check. If that loop fails, the session is closed by the calling thread
  # after the loop thread has ended.
  #
  # StandardErrors raised by the pulse check are logged and swallowed; other
  # exceptions propagate, but only after the loop thread has been stopped.
  #
  # @return [nil]
  def check_pulse
    return unless @session

    run_event_loop = false
    event_loop_failed = false
    pulse_thread = nil

    if @resource.needs_event_loop_for_pulse_check?
      run_event_loop = true
      pulse_thread = Thread.new do
        # The loop keeps processing for as long as the block returns a truthy value.
        @session[:ssh_session].loop(0.01) { run_event_loop }
      rescue => ex
        event_loop_failed = true
        Clog.emit("SSH event loop has failed.") { {event_loop_failure: {ubid: @resource.ubid, exception: Util.exception_to_hash(ex)}} }
      end
    end

    begin
      @pulse_check_started_at = Time.now
      @pulse = @resource.check_pulse(session: @session, previous_pulse: @pulse)

      # Log while reading_rpt is below 6, whenever reading_rpt % 5 == 1, and
      # whenever the reading is anything other than "up".
      rpt = @pulse[:reading_rpt]
      if (rpt && (rpt < 6 || rpt % 5 == 1)) || @pulse[:reading] != "up"
        Clog.emit("Got new pulse.") { {got_pulse: {ubid: @resource.ubid, pulse: @pulse}} }
      end
    rescue => ex
      Clog.emit("Pulse checking has failed.") { {pulse_check_failure: {ubid: @resource.ubid, exception: Util.exception_to_hash(ex)}} }
    ensure
      # Runs on every exit path, including exceptions that are not
      # StandardErrors, so the loop thread is never left running against the
      # session after this method returns.
      run_event_loop = false
      pulse_thread&.join
      # Only this thread clears the session, and only once the loop thread
      # has finished, so @session is never reset concurrently.
      close_resource_session if event_loop_failed
    end

    nil
  end

  # Shuts down and closes the SSH session, then forgets the session. Does
  # nothing when no session is open.
  #
  # Errors from +close+ are ignored. An error from +shutdown!+ propagates to
  # the caller, but the session reference is cleared regardless so that a
  # dead session is not kept.
  #
  # @return [nil]
  def close_resource_session
    return if @session.nil?

    begin
      ssh_session = @session[:ssh_session]
      ssh_session.shutdown!
      begin
        ssh_session.close
      rescue
        # Best effort: the connection has already been shut down.
      end
    ensure
      @session = nil
    end

    nil
  end
end