mirror of
https://github.com/ubicloud/ubicloud.git
synced 2025-10-04 22:02:18 +08:00
This changes the Channel#wait call to a loop that checks that the timeout has not been exceeded. If the timeout has been exceeded, an SshTimeout error (subclass of SshError) is raised. This should catch any shell commands that take too long to execute, as well as any hangs in the SSH connections, which should prevent most current causes of apoptosis. I'm including the timeout in the Clog emit, since it may be useful. This replaces the newly added `sudo timeout 10s` in the VM nexus command with a `timeout: 10` argument. We can consider expanding the use of explicit timeouts to other commands, because the default timeout is fairly long (2-10 seconds less than the apoptosis timeout). This removes the SSH Channel#wait call inside the Channel#exec block. Session#open_channel returns the same channel it yields, and Channel#exec yields the receiver, so previously, we were waiting twice on the same channel. This removes the duplicated wait inside the block, as the wait outside the block will always be called.
297 lines
11 KiB
Ruby
297 lines
11 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require_relative "spec_helper"
|
|
|
|
RSpec.describe Sshable do
|
|
# Generating a key pair draws on system entropy, so create a single
# pair up front, freeze it, and reuse it in every example.
key = SshKey.generate.keypair.freeze

# The record under test, built with fixed connection details and the
# shared key pair as its first private key.
subject(:sa) do
  described_class.new(
    id: described_class.generate_uuid,
    host: "test.localhost",
    unix_user: "testuser",
    raw_private_key_1: key
  )
end
|
|
|
|
# After saving, the raw stored value must match the expected encoded
# prefix while the accessor still returns the original key.
it "can encrypt and decrypt a field" do
  sa.save_changes

  stored_value = sa.values[:raw_private_key_1]
  expect(stored_value).to match(/\AA[AgQ]..A/)
  expect(sa.raw_private_key_1).to eq(key)
end
|
|
|
|
describe "#maybe_ssh_session_lock_name" do
  # Without the constant, the method is expected to return nil.
  it "does not yield if SSH_SESSION_LOCK_NAME is not defined" do
    expect(sa.maybe_ssh_session_lock_name).to be_nil
  end

  # NOTE(review): presumably constants cannot be stubbed when
  # CLOVER_FREEZE is set, which is why this example is skipped then —
  # confirm against the test setup.
  unless ENV.key?("CLOVER_FREEZE")
    it "yields if SSH_SESSION_LOCK_NAME is defined" do
      stub_const("SSH_SESSION_LOCK_NAME", "testlockname")
      expect(sa.maybe_ssh_session_lock_name).to eq("testlockname")
    end
  end
end
|
|
|
|
describe "session locking" do
  # The shell script the examples below expect #connect to pass to #cmd
  # when a session lock name is configured. The squiggly heredoc strips
  # the common indentation, so no line carries leading whitespace.
  lock_script = <<~LOCK
    exec 999>/dev/shm/session-lock-testlockname || exit 92
    flock -xn 999 || { echo "Another session active: " testlockname; exit 124; }
    exec -a session-lock-testlockname sleep infinity </dev/null >/dev/null 2>&1 &
    disown
  LOCK

  # The script locks a file under /dev/shm, so it is only exercised for
  # real on systems that have that directory.
  if File.directory?("/dev/shm")
    it "interlocks" do
      # Kills every process whose command name is the lock holder's and
      # returns the result of Kernel#system for the pipeline.
      kill_lock_holders = -> { system(%q(ps -eo pid,args | awk '$2=="session-lock-testlockname"{print $1}' | xargs -I {} sh -c 'test -n "{}" && kill {}')) }
      kill_lock_holders.call
      quoted_script = lock_script.shellescape

      # First run takes the lock silently.
      first_output = `bash -c #{quoted_script}`
      expect([first_output, $?.exitstatus]).to eq(["", 0])

      # Second run finds the lock held and bails out with 124.
      second_output = `bash -c #{quoted_script}`
      expect([second_output, $?.exitstatus]).to eq(["Another session active: testlockname\n", 124])

      expect(kill_lock_holders.call).to be true
    end
  end

  describe "exit code handling" do
    # Every example connects with a lock name configured, an empty cache
    # entry, and Net::SSH.start replaced by a session double.
    before do
      expect(sa).to receive(:maybe_ssh_session_lock_name).and_return("testlockname")
      sa.invalidate_cache_entry
      expect(Net::SSH).to receive(:start) { instance_double(Net::SSH::Connection::Session, close: nil) }
    end

    it "runs the session lock script if SSH_SESSION_LOCK_NAME is set" do
      expect(sa).to receive(:cmd).with(lock_script, log: false)
      sa.connect
    end

    it "reports a failure to obtain a file descriptor with an obscure exit code" do
      failure = Sshable::SshError.new(lock_script, "", "", 92, nil)
      expect(sa).to receive(:cmd).with(lock_script, log: false).and_raise(failure)
      expect(Clog).to receive(:emit).with("session lock failure").and_wrap_original do |_original, _message, &payload|
        fail_msg = payload.call.dig(:contended_session_lock, :session_fail_msg)
        expect(fail_msg).to eq("could not create session lock file for testlockname")
      end
      sa.connect
    end

    it "reports lock conflicts when an obscure exit code is raised" do
      # A fixed id makes the ubid in the logged payload predictable.
      sa.id = "624ec0d1-95d9-8f31-bbaa-bcccb76fe98b"
      failure = Sshable::SshError.new(lock_script, "", "", 124, nil)
      expected_payload = {
        contended_session_lock: {
          exit_code: 124,
          session_fail_msg: "session lock conflict for testlockname",
          sshable_ubid: "shc97c1mcnv67qenbsk5qdzmrp",
          prog: nil
        }
      }
      expect(sa).to receive(:cmd).with(lock_script, log: false).and_raise(failure)
      expect(Clog).to receive(:emit).with("session lock failure").and_wrap_original do |_original, _message, &payload|
        expect(payload.call).to eq(expected_payload)
      end
      sa.connect
    end

    it "has a generic message for unrecognized errors" do
      failure = Sshable::SshError.new(lock_script, "", "", 1, nil)
      expect(sa).to receive(:cmd).with(lock_script, log: false).and_raise(failure)
      expect(Clog).to receive(:emit).with("session lock failure").and_wrap_original do |_original, _message, &payload|
        fail_msg = payload.call.dig(:contended_session_lock, :session_fail_msg)
        expect(fail_msg).to eq("unknown SshError")
      end
      sa.connect
    end
  end
end
|
|
|
|
describe "caching" do
  # The connection cache is stored in a thread-local, so each example
  # runs inside a brand-new thread to start with no cache at all.
  around do |example|
    runner = Thread.new { example.run }
    runner.join
  end

  it "can cache SSH connections" do
    expect(Net::SSH).to receive(:start) { instance_double(Net::SSH::Connection::Session, close: nil) }

    expect(Thread.current[:clover_ssh_cache]).to be_nil
    first_session = sa.connect
    expect(Thread.current[:clover_ssh_cache].size).to eq(1)
    second_session = sa.connect
    # Identity, not equality: the second connect must reuse the object.
    expect(first_session).to equal(second_session)

    expect(described_class.reset_cache).to eq []
    expect(Thread.current[:clover_ssh_cache]).to be_empty
  end

  it "does not crash if a cache has never been made" do
    expect { sa.invalidate_cache_entry }.not_to raise_error
  end

  it "can invalidate a single cache entry" do
    sess = instance_double(Net::SSH::Connection::Session, close: nil)
    cached = {["test.localhost", "testuser"] => sess}
    expect(Net::SSH).to receive(:start).and_return sess
    sa.connect

    expect { sa.invalidate_cache_entry }.to change { Thread.current[:clover_ssh_cache] }.from(cached).to({})
  end

  it "can reset caches when has cached connection" do
    sess = instance_double(Net::SSH::Connection::Session, close: nil)
    cached = {["test.localhost", "testuser"] => sess}
    expect(Net::SSH).to receive(:start).and_return sess
    sa.connect

    expect { described_class.reset_cache }.to change { Thread.current[:clover_ssh_cache] }.from(cached).to({})
  end

  it "can reset caches when has no cached connection" do
    expect(described_class.reset_cache).to eq([])
  end

  it "can reset caches even if session fails while closing" do
    sess = instance_double(Net::SSH::Connection::Session)
    close_failure = Sshable::SshError.new("bogus", "", "", nil, nil)
    expect(sess).to receive(:close).and_raise close_failure
    expect(Net::SSH).to receive(:start).and_return sess
    sa.connect

    # The error raised while closing is returned, and the cache is
    # emptied regardless.
    expect(described_class.reset_cache.first).to be_a Sshable::SshError
    expect(Thread.current[:clover_ssh_cache]).to eq({})
  end
end
|
|
|
|
describe "#cmd" do
  let(:session) { instance_double(Net::SSH::Connection::Session) }

  # Every example goes through #cmd, which obtains its session from
  # #connect; hand it the double instead of opening a connection.
  before do
    expect(sa).to receive(:connect).and_return(session).at_least(:once)
  end

  # Expects +channel+ to register one on_request handler for +request+
  # and immediately invokes that handler with a buffer double whose
  # read_long returns +value+.
  def answer_exit_request(channel, request, value)
    expect(channel).to receive(:on_request).with(request) do |&handler|
      buf = instance_double(Net::SSH::Buffer)
      expect(buf).to receive(:read_long).and_return(value)
      handler.call(nil, buf)
    end
  end

  # Scripts the session double so that running +cmd+ over it reports the
  # given exit status and exit signal and yields +stdout+ and +stderr+
  # to the data callbacks.
  #
  # Each nesting level names its own block parameter so that it is clear
  # which callback is being invoked (open_channel's vs. exec's).
  def simulate(cmd:, exit_status:, exit_signal:, stdout:, stderr:)
    allow(session).to receive(:loop).and_yield
    expect(session).to receive(:open_channel) do |&on_open|
      chan = instance_spy(Net::SSH::Connection::Channel)
      allow(chan).to receive(:connection).and_return(session)
      expect(chan).to receive(:exec).with(cmd) do |&on_exec|
        exec_chan = instance_spy(Net::SSH::Connection::Channel)
        answer_exit_request(exec_chan, "exit-status", exit_status)
        answer_exit_request(exec_chan, "exit-signal", exit_signal)
        expect(exec_chan).to receive(:on_data).and_yield(instance_double(Net::SSH::Connection::Channel), stdout)
        expect(exec_chan).to receive(:on_extended_data).and_yield(nil, 1, stderr)
        allow(exec_chan).to receive(:connection).and_return(session)

        on_exec.call(exec_chan, true)
      end
      on_open.call(chan, true)
      # open_channel returns the channel it yielded.
      chan
    end
  end

  it "can run a command" do
    # Covers every combination of REPL mode and logging, in the order
    # (false, false), (false, true), (true, false), (true, true).
    [false, true].product([false, true]).each do |repl_value, log_value|
      allow(described_class).to receive(:repl?).and_return(repl_value)
      if repl_value
        # Note that in the REPL, stdout and stderr get multiplexed
        # into stderr in real time, packet by packet.
        expect($stderr).to receive(:write).with("hello")
        expect($stderr).to receive(:write).with("world")
      end

      if log_value
        sa.instance_variable_set(:@connect_duration, 1.1)
        # In the REPL the streams are not included in the log payload.
        expected_streams = repl_value ? {} : {stdout: "hello", stderr: "world"}
        expect(Clog).to receive(:emit).with("ssh cmd execution") do |&payload|
          expect(payload.call[:ssh].slice(:stdout, :stderr)).to eq(expected_streams)
        end
      end

      simulate(cmd: "echo hello", exit_status: 0, exit_signal: nil, stdout: "hello", stderr: "world")
      expect(sa.cmd("echo hello", log: log_value, timeout: nil)).to eq("hello")
    end
  end

  it "raises an SshError with a non-zero exit status" do
    simulate(cmd: "exit 1", exit_status: 1, exit_signal: 127, stderr: "", stdout: "")
    expect { sa.cmd("exit 1", timeout: nil) }.to raise_error Sshable::SshError, "command exited with an error: exit 1"
  end

  it "raises an SshError with a nil exit status" do
    simulate(cmd: "exit 1", exit_status: nil, exit_signal: nil, stderr: "", stdout: "")
    expect { sa.cmd("exit 1", timeout: nil) }.to raise_error Sshable::SshTimeout, "command timed out: exit 1"
  end

  it "supports custom timeout" do
    simulate(cmd: "echo hello", exit_status: 0, exit_signal: nil, stdout: "hello", stderr: "world")
    expect(sa.cmd("echo hello", log: false, timeout: 2)).to eq("hello")
  end

  it "supports default timeout" do
    simulate(cmd: "echo hello", exit_status: 0, exit_signal: nil, stdout: "hello", stderr: "world")
    expect(sa.cmd("echo hello", log: false)).to eq("hello")
  end

  it "supports default timeout based on thread apoptosis_at variable if no explicit timeout is given if variable is available" do
    # Remember whatever was there so the thread-local is restored rather
    # than clobbered once the example finishes.
    previous_apoptosis_at = Thread.current[:apoptosis_at]
    Thread.current[:apoptosis_at] = Time.now + 60
    simulate(cmd: "echo hello", exit_status: 0, exit_signal: nil, stdout: "hello", stderr: "world")
    expect(sa.cmd("echo hello", log: false)).to eq("hello")
  ensure
    Thread.current[:apoptosis_at] = previous_apoptosis_at
  end

  it "invalidates the cache if the session raises an error" do
    err = IOError.new("the party is over")
    expect(session).to receive(:open_channel).and_raise err
    expect(sa).to receive(:invalidate_cache_entry)
    expect { sa.cmd("irrelevant") }.to raise_error err
  end
end
|
|
|
|
describe "daemonizer methods" do
  let(:unit_name) { "test_unit" }
  let(:run_command) { "sudo host/bin/setup-vm prep test_unit" }
  let(:stdin_data) { "secret_data" }

  # The single-argument helpers differ only in the subcommand they pass
  # to daemonizer2, so they are checked from one table:
  # [example description, helper method, expected command line].
  [
    ["calls cmd with the correct check command", :d_check, "common/bin/daemonizer2 check test_unit"],
    ["calls cmd with the correct clean command", :d_clean, "common/bin/daemonizer2 clean test_unit"],
    ["calls cmd with the correct restart command", :d_restart, "common/bin/daemonizer2 restart test_unit"]
  ].each do |description, helper, expected_cmd|
    it description do
      expect(sa).to receive(:cmd).with(expected_cmd)
      sa.public_send(helper, unit_name)
    end
  end

  # d_run is expected to shell-escape the command to run and to forward
  # stdin (nil when not given) with logging enabled.
  it "calls cmd with the correct run command and no stdin" do
    expected_cmd = "common/bin/daemonizer2 run test_unit sudo\\ host/bin/setup-vm\\ prep\\ test_unit"
    expect(sa).to receive(:cmd).with(expected_cmd, stdin: nil, log: true)
    sa.d_run(unit_name, run_command)
  end

  it "calls cmd with the correct run command and passes stdin" do
    expected_cmd = "common/bin/daemonizer2 run test_unit sudo\\ host/bin/setup-vm\\ prep\\ test_unit"
    expect(sa).to receive(:cmd).with(expected_cmd, stdin: stdin_data, log: true)
    sa.d_run(unit_name, run_command, stdin: stdin_data)
  end
end
|
|
end
|