ubicloud/spec/model/sshable_spec.rb
Jeremy Evans c360c357d1 Use a timeout by default in Sshable#cmd
This changes the Channel#wait call to a loop that checks
that the timeout has not been exceeded.  If the timeout
has been exceeded, an SshTimeout error (subclass of SshError)
is raised.

This should catch any shell commands that take too long to
execute, as well as any hangs in the SSH connections, which
should prevent most current causes of apoptosis.

I'm including the timeout in the Clog emit, since it may
be useful.

This replaces the newly added `sudo timeout 10s` in the VM nexus
command with a `timeout: 10` argument.  We can consider
expanding the use of explicit timeouts to other commands,
because the default timeout is fairly long (2-10 seconds less
than the apoptosis timeout).

This removes the SSH Channel#wait call inside the Channel#exec
block.  Session#open_channel returns the same channel it yields,
and Channel#exec yields the receiver, so previously, we were waiting
twice on the same channel.  This removes the duplicated wait inside
the block, as the wait outside the block will always be called.
2025-10-02 08:14:56 -07:00

297 lines
11 KiB
Ruby

# frozen_string_literal: true
require_relative "spec_helper"
RSpec.describe Sshable do
# Generate one key pair when the example group is defined and share the
# frozen result across every example, so the suite does not spend entropy
# on a fresh key per test.
key = SshKey.generate.keypair.freeze

# The Sshable under test: built with `new`, given a generated UUID, a
# placeholder host and user, and the shared key.
subject(:sa) do
  described_class.new(
    id: described_class.generate_uuid,
    host: "test.localhost",
    unix_user: "testuser",
    raw_private_key_1: key
  )
end
# Round-trips the private key through the model: the raw column value must
# match the expected prefix pattern (presumably the header of the encrypted
# encoding), while the accessor must hand back the original key.
it "can encrypt and decrypt a field" do
  sa.save_changes

  stored_value = sa.values[:raw_private_key_1]
  expect(stored_value).to match(/\AA[AgQ]..A/)
  expect(sa.raw_private_key_1).to eq(key)
end
describe "#maybe_ssh_session_lock_name" do
  # Without the constant, the method is expected to return nil.
  it "does not yield if SSH_SESSION_LOCK_NAME is not defined" do
    lock_name = sa.maybe_ssh_session_lock_name
    expect(lock_name).to be_nil
  end

  # This example is only defined when CLOVER_FREEZE is unset.
  # NOTE(review): presumably constants cannot be stubbed in frozen mode;
  # confirm against the freeze setup.
  unless ENV["CLOVER_FREEZE"]
    it "yields if SSH_SESSION_LOCK_NAME is defined" do
      expected_name = "testlockname"
      stub_const("SSH_SESSION_LOCK_NAME", expected_name)
      expect(sa.maybe_ssh_session_lock_name).to eq(expected_name)
    end
  end
end
describe "session locking" do
  # Shell script the examples below expect Sshable#connect to pass to #cmd
  # in order to take the session lock.  A plain (non-squiggly) heredoc is
  # used, so the script lines and the terminator must stay at column zero
  # to keep the text byte-for-byte.
  lock_script = <<LOCK
exec 999>/dev/shm/session-lock-testlockname || exit 92
flock -xn 999 || { echo "Another session active: " testlockname; exit 124; }
exec -a session-lock-testlockname sleep infinity </dev/null >/dev/null 2>&1 &
disown
LOCK

  # Builds the SshError that #cmd is stubbed to raise for a given exit code.
  lock_error = ->(exit_code) { Sshable::SshError.new(lock_script, "", "", exit_code, nil) }

  # Runs the script for real through bash; only defined where /dev/shm,
  # which the script writes its lock file to, exists.
  if File.directory?("/dev/shm")
    it "interlocks" do
      # Kills every process whose command name is the lock holder's name,
      # using ps/awk/xargs instead of pkill.
      kill_lock_holders = -> {
        system(%q(ps -eo pid,args | awk '$2=="session-lock-testlockname"{print $1}' | xargs -I {} sh -c 'test -n "{}" && kill {}'))
      }
      # Start from a clean slate in case an earlier run left a holder behind.
      kill_lock_holders.call

      escaped_script = lock_script.shellescape

      # First run acquires the lock silently.
      first_output = `bash -c #{escaped_script}`
      expect([first_output, $?.exitstatus]).to eq(["", 0])

      # Second run finds the lock held and reports the conflict.
      second_output = `bash -c #{escaped_script}`
      expect([second_output, $?.exitstatus]).to eq(["Another session active: testlockname\n", 124])

      # Cleaning up must find and kill the background lock holder.
      expect(kill_lock_holders.call).to be true
    end
  end

  describe "exit code handling" do
    # Every example connects with a lock name configured, an empty cache
    # entry, and Net::SSH.start replaced by a session double.
    before do
      expect(sa).to receive(:maybe_ssh_session_lock_name).and_return("testlockname")
      sa.invalidate_cache_entry
      expect(Net::SSH).to receive(:start) do
        instance_double(Net::SSH::Connection::Session, close: nil)
      end
    end

    it "runs the session lock script if SSH_SESSION_LOCK_NAME is set" do
      expect(sa).to receive(:cmd).with(lock_script, log: false)
      sa.connect
    end

    it "reports a failure to obtain a file descriptor with an obscure exit code" do
      expect(sa).to receive(:cmd).with(lock_script, log: false).and_raise(lock_error.call(92))
      # The wrapped original is never invoked; only the payload block
      # handed to Clog.emit is evaluated and inspected.
      expect(Clog).to receive(:emit).with("session lock failure").and_wrap_original do |_emit, _message, &payload|
        failure_msg = payload.call.dig(:contended_session_lock, :session_fail_msg)
        expect(failure_msg).to eq("could not create session lock file for testlockname")
      end
      sa.connect
    end

    it "reports lock conflicts when an obscure exit code is raised" do
      # Fixed id so the ubid in the emitted payload is predictable.
      sa.id = "624ec0d1-95d9-8f31-bbaa-bcccb76fe98b"
      expect(sa).to receive(:cmd).with(lock_script, log: false).and_raise(lock_error.call(124))
      expect(Clog).to receive(:emit).with("session lock failure").and_wrap_original do |_emit, _message, &payload|
        emitted = payload.call
        expect(emitted).to eq(contended_session_lock: {
          exit_code: 124,
          session_fail_msg: "session lock conflict for testlockname",
          sshable_ubid: "shc97c1mcnv67qenbsk5qdzmrp",
          prog: nil
        })
      end
      sa.connect
    end

    it "has a generic message for unrecognized errors" do
      expect(sa).to receive(:cmd).with(lock_script, log: false).and_raise(lock_error.call(1))
      expect(Clog).to receive(:emit).with("session lock failure").and_wrap_original do |_emit, _message, &payload|
        failure_msg = payload.call.dig(:contended_session_lock, :session_fail_msg)
        expect(failure_msg).to eq("unknown SshError")
      end
      sa.connect
    end
  end
end
describe "caching" do
  # The cache is thread local, so every example runs inside a brand-new
  # thread and therefore starts from pristine thread state.
  around { |example| Thread.new { example.run }.join }

  it "can cache SSH connections" do
    expect(Net::SSH).to receive(:start) do
      instance_double(Net::SSH::Connection::Session, close: nil)
    end

    # Nothing is cached before the first connect.
    expect(Thread.current[:clover_ssh_cache]).to be_nil
    initial_session = sa.connect
    expect(Thread.current[:clover_ssh_cache].size).to eq(1)

    # A second connect must hand back the very same session object.
    repeat_session = sa.connect
    expect(initial_session).to equal(repeat_session)

    expect(described_class.reset_cache).to eq []
    expect(Thread.current[:clover_ssh_cache]).to be_empty
  end

  it "does not crash if a cache has never been made" do
    expect { sa.invalidate_cache_entry }.not_to raise_error
  end

  it "can invalidate a single cache entry" do
    session = instance_double(Net::SSH::Connection::Session, close: nil)
    expect(Net::SSH).to receive(:start).and_return session
    sa.connect

    # Cache entries are keyed by [host, unix_user].
    populated_cache = {["test.localhost", "testuser"] => session}
    expect {
      sa.invalidate_cache_entry
    }.to change { Thread.current[:clover_ssh_cache] }.from(populated_cache).to({})
  end

  it "can reset caches when has cached connection" do
    session = instance_double(Net::SSH::Connection::Session, close: nil)
    expect(Net::SSH).to receive(:start).and_return session
    sa.connect

    populated_cache = {["test.localhost", "testuser"] => session}
    expect {
      described_class.reset_cache
    }.to change { Thread.current[:clover_ssh_cache] }.from(populated_cache).to({})
  end

  it "can reset caches when has no cached connection" do
    expect(described_class.reset_cache).to eq([])
  end

  it "can reset caches even if session fails while closing" do
    session = instance_double(Net::SSH::Connection::Session)
    expect(session).to receive(:close).and_raise Sshable::SshError.new("bogus", "", "", nil, nil)
    expect(Net::SSH).to receive(:start).and_return session
    sa.connect

    # The close failure is returned rather than raised, and the cache is
    # emptied regardless.
    close_errors = described_class.reset_cache
    expect(close_errors.first).to be_a Sshable::SshError
    expect(Thread.current[:clover_ssh_cache]).to eq({})
  end
end
describe "#cmd" do
  let(:session) { instance_double(Net::SSH::Connection::Session) }

  # Every example talks to the doubled session instead of opening a real
  # SSH connection.
  before do
    expect(sa).to receive(:connect).and_return(session).at_least(:once)
  end

  # Scripts the doubled session so that one #cmd call observes a complete
  # command execution.
  #
  # cmd         - the exact command string Channel#exec must receive
  # exit_status - value the "exit-status" request buffer yields via read_long
  # exit_signal - value the "exit-signal" request buffer yields via read_long
  # stdout      - data passed to the on_data callback
  # stderr      - data passed to the on_extended_data callback
  #
  # Each callback block is given its own name so it is clear which layer
  # (open_channel, exec, or a request handler) is being invoked.
  def simulate(cmd:, exit_status:, exit_signal:, stdout:, stderr:)
    # The wait loop yields once and returns.
    allow(session).to receive(:loop).and_yield

    expect(session).to receive(:open_channel) do |&on_open|
      chan = instance_spy(Net::SSH::Connection::Channel)
      allow(chan).to receive(:connection).and_return(session)

      expect(chan).to receive(:exec).with(cmd) do |&on_exec|
        chan2 = instance_spy(Net::SSH::Connection::Channel)

        expect(chan2).to receive(:on_request).with("exit-status") do |&on_status|
          buf = instance_double(Net::SSH::Buffer)
          expect(buf).to receive(:read_long).and_return(exit_status)
          on_status.call(nil, buf)
        end

        expect(chan2).to receive(:on_request).with("exit-signal") do |&on_signal|
          buf = instance_double(Net::SSH::Buffer)
          expect(buf).to receive(:read_long).and_return(exit_signal)
          on_signal.call(nil, buf)
        end

        expect(chan2).to receive(:on_data).and_yield(instance_double(Net::SSH::Connection::Channel), stdout)
        expect(chan2).to receive(:on_extended_data).and_yield(nil, 1, stderr)
        allow(chan2).to receive(:connection).and_return(session)

        on_exec.call(chan2, true)
      end

      on_open.call(chan, true)
      # open_channel returns the channel it yielded.
      chan
    end
  end

  it "can run a command" do
    # Covers every repl/log combination, in the same order as two nested
    # [false, true] loops.
    [false, true].product([false, true]).each do |repl_value, log_value|
      allow(described_class).to receive(:repl?).and_return(repl_value)

      if repl_value
        # Note that in the REPL, stdout and stderr get multiplexed
        # into stderr in real time, packet by packet.
        expect($stderr).to receive(:write).with("hello")
        expect($stderr).to receive(:write).with("world")
      end

      if log_value
        sa.instance_variable_set(:@connect_duration, 1.1)
        expect(Clog).to receive(:emit).with("ssh cmd execution") do |&payload|
          dat = payload.call
          captured = dat[:ssh].slice(:stdout, :stderr)
          if repl_value
            # Output already went to $stderr, so it is not logged again.
            expect(captured).to be_empty
          else
            expect(captured).to eq({stdout: "hello", stderr: "world"})
          end
        end
      end

      simulate(cmd: "echo hello", exit_status: 0, exit_signal: nil, stdout: "hello", stderr: "world")
      expect(sa.cmd("echo hello", log: log_value, timeout: nil)).to eq("hello")
    end
  end

  it "raises an SshError with a non-zero exit status" do
    simulate(cmd: "exit 1", exit_status: 1, exit_signal: 127, stderr: "", stdout: "")
    expect { sa.cmd("exit 1", timeout: nil) }.to raise_error Sshable::SshError, "command exited with an error: exit 1"
  end

  # A missing exit status is reported as a timeout.
  it "raises an SshError with a nil exit status" do
    simulate(cmd: "exit 1", exit_status: nil, exit_signal: nil, stderr: "", stdout: "")
    expect { sa.cmd("exit 1", timeout: nil) }.to raise_error Sshable::SshTimeout, "command timed out: exit 1"
  end

  it "supports custom timeout" do
    simulate(cmd: "echo hello", exit_status: 0, exit_signal: nil, stdout: "hello", stderr: "world")
    expect(sa.cmd("echo hello", log: false, timeout: 2)).to eq("hello")
  end

  it "supports default timeout" do
    simulate(cmd: "echo hello", exit_status: 0, exit_signal: nil, stdout: "hello", stderr: "world")
    expect(sa.cmd("echo hello", log: false)).to eq("hello")
  end

  it "supports default timeout based on thread apoptosis_at variable if no explicit timeout is given if variable is available" do
    # Remember whatever was there so the thread-local is restored exactly,
    # rather than unconditionally cleared.
    previous_apoptosis_at = Thread.current[:apoptosis_at]
    Thread.current[:apoptosis_at] = Time.now + 60
    simulate(cmd: "echo hello", exit_status: 0, exit_signal: nil, stdout: "hello", stderr: "world")
    expect(sa.cmd("echo hello", log: false)).to eq("hello")
  ensure
    Thread.current[:apoptosis_at] = previous_apoptosis_at
  end

  it "invalidates the cache if the session raises an error" do
    err = IOError.new("the party is over")
    expect(session).to receive(:open_channel).and_raise err
    expect(sa).to receive(:invalidate_cache_entry)
    expect { sa.cmd("irrelevant") }.to raise_error err
  end
end
describe "daemonizer methods" do
  let(:unit_name) { "test_unit" }
  let(:run_command) { "sudo host/bin/setup-vm prep test_unit" }
  let(:stdin_data) { "secret_data" }
  # Command line expected for d_run: the command to run appears as a single
  # backslash-escaped argument after the unit name.
  let(:expected_run_cmd) { "common/bin/daemonizer2 run test_unit sudo\\ host/bin/setup-vm\\ prep\\ test_unit" }

  # d_check, d_clean and d_restart each forward to cmd with the matching
  # daemonizer2 subcommand followed by the unit name.
  %w[check clean restart].each do |action|
    it "calls cmd with the correct #{action} command" do
      expect(sa).to receive(:cmd).with("common/bin/daemonizer2 #{action} test_unit")
      sa.public_send("d_#{action}", unit_name)
    end
  end

  it "calls cmd with the correct run command and no stdin" do
    expect(sa).to receive(:cmd).with(expected_run_cmd, stdin: nil, log: true)
    sa.d_run(unit_name, run_command)
  end

  it "calls cmd with the correct run command and passes stdin" do
    expect(sa).to receive(:cmd).with(expected_run_cmd, stdin: stdin_data, log: true)
    sa.d_run(unit_name, run_command, stdin: stdin_data)
  end
end
end