Since the device_path returned by the VmStorageVolume on AWS is an ephemeral identifier, we need to look at the disk sizes to determine the correct device to format when mounting the data disk.
925 lines
48 KiB
Ruby
925 lines
48 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require_relative "../../model/spec_helper"
|
|
|
|
RSpec.describe Prog::Postgres::PostgresServerNexus do
|
|
# Prog instance under test, built on top of the persisted strand below.
subject(:nx) { described_class.new(st) }

# Strand row backing the prog; it starts at the "start" label.
let(:st) do
  Strand.create(id: "0d77964d-c416-8edb-9237-7e7dd5d6fcf8", prog: "Postgres::PostgresServerNexus", label: "start")
end

# Verified double of the managed server, wired to timeline and VM doubles.
let(:postgres_server) do
  timeline_double = instance_double(
    PostgresTimeline,
    id: "f6644aae-9759-8ada-9aef-9b6cfccdc167",
    generate_walg_config: "walg config",
    blob_storage: instance_double(MinioCluster, root_certs: "certs"),
    aws?: false
  )
  vm_double = instance_double(
    Vm,
    id: "1c7d59ee-8d46-8374-9553-6144490ecec5",
    sshable: sshable,
    ephemeral_net4: "1.1.1.1",
    private_subnets: [instance_double(PrivateSubnet)]
  )

  instance_double(
    PostgresServer,
    id: "0d77964d-c416-8edb-9237-7e7dd5d6fcf8",
    ubid: "pgubid",
    timeline: timeline_double,
    vm: vm_double,
    data_device_path: "/dev/vdb"
  )
end

# Verified double of the parent resource; its representative server is the double above.
let(:resource) do
  instance_double(
    PostgresResource,
    ubid: "pgresourcesubid",
    root_cert_1: "root_cert_1",
    root_cert_2: "root_cert_2",
    server_cert: "server_cert",
    server_cert_key: "server_cert_key",
    superuser_password: "dummy-password",
    version: "16",
    representative_server: postgres_server,
    metric_destinations: [
      instance_double(PostgresMetricDestination, ubid: "pgmetricubid", url: "url", username: "username", password: "password")
    ],
    ca_certificates: "root_cert_1\nroot_cert_2",
    location_id: Location::HETZNER_FSN1_ID
  )
end

# Bare Sshable double: every command an example expects must be declared explicitly.
let(:sshable) { instance_double(Sshable) }

before do
  # Route the prog to the doubles instead of loading records from the database.
  allow(nx).to receive(:postgres_server).and_return(postgres_server)
  allow(postgres_server).to receive_messages(resource: resource, read_replica?: false)
end
|
|
|
|
describe ".assemble" do
  let(:user_project) { Project.create_with_id(name: "default") }

  let(:postgres_resource) {
    PostgresResource.create_with_id(
      project_id: user_project.id,
      location_id: Location::HETZNER_FSN1_ID,
      name: "pg-name",
      target_vm_size: "standard-2",
      target_storage_size_gib: 128,
      superuser_password: "dummy-password"
    )
  }

  # AWS location shared by the AMI-selection examples; created lazily on first use.
  let(:aws_location) {
    Location.create(
      name: "us-west-2",
      display_name: "aws-us-west-2",
      ui_name: "aws-us-west-2",
      visible: true,
      provider: "aws",
      project_id: user_project.id
    )
  }

  # Expects the VM to be assembled with the given boot image and stubs out
  # PostgresServer creation, so the example only checks image selection.
  def expect_assembly_with_boot_image(boot_image)
    expect(Prog::Vm::Nexus).to receive(:assemble_with_sshable).with(anything, hash_including(boot_image: boot_image)).and_return(instance_double(Strand, id: "62c62ddb-5b5a-4e9e-b534-e73c16f86bcb"))
    expect(PostgresServer).to receive(:create).and_return(instance_double(PostgresServer, id: "5c13fd6a-25c2-4fa4-be48-2846f127526a"))
  end

  # Calls .assemble for a representative server using a fixed timeline id.
  def assemble_representative
    described_class.assemble(resource_id: postgres_resource.id, timeline_id: "91588cda-7122-4d6a-b01c-f33c30cb17d8", timeline_access: "push", representative_at: Time.now)
  end

  it "creates postgres server and vm with sshable" do
    postgres_timeline = PostgresTimeline.create_with_id
    postgres_project = Project.create_with_id(name: "default")
    expect(Config).to receive(:postgres_service_project_id).and_return(postgres_project.id).at_least(:once)

    strand = described_class.assemble(resource_id: postgres_resource.id, timeline_id: postgres_timeline.id, timeline_access: "push", representative_at: Time.now)
    server = PostgresServer[strand.id]
    expect(server).not_to be_nil
    expect(server.vm).not_to be_nil
    expect(server.vm.sshable).not_to be_nil

    # Without representative_at the new server is expected to start as "catching_up".
    strand = described_class.assemble(resource_id: postgres_resource.id, timeline_id: postgres_timeline.id, timeline_access: "push")
    expect(PostgresServer[strand.id].synchronization_status).to eq("catching_up")
  end

  it "picks correct base image for Lantern" do
    expect(PostgresResource).to receive(:[]).and_return(postgres_resource)
    expect(postgres_resource).to receive(:flavor).and_return(PostgresResource::Flavor::LANTERN).at_least(:once)
    expect_assembly_with_boot_image("postgres16-lantern-ubuntu-2204")
    assemble_representative
  end

  it "picks correct base image for AWS-pg16" do
    expect(PostgresResource).to receive(:[]).and_return(postgres_resource)
    expect(postgres_resource).to receive(:location).and_return(aws_location).at_least(:once)
    expect(postgres_resource).to receive(:version).and_return("16").at_least(:once)
    expect_assembly_with_boot_image("ami-0c54521352e6e92bb")
    assemble_representative
  end

  it "picks correct base image for AWS-pg17" do
    expect(PostgresResource).to receive(:[]).and_return(postgres_resource)
    expect(postgres_resource).to receive(:version).and_return("17").at_least(:once)
    expect(postgres_resource).to receive(:location).and_return(aws_location).at_least(:once)
    expect(postgres_resource).to receive(:location_id).and_return(aws_location.id).at_least(:once)
    expect_assembly_with_boot_image("ami-0ba58268c42166e1d")
    assemble_representative
  end

  it "raises error if the version is not supported for AWS" do
    expect(PostgresResource).to receive(:[]).and_return(postgres_resource)
    expect(postgres_resource).to receive(:location).and_return(aws_location).at_least(:once)
    expect(postgres_resource).to receive(:version).and_return("18").at_least(:once)

    # Match on a pattern rather than the exact text: the quote character Ruby
    # puts before the method name in NoMethodError messages differs by version.
    expect { assemble_representative }.to raise_error(NoMethodError, /undefined method ['`]aws_ami_id' for nil/)
  end

  it "errors out for unknown flavor" do
    expect(PostgresResource).to receive(:[]).and_return(postgres_resource)
    expect(postgres_resource).to receive(:flavor).and_return("boring_flavor").at_least(:once)
    expect { assemble_representative }.to raise_error(RuntimeError, "Unknown PostgreSQL flavor: boring_flavor")
  end
end
|
|
|
|
describe "#before_run" do
  # Every example starts with the destroy semaphore set.
  before do
    expect(nx).to receive(:when_destroy_set?).and_yield
  end

  it "hops to destroy when needed" do
    expect(postgres_server).to receive(:resource).and_return(nil)

    expect { nx.before_run }.to hop("destroy")
  end

  it "does not hop to destroy if already in the destroy state" do
    expect(resource).to receive(:strand).and_return(nil)
    expect(nx.strand).to receive(:label).and_return("destroy").at_least(:once)

    expect { nx.before_run }.not_to hop("destroy")
  end

  it "cancels the destroy if the server is picked up for take over" do
    waiting_resource_strand = instance_double(Strand, label: "wait")
    expect(resource).to receive(:strand).and_return(waiting_resource_strand)
    expect(nx.strand).to receive(:label).and_return("prepare_for_take_over").at_least(:once)
    expect(nx).to receive(:decr_destroy)

    expect { nx.before_run }.not_to hop("destroy")
  end

  it "pops additional operations from stack" do
    destroying_resource_strand = instance_double(Strand, label: "destroy")
    expect(resource).to receive(:strand).and_return(destroying_resource_strand)
    expect(nx.strand).to receive(:label).and_return("destroy").at_least(:once)
    # Report two stack frames on the strand.
    expect(nx.strand.stack).to receive(:count).and_return(2)

    expect { nx.before_run }.to exit({"msg" => "operation is cancelled due to the destruction of the postgres server"})
  end
end
|
|
|
|
describe "#start" do
  it "naps if vm not ready" do
    # The VM strand is still at "prep".
    expect(postgres_server.vm).to receive(:strand).and_return(instance_double(Strand, label: "prep"))
    expect { nx.start }.to nap(5)
  end

  # Description corrected: the example asserts the initial_provisioning
  # increment and the hop; it declares no interaction with the sshable double.
  it "increments initial_provisioning and hops to bootstrap_rhizome" do
    expect(postgres_server.vm).to receive(:strand).and_return(instance_double(Strand, label: "wait"))
    expect(postgres_server).to receive(:incr_initial_provisioning)
    expect { nx.start }.to hop("bootstrap_rhizome")
  end
end
|
|
|
|
describe "#bootstrap_rhizome" do
  it "buds a bootstrap rhizome process" do
    bud_arguments = {"target_folder" => "postgres", "subject_id" => postgres_server.vm.id, "user" => "ubi"}

    expect(postgres_server).to receive(:primary?).and_return(true)
    expect(nx).to receive(:bud).with(Prog::BootstrapRhizome, bud_arguments)

    expect { nx.bootstrap_rhizome }.to hop("wait_bootstrap_rhizome")
  end

  it "sets longer deadline for non-primary servers" do
    # Deadline the example expects for the "wait" label, in seconds.
    extended_deadline = 120 * 60

    expect(postgres_server).to receive(:primary?).and_return(false)
    expect(nx).to receive(:register_deadline).with("wait", extended_deadline)

    expect { nx.bootstrap_rhizome }.to hop("wait_bootstrap_rhizome")
  end
end
|
|
|
|
describe "#wait_bootstrap_rhizome" do
  it "hops to mount_data_disk if there are no sub-programs running" do
    expect { nx.wait_bootstrap_rhizome }.to hop("mount_data_disk")
  end

  # Description corrected: the asserted outcome is a 5-second nap, not a donate.
  it "naps if there are sub-programs running" do
    # A child strand with a lease in the future stands in for a running sub-program.
    Strand.create(parent_id: st.id, prog: "BootstrapRhizome", label: "start", stack: [{}], lease: Time.now + 10)
    expect { nx.wait_bootstrap_rhizome }.to nap(5)
  end
end
|
|
|
|
describe "#mount_data_disk" do
  # Daemonizer status probe for the format_disk job.
  let(:check_format_cmd) { "common/bin/daemonizer --check format_disk" }

  it "formats data disk if format command is not sent yet or failed" do
    expect(sshable).to receive(:cmd).with("common/bin/daemonizer 'sudo mkfs --type ext4 /dev/vdb' format_disk").twice

    # Both statuses must (re)issue the format command and nap.
    %w[NotStarted Failed].each do |status|
      expect(sshable).to receive(:cmd).with(check_format_cmd).and_return(status)
      expect { nx.mount_data_disk }.to nap(5)
    end
  end

  it "mounts data disk if format disk is succeeded and hops to configure_walg_credentials" do
    expect(sshable).to receive(:cmd).with(check_format_cmd).and_return("Succeeded")

    [
      "sudo mkdir -p /dat",
      "sudo common/bin/add_to_fstab /dev/vdb /dat ext4 defaults 0 0",
      "sudo mount /dev/vdb /dat"
    ].each do |mount_step|
      expect(sshable).to receive(:cmd).with(mount_step)
    end

    expect { nx.mount_data_disk }.to hop("configure_walg_credentials")
  end

  it "naps if script return unknown status" do
    expect(sshable).to receive(:cmd).with(check_format_cmd).and_return("Unknown")

    expect { nx.mount_data_disk }.to nap(5)
  end
end
|
|
|
|
describe "#configure_walg_credentials" do
  # Command that writes the WAL-G environment file.
  let(:walg_env_cmd) { "sudo -u postgres tee /etc/postgresql/wal-g.env > /dev/null" }
  # Command that installs the blob storage CA bundle.
  let(:blob_ca_cmd) { "sudo tee /usr/lib/ssl/certs/blob_storage_ca.crt > /dev/null" }

  it "hops to initialize_empty_database if the server is primary" do
    expect(sshable).to receive(:cmd).with(walg_env_cmd, stdin: "walg config")
    expect(sshable).to receive(:cmd).with(blob_ca_cmd, stdin: "certs")
    expect(postgres_server).to receive(:primary?).and_return(true)

    expect { nx.configure_walg_credentials }.to hop("initialize_empty_database")
  end

  it "hops to initialize_database_from_backup if the server is not primary" do
    expect(sshable).to receive(:cmd).with(walg_env_cmd, stdin: "walg config")
    expect(sshable).to receive(:cmd).with(blob_ca_cmd, stdin: "certs")
    expect(postgres_server).to receive(:primary?).and_return(false)

    expect { nx.configure_walg_credentials }.to hop("initialize_database_from_backup")
  end

  it "doesn't put the blob_storage_ca if the timeline is aws" do
    aws_timeline = postgres_server.timeline
    expect(aws_timeline).to receive(:aws?).and_return(true)
    expect(sshable).to receive(:cmd).with(walg_env_cmd, stdin: "walg config")
    expect(sshable).not_to receive(:cmd).with(blob_ca_cmd, stdin: "certs")
    expect(postgres_server).to receive(:primary?).and_return(true)

    expect { nx.configure_walg_credentials }.to hop("initialize_empty_database")
  end
end
|
|
|
|
describe "#initialize_empty_database" do
  # Daemonizer status probe for the initialize_empty_database job.
  let(:check_cmd) { "common/bin/daemonizer --check initialize_empty_database" }

  it "triggers initialize_empty_database if initialize_empty_database command is not sent yet or failed" do
    expect(sshable).to receive(:cmd).with("common/bin/daemonizer 'sudo postgres/bin/initialize-empty-database 16' initialize_empty_database").twice

    # Both statuses must (re)issue the initialization command and nap.
    %w[NotStarted Failed].each do |status|
      expect(sshable).to receive(:cmd).with(check_cmd).and_return(status)
      expect { nx.initialize_empty_database }.to nap(5)
    end
  end

  it "hops to refresh_certificates if initialize_empty_database command is succeeded" do
    expect(sshable).to receive(:cmd).with(check_cmd).and_return("Succeeded")

    expect { nx.initialize_empty_database }.to hop("refresh_certificates")
  end

  it "naps if script return unknown status" do
    expect(sshable).to receive(:cmd).with(check_cmd).and_return("Unknown")

    expect { nx.initialize_empty_database }.to nap(5)
  end
end
|
|
|
|
describe "#initialize_database_from_backup" do
  # Daemonizer status probe for the initialize_database_from_backup job.
  let(:check_cmd) { "common/bin/daemonizer --check initialize_database_from_backup" }

  it "triggers initialize_database_from_backup if initialize_database_from_backup command is not sent yet or failed" do
    expect(postgres_server.resource).to receive(:restore_target).and_return(Time.now).twice
    expect(postgres_server.timeline).to receive(:latest_backup_label_before_target).and_return("backup-label").twice
    expect(postgres_server).to receive(:standby?).and_return(false).twice
    expect(sshable).to receive(:cmd).with("common/bin/daemonizer 'sudo postgres/bin/initialize-database-from-backup 16 backup-label' initialize_database_from_backup").twice

    # Both statuses must (re)issue the restore command and nap.
    %w[NotStarted Failed].each do |status|
      expect(sshable).to receive(:cmd).with(check_cmd).and_return(status)
      expect { nx.initialize_database_from_backup }.to nap(5)
    end
  end

  it "hops to refresh_certificates if initialize_database_from_backup command is succeeded" do
    expect(sshable).to receive(:cmd).with(check_cmd).and_return("Succeeded")

    expect { nx.initialize_database_from_backup }.to hop("refresh_certificates")
  end

  it "naps if script return unknown status" do
    expect(sshable).to receive(:cmd).with(check_cmd).and_return("Unknown")

    expect { nx.initialize_database_from_backup }.to nap(5)
  end

  it "triggers initialize_database_from_backup with LATEST as backup_label for standbys" do
    latest_restore_cmd = "common/bin/daemonizer 'sudo postgres/bin/initialize-database-from-backup 16 LATEST' initialize_database_from_backup"

    expect(sshable).to receive(:cmd).with(check_cmd).and_return("NotStarted")
    expect(postgres_server).to receive(:standby?).and_return(true)
    expect(sshable).to receive(:cmd).with(latest_restore_cmd)

    expect { nx.initialize_database_from_backup }.to nap(5)
  end
end
|
|
|
|
describe "#refresh_certificates" do
  # Expects each certificate file to be written under /etc/ssl/certs and then
  # restricted to the cert_readers group with mode 640.
  def expect_certificates_pushed
    {
      "ca.crt" => "root_cert_1\nroot_cert_2",
      "server.crt" => "server_cert",
      "server.key" => "server_cert_key"
    }.each do |file, content|
      expect(sshable).to receive(:cmd).with("sudo tee /etc/ssl/certs/#{file} > /dev/null", stdin: content)
      expect(sshable).to receive(:cmd).with("sudo chgrp cert_readers /etc/ssl/certs/#{file} && sudo chmod 640 /etc/ssl/certs/#{file}")
    end
  end

  it "waits for certificate creation by the parent resource" do
    expect(postgres_server.resource).to receive(:server_cert).and_return(nil)
    expect { nx.refresh_certificates }.to nap(5)
  end

  # Description corrected: the asserted hop target is configure_metrics.
  it "pushes certificates to vm and hops to configure_metrics during initial provisioning" do
    expect_certificates_pushed
    expect(nx).to receive(:refresh_walg_credentials)
    expect(nx).to receive(:when_initial_provisioning_set?).and_yield

    expect { nx.refresh_certificates }.to hop("configure_metrics")
  end

  it "hops to wait at times other than the initial provisioning" do
    expect_certificates_pushed
    # Outside initial provisioning, postgres and pgbouncer are expected to be reloaded.
    expect(sshable).to receive(:cmd).with("sudo -u postgres pg_ctlcluster 16 main reload")
    expect(sshable).to receive(:cmd).with("sudo systemctl reload pgbouncer@*.service")
    expect(nx).to receive(:refresh_walg_credentials)

    expect { nx.refresh_certificates }.to hop("wait")
  end
end
|
|
|
|
describe "#configure_metrics" do
  let(:metrics_config) { {interval: "30s", endpoints: ["https://localhost:9100/metrics"], metrics_dir: "/home/ubi/postgres/metrics"} }

  # Services touched by configure_metrics, in the order the examples list them.
  let(:monitoring_units) { %w[postgres_exporter node_exporter prometheus] }

  it "configures prometheus and metrics during initial provisioning" do
    expect(nx).to receive(:when_initial_provisioning_set?).and_yield

    # Prometheus expectations: config files are written, then the units are enabled.
    %w[web-config.yml prometheus.yml].each do |config_file|
      expect(sshable).to receive(:cmd).with("sudo -u prometheus tee /home/prometheus/#{config_file} > /dev/null", stdin: anything)
    end
    monitoring_units.each do |unit|
      expect(sshable).to receive(:cmd).with("sudo systemctl enable --now #{unit}")
    end

    # Metrics collector expectations
    expect(postgres_server).to receive(:metrics_config).and_return(metrics_config)
    expect(sshable).to receive(:cmd).with("mkdir -p /home/ubi/postgres/metrics")
    expect(sshable).to receive(:cmd).with("tee /home/ubi/postgres/metrics/config.json > /dev/null", stdin: metrics_config.to_json)
    %w[service timer].each do |unit_type|
      expect(sshable).to receive(:cmd).with("sudo tee /etc/systemd/system/postgres-metrics.#{unit_type} > /dev/null", stdin: anything)
    end
    expect(sshable).to receive(:cmd).with("sudo systemctl daemon-reload")
    expect(sshable).to receive(:cmd).with("sudo systemctl enable --now postgres-metrics.timer")

    expect { nx.configure_metrics }.to hop("setup_hugepages")
  end

  it "configures prometheus and metrics and hops to wait at times other than initial provisioning" do
    # Prometheus expectations: units are reloaded, with restart as the fallback.
    %w[web-config.yml prometheus.yml].each do |config_file|
      expect(sshable).to receive(:cmd).with("sudo -u prometheus tee /home/prometheus/#{config_file} > /dev/null", stdin: anything)
    end
    monitoring_units.each do |unit|
      expect(sshable).to receive(:cmd).with("sudo systemctl reload #{unit} || sudo systemctl restart #{unit}")
    end

    # Metrics collector expectations
    expect(postgres_server).to receive(:metrics_config).and_return(metrics_config)
    expect(sshable).to receive(:cmd).with("mkdir -p /home/ubi/postgres/metrics")
    expect(sshable).to receive(:cmd).with("tee /home/ubi/postgres/metrics/config.json > /dev/null", stdin: metrics_config.to_json)
    %w[service timer].each do |unit_type|
      expect(sshable).to receive(:cmd).with("sudo tee /etc/systemd/system/postgres-metrics.#{unit_type} > /dev/null", stdin: anything)
    end
    expect(sshable).to receive(:cmd).with("sudo systemctl daemon-reload")

    other_server = instance_double(PostgresServer, id: "random-id")
    expect(resource).to receive(:representative_server).and_return(other_server)

    expect { nx.configure_metrics }.to hop("wait")
  end

  it "uses default interval if not specified in config" do
    config_without_interval = {endpoints: ["https://localhost:9100/metrics"], metrics_dir: "/home/ubi/postgres/metrics"}

    # Prometheus expectations
    %w[web-config.yml prometheus.yml].each do |config_file|
      expect(sshable).to receive(:cmd).with("sudo -u prometheus tee /home/prometheus/#{config_file} > /dev/null", stdin: anything)
    end
    monitoring_units.each do |unit|
      expect(sshable).to receive(:cmd).with("sudo systemctl reload #{unit} || sudo systemctl restart #{unit}")
    end

    # Metrics collector expectations; the timer unit must carry the 15s default interval.
    expect(postgres_server).to receive(:metrics_config).and_return(config_without_interval)
    expect(sshable).to receive(:cmd).with("mkdir -p /home/ubi/postgres/metrics")
    expect(sshable).to receive(:cmd).with("tee /home/ubi/postgres/metrics/config.json > /dev/null", stdin: config_without_interval.to_json)
    expect(sshable).to receive(:cmd).with("sudo tee /etc/systemd/system/postgres-metrics.service > /dev/null", stdin: anything)
    expect(sshable).to receive(:cmd).with("sudo tee /etc/systemd/system/postgres-metrics.timer > /dev/null", stdin: /OnUnitActiveSec=15s/)
    expect(sshable).to receive(:cmd).with("sudo systemctl daemon-reload")

    other_server = instance_double(PostgresServer, id: "random-id")
    expect(resource).to receive(:representative_server).and_return(other_server)

    expect { nx.configure_metrics }.to hop("wait")
  end
end
|
|
|
|
describe "#setup_hugepages" do
  # Name of the daemonized job the examples check, run and clean.
  let(:job_name) { "setup_hugepages" }

  it "hops to configure if the setup succeeds" do
    expect(sshable).to receive(:d_check).with(job_name).and_return("Succeeded")
    expect(sshable).to receive(:d_clean).with(job_name)

    expect { nx.setup_hugepages }.to hop("configure")
  end

  it "retries the setup if it fails" do
    expect(sshable).to receive(:d_check).with(job_name).and_return("Failed")
    expect(sshable).to receive(:d_run).with(job_name, "sudo", "postgres/bin/setup-hugepages")

    expect { nx.setup_hugepages }.to nap(5)
  end

  it "starts the setup if it is not started" do
    expect(sshable).to receive(:d_check).with(job_name).and_return("NotStarted")
    expect(sshable).to receive(:d_run).with(job_name, "sudo", "postgres/bin/setup-hugepages")

    expect { nx.setup_hugepages }.to nap(5)
  end

  it "naps for 5 seconds if the setup is unknown" do
    expect(sshable).to receive(:d_check).with(job_name).and_return("Unknown")

    expect { nx.setup_hugepages }.to nap(5)
  end
end
|
|
|
|
describe "#configure" do
  # Expects the daemonizer to report the configure job as succeeded and the
  # job to be cleaned afterwards.
  def expect_configure_succeeded
    expect(sshable).to receive(:cmd).with("common/bin/daemonizer --clean configure_postgres").and_return("Succeeded")
    expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check configure_postgres").and_return("Succeeded")
  end

  it "triggers configure if configure command is not sent yet or failed" do
    expect(postgres_server).to receive(:configure_hash).and_return("dummy-configure-hash").twice
    expect(sshable).to receive(:cmd).with("common/bin/daemonizer 'sudo postgres/bin/configure 16' configure_postgres", stdin: JSON.generate("dummy-configure-hash")).twice

    # NotStarted
    expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check configure_postgres").and_return("NotStarted")
    expect { nx.configure }.to nap(5)

    # Failed
    expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check configure_postgres").and_return("Failed")
    expect { nx.configure }.to nap(5)
  end

  it "hops to update_superuser_password if configure command is succeeded during the initial provisioning and if the server is primary" do
    expect(nx).to receive(:when_initial_provisioning_set?).and_yield
    expect_configure_succeeded
    expect(postgres_server).to receive(:primary?).and_return(true)
    expect { nx.configure }.to hop("update_superuser_password")
  end

  it "hops to wait_catch_up if configure command is succeeded during the initial provisioning and if the server is standby" do
    expect(nx).to receive(:when_initial_provisioning_set?).and_yield
    expect_configure_succeeded
    expect(postgres_server).to receive(:primary?).and_return(false)
    expect(postgres_server).to receive(:standby?).and_return(true)
    expect { nx.configure }.to hop("wait_catch_up")
  end

  it "hops to wait_recovery_completion if configure command is succeeded during the initial provisioning and if the server is doing pitr" do
    expect(nx).to receive(:when_initial_provisioning_set?).and_yield
    expect_configure_succeeded
    expect(postgres_server).to receive(:primary?).and_return(false)
    expect(postgres_server).to receive(:standby?).and_return(false)
    expect { nx.configure }.to hop("wait_recovery_completion")
  end

  it "hops to wait for primaries if configure command is succeeded at times other than the initial provisioning" do
    # No and_yield: the initial-provisioning block is not run.
    expect(nx).to receive(:when_initial_provisioning_set?)
    expect_configure_succeeded
    expect(postgres_server).to receive(:standby?).and_return(false)
    expect { nx.configure }.to hop("wait")
  end

  it "hops to wait_catchup for standbys if configure command is succeeded at times other than the initial provisioning" do
    expect(nx).to receive(:when_initial_provisioning_set?)
    expect_configure_succeeded
    expect(postgres_server).to receive(:standby?).and_return(true)
    expect(postgres_server).to receive(:synchronization_status).and_return("catching_up")
    expect { nx.configure }.to hop("wait_catch_up")
  end

  # Description corrected: the asserted hop target for read replicas is wait_catch_up.
  it "hops to wait_catch_up for read replicas if configure command is succeeded during the initial provisioning" do
    expect(nx).to receive(:when_initial_provisioning_set?).and_yield
    expect_configure_succeeded
    expect(postgres_server).to receive(:primary?).and_return(false)
    expect(postgres_server).to receive(:standby?).and_return(false)
    expect(postgres_server).to receive(:read_replica?).and_return(true)
    expect { nx.configure }.to hop("wait_catch_up")
  end

  it "naps if script return unknown status" do
    expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check configure_postgres").and_return("Unknown")
    expect { nx.configure }.to nap(5)
  end
end
|
|
|
|
describe "#update_superuser_password" do
  # Pattern the password-update query must match: log_statement = 'none' on
  # one line, SCRAM-SHA-256 on the next.
  let(:password_query) { /log_statement = 'none'.*\n.*SCRAM-SHA-256/ }
  # Return value the examples use to simulate a finished restart.
  let(:restarted_retval) { {"msg" => "postgres server is restarted"} }

  it "updates password and pushes restart during the initial provisioning" do
    expect(nx).to receive(:when_initial_provisioning_set?).and_yield
    expect(postgres_server).to receive(:run_query).with(password_query)
    expect(nx).to receive(:push).with(described_class, {}, "restart").and_call_original

    expect { nx.update_superuser_password }.to hop("restart")
  end

  it "updates password and hops to wait during initial provisioning if restart is already executed" do
    expect(nx).to receive(:when_initial_provisioning_set?).and_yield
    expect(postgres_server).to receive(:run_query).with(password_query)
    expect(nx.strand).to receive(:retval).and_return(restarted_retval)
    expect(postgres_server).to receive(:primary?).and_return(true)
    expect(resource).to receive(:flavor).and_return(PostgresResource::Flavor::STANDARD)

    expect { nx.update_superuser_password }.to hop("wait")
  end

  it "updates password and hops to run_post_installation_script during initial provisioning for non-standard flavors if restart is already executed" do
    expect(nx).to receive(:when_initial_provisioning_set?).and_yield
    expect(postgres_server).to receive(:run_query).with(password_query)
    expect(nx.strand).to receive(:retval).and_return(restarted_retval)
    expect(postgres_server).to receive(:primary?).and_return(true)
    expect(resource).to receive(:flavor).and_return(PostgresResource::Flavor::PARADEDB)

    expect { nx.update_superuser_password }.to hop("run_post_installation_script")
  end

  it "updates password and hops to wait at times other than the initial provisioning" do
    # No and_yield: the initial-provisioning block is not run.
    expect(nx).to receive(:when_initial_provisioning_set?)
    expect(postgres_server).to receive(:run_query).with(password_query)

    expect { nx.update_superuser_password }.to hop("wait")
  end
end
|
|
|
|
describe "#run_post_installation_script" do
  it "runs post installation script and hops wait" do
    # Only the script name is pinned; the rest of the command line is free.
    script_cmd = /post-installation-script/
    expect(sshable).to receive(:cmd).with(script_cmd)

    expect { nx.run_post_installation_script }.to hop("wait")
  end
end
|
|
|
|
describe "#wait_catch_up" do
  it "naps if the lag is too high" do
    expect(postgres_server).to receive(:lsn_caught_up).and_return(false, false)

    # One call per stubbed `false` answer; both must nap.
    2.times { expect { nx.wait_catch_up }.to nap(30) }
  end

  it "sets the synchronization_status and hops to wait_synchronization for sync replication" do
    expect(postgres_server).to receive(:lsn_caught_up).and_return(true)
    expect(postgres_server).to receive(:update).with(synchronization_status: "ready")
    expect(postgres_server).to receive(:incr_configure)
    expect(resource).to receive(:ha_type).and_return(PostgresResource::HaType::SYNC)

    expect { nx.wait_catch_up }.to hop("wait_synchronization")
  end

  it "sets the synchronization_status and hops to wait for async replication" do
    expect(postgres_server).to receive(:lsn_caught_up).and_return(true)
    expect(postgres_server).to receive(:update).with(synchronization_status: "ready")
    expect(postgres_server).to receive(:incr_configure)
    expect(resource).to receive(:ha_type).and_return(PostgresResource::HaType::ASYNC)

    expect { nx.wait_catch_up }.to hop("wait")
  end

  it "hops to wait if replica and caught up" do
    expect(postgres_server).to receive(:read_replica?).and_return(true)
    expect(postgres_server).to receive(:lsn_caught_up).and_return(true)

    expect { nx.wait_catch_up }.to hop("wait")
  end
end
|
|
|
|
describe "#wait_synchronization" do
  it "hops to wait if sync replication is established" do
    # run_query answers "quorum" on the first call and "sync" on the second;
    # both answers must lead to the same hop.
    expect(postgres_server).to receive(:run_query).and_return("quorum", "sync")

    2.times { expect { nx.wait_synchronization }.to hop("wait") }
  end

  it "naps if sync replication is not established" do
    # An empty result and "async" must both lead to a nap.
    expect(postgres_server).to receive(:run_query).and_return("", "async")

    2.times { expect { nx.wait_synchronization }.to nap(30) }
  end
end
|
|
|
|
describe "#wait_recovery_completion" do
  # Queries the examples expect to be issued through run_query.
  let(:in_recovery_sql) { "SELECT pg_is_in_recovery()" }
  let(:replay_state_sql) { "SELECT pg_get_wal_replay_pause_state()" }

  # Id of the strand returned by the stubbed PostgresTimelineNexus.assemble;
  # the server's timeline_id is expected to be set to it.
  let(:new_timeline_id) { "375b1399-ec21-8eda-8859-2faee6ff6613" }

  it "naps if it is still in recovery and wal replay is not paused" do
    expect(postgres_server).to receive(:run_query).with(in_recovery_sql).and_return("t")
    expect(postgres_server).to receive(:run_query).with(replay_state_sql).and_return("not paused")

    expect { nx.wait_recovery_completion }.to nap(5)
  end

  it "naps if it cannot connect to database due to recovery" do
    # The third SshError argument carries the recovery message that this
    # example relies on; it must stay byte-identical.
    recovery_error = Sshable::SshError.new("", nil, "Consistent recovery state has not been yet reached.", nil, nil)
    expect(postgres_server).to receive(:run_query).with(in_recovery_sql).and_raise(recovery_error)

    expect { nx.wait_recovery_completion }.to nap(5)
  end

  it "raises error if it cannot connect to database due to a problem other than continuing recovery" do
    unrelated_error = Sshable::SshError.new("", nil, "Bogus", nil, nil)
    expect(postgres_server).to receive(:run_query).with(in_recovery_sql).and_raise(unrelated_error)

    expect { nx.wait_recovery_completion }.to raise_error(Sshable::SshError)
  end

  it "stops wal replay and switches to new timeline if it is still in recovery but wal replay is paused" do
    expect(postgres_server).to receive(:run_query).with(in_recovery_sql).and_return("t")
    expect(postgres_server).to receive(:run_query).with(replay_state_sql).and_return("paused")
    expect(sshable).to receive(:cmd).with("sudo -u postgres tee /etc/postgresql/wal-g.env > /dev/null", stdin: "walg config")
    expect(sshable).to receive(:cmd).with("sudo tee /usr/lib/ssl/certs/blob_storage_ca.crt > /dev/null", stdin: "certs")

    expect(postgres_server).to receive(:run_query).with("SELECT pg_wal_replay_resume()")
    expect(Prog::Postgres::PostgresTimelineNexus).to receive(:assemble).and_return(instance_double(Strand, id: new_timeline_id))
    expect(postgres_server).to receive(:timeline_id=).with(new_timeline_id)
    expect(postgres_server).to receive(:timeline_access=).with("push")
    expect(postgres_server).to receive(:save_changes)

    expect { nx.wait_recovery_completion }.to hop("configure")
  end

  it "switches to new timeline if the recovery is completed" do
    expect(postgres_server).to receive(:run_query).with(in_recovery_sql).and_return("f")
    expect(sshable).to receive(:cmd).with("sudo -u postgres tee /etc/postgresql/wal-g.env > /dev/null", stdin: "walg config")
    expect(sshable).to receive(:cmd).with("sudo tee /usr/lib/ssl/certs/blob_storage_ca.crt > /dev/null", stdin: "certs")

    expect(Prog::Postgres::PostgresTimelineNexus).to receive(:assemble).and_return(instance_double(Strand, id: new_timeline_id))
    expect(postgres_server).to receive(:timeline_id=).with(new_timeline_id)
    expect(postgres_server).to receive(:timeline_access=).with("push")
    expect(postgres_server).to receive(:save_changes)

    expect { nx.wait_recovery_completion }.to hop("configure")
  end
end
|
|
|
|
describe "#wait" do
  # Nap length asserted by the examples outside the "read replica" group
  # that end in a nap rather than a hop.
  let(:idle_nap) { 6 * 60 * 60 }

  it "naps" do
    expect { nx.wait }.to nap(idle_nap)
  end

  it "hops to prepare_for_take_over if take_over is set" do
    expect(nx).to receive(:when_take_over_set?).and_yield

    expect { nx.wait }.to hop("prepare_for_take_over")
  end

  it "hops to refresh_certificates if refresh_certificates is set" do
    expect(nx).to receive(:when_refresh_certificates_set?).and_yield

    expect { nx.wait }.to hop("refresh_certificates")
  end

  it "hops to update_superuser_password if update_superuser_password is set" do
    expect(nx).to receive(:when_update_superuser_password_set?).and_yield

    expect { nx.wait }.to hop("update_superuser_password")
  end

  it "hops to unavailable if checkup is set and the server is not available" do
    expect(nx).to receive(:when_checkup_set?).and_yield
    expect(nx).to receive(:available?).and_return(false)

    expect { nx.wait }.to hop("unavailable")
  end

  it "naps if checkup is set but the server is available" do
    expect(nx).to receive(:when_checkup_set?).and_yield
    expect(nx).to receive(:available?).and_return(true)

    expect { nx.wait }.to nap(idle_nap)
  end

  it "hops to configure_metrics if configure_metrics is set" do
    expect(nx).to receive(:when_configure_metrics_set?).and_yield

    expect { nx.wait }.to hop("configure_metrics")
  end

  it "hops to configure if configure is set" do
    expect(nx).to receive(:when_configure_set?).and_yield

    expect { nx.wait }.to hop("configure")
  end

  it "decrements and calls refresh_walg_credentials if refresh_walg_credentials is set" do
    expect(nx).to receive(:when_refresh_walg_credentials_set?).and_yield
    expect(nx).to receive(:decr_refresh_walg_credentials)
    expect(nx).to receive(:refresh_walg_credentials)

    expect { nx.wait }.to nap(idle_nap)
  end

  it "pushes restart if restart is set" do
    expect(nx).to receive(:when_restart_set?).and_yield
    expect(nx).to receive(:push).with(described_class, {}, "restart").and_call_original

    expect { nx.wait }.to hop("restart")
  end

  it "promotes" do
    expect(nx).to receive(:when_promote_set?).and_yield
    expect(nx).to receive(:switch_to_new_timeline)

    expect { nx.wait }.to hop("taking_over")
  end

  describe "read replica" do
    # Every example in this group runs against a server that reports itself
    # as a read replica and whose resource has a parent.
    before do
      expect(postgres_server).to receive(:read_replica?).and_return(true)
      expect(resource).to receive(:parent).and_return(true)
    end

    it "checks if it was already lagging and the lag continues, if so, starts recycling" do
      expect(postgres_server).to receive(:lsn_caught_up).and_return(false)
      expect(postgres_server).to receive(:current_lsn).and_return("1/A")

      # The frame already stores the same LSN and lsn_diff is stubbed to 0.
      expect(nx.strand).to receive(:stack).and_return([{"lsn" => "1/A"}]).at_least(:once)
      expect(postgres_server).to receive(:lsn_diff).with("1/A", "1/A").and_return(0)
      expect(postgres_server).to receive(:recycle_set?).and_return(false)
      expect(postgres_server).to receive(:incr_recycle)

      expect { nx.wait }.to nap(60)
    end

    it "does not increment recycle if it is incremented already" do
      expect(postgres_server).to receive(:lsn_caught_up).and_return(false)
      expect(postgres_server).to receive(:current_lsn).and_return("1/A")

      expect(nx.strand).to receive(:stack).and_return([{"lsn" => "1/A"}]).at_least(:once)
      expect(postgres_server).to receive(:lsn_diff).with("1/A", "1/A").and_return(0)
      expect(postgres_server).to receive(:recycle_set?).and_return(true)
      expect(postgres_server).not_to receive(:incr_recycle)

      expect { nx.wait }.to nap(60)
    end

    it "checks if it wasn't already lagging but the lag exists, if so, update the stack and nap" do
      expect(postgres_server).to receive(:lsn_caught_up).and_return(false)
      expect(postgres_server).to receive(:current_lsn).and_return("1/A")

      # Empty frame: no LSN has been recorded yet.
      expect(nx.strand).to receive(:stack).and_return([{}]).at_least(:once)
      expect(nx).to receive(:update_stack_lsn).with("1/A")

      expect { nx.wait }.to nap(900)
    end

    it "checks if there is no lag, simply naps" do
      expect(postgres_server).to receive(:lsn_caught_up).and_return(true)

      expect { nx.wait }.to nap(60)
    end

    it "checks if there was a lag, and it still exist but we are progressing, so, we update the stack and nap" do
      expect(postgres_server).to receive(:lsn_caught_up).and_return(false)
      expect(postgres_server).to receive(:current_lsn).and_return("1/A")

      # The frame stores "1/9" while the server reports "1/A"; lsn_diff is
      # stubbed to 1.
      expect(nx.strand).to receive(:stack).and_return([{"lsn" => "1/9"}]).at_least(:once)
      expect(postgres_server).to receive(:lsn_diff).with("1/A", "1/9").and_return(1)
      expect(nx).to receive(:decr_recycle)
      expect(nx).to receive(:update_stack_lsn).with("1/A")

      expect { nx.wait }.to nap(900)
    end
  end
end
|
|
|
|
describe "#unavailable" do
  it "hops to wait if the server is available" do
    expect(postgres_server).to receive(:trigger_failover).and_return(false)
    expect(nx).to receive(:available?).and_return(true)

    expect { nx.unavailable }.to hop("wait")
  end

  it "buds restart if the server is not available" do
    expect(postgres_server).to receive(:trigger_failover).and_return(false)
    expect(nx).to receive(:available?).and_return(false)
    expect(nx).to receive(:bud).with(described_class, {}, :restart)

    expect { nx.unavailable }.to nap(5)
  end

  it "does not bud restart if there is already one restart going on" do
    # A "restart" child of this strand already exists, with a lease that
    # ends ten seconds from now.
    Strand.create(
      parent_id: st.id,
      prog: "Postgres::PostgresServerNexus",
      label: "restart",
      stack: [{}],
      lease: Time.now + 10
    )
    expect(postgres_server).to receive(:trigger_failover).and_return(false)

    expect { nx.unavailable }.to nap(5)

    # Still exactly one restart strand: nothing new was created.
    restart_strands = Strand.where(prog: "Postgres::PostgresServerNexus", label: "restart")
    expect(restart_strands.count).to eq 1
  end

  it "trigger_failover succeeds, naps 0" do
    expect(postgres_server).to receive(:trigger_failover).and_return(true)

    expect { nx.unavailable }.to nap(0)
  end
end
|
|
|
|
describe "#prepare_for_take_over" do
  it "naps if primary still exists" do
    expect(nx).to receive(:decr_take_over)

    # The resource still has a representative server. The stop command sent
    # to it raises an SshError, and the example still expects incr_destroy
    # on that server followed by a nap.
    representative_server = instance_double(PostgresServer, id: "something", vm: instance_double(Vm, sshable: instance_double(Sshable)))
    expect(representative_server.vm.sshable).to receive(:cmd).with("sudo pg_ctlcluster 16 main stop -m immediate").and_raise(Sshable::SshError.new("", "", "", "", ""))
    expect(representative_server).to receive(:incr_destroy)
    expect(postgres_server.resource).to receive(:representative_server).and_return(representative_server).at_least(:once)

    expect { nx.prepare_for_take_over }.to nap(5)
  end

  it "hops to taking_over if primary does not exist" do
    expect(nx).to receive(:decr_take_over)

    # No representative server is left on the resource.
    expect(postgres_server.resource).to receive(:representative_server).and_return(nil)

    expect { nx.prepare_for_take_over }.to hop("taking_over")
  end
end
|
|
|
|
describe "#taking_over" do
  # Command whose stubbed output drives each example below.
  let(:check_promote_cmd) { "common/bin/daemonizer --check promote_postgres" }

  it "triggers promote if promote command is not sent yet or failed" do
    expect(sshable).to receive(:cmd).with("common/bin/daemonizer 'sudo pg_ctlcluster 16 main promote' promote_postgres").twice

    # First check answers "NotStarted", second answers "Failed"; the promote
    # command is expected once for each.
    expect(sshable).to receive(:cmd).with(check_promote_cmd).and_return("NotStarted", "Failed")

    2.times { expect { nx.taking_over }.to nap(0) }
  end

  it "updates the metadata and hops to configure if promote command is succeeded" do
    expect(sshable).to receive(:cmd).with(check_promote_cmd).and_return("Succeeded")

    # Expectations on the server under test.
    expect(postgres_server).to receive(:update).with(timeline_access: "push", representative_at: anything, synchronization_status: "ready")
    expect(resource).to receive(:incr_refresh_dns_record)
    expect(postgres_server).to receive(:primary?).and_return(true)
    expect(postgres_server).to receive(:incr_configure)
    expect(postgres_server).to receive(:incr_configure_metrics)
    expect(postgres_server).to receive(:incr_restart)

    # Expectations on the other, non-primary server of the resource.
    standby_server = instance_double(PostgresServer, primary?: false)
    expect(standby_server).to receive(:update).with(synchronization_status: "catching_up")
    expect(standby_server).to receive(:incr_configure)
    expect(standby_server).to receive(:incr_configure_metrics)

    expect(resource).to receive(:servers).at_least(:once).and_return([postgres_server, standby_server])

    expect { nx.taking_over }.to hop("configure")
  end

  it "naps if script return unknown status" do
    expect(sshable).to receive(:cmd).with(check_promote_cmd).and_return("Unknown")

    expect { nx.taking_over }.to nap(5)
  end

  describe "read_replica" do
    it "updates the representative server, refreshes dns and destroys the old representative_server and hops to configure when read_replica" do
      # Time.now is stubbed so representative_at can be matched exactly.
      fixed_now = Time.now
      expect(postgres_server).to receive(:read_replica?).and_return(true)
      expect(Time).to receive(:now).and_return(fixed_now)
      expect(postgres_server).to receive(:update).with(representative_at: fixed_now)
      expect(resource).to receive(:incr_refresh_dns_record)
      expect(resource).to receive(:servers).at_least(:once).and_return([postgres_server])
      expect(postgres_server).to receive(:incr_configure_metrics)

      expect { nx.taking_over }.to hop("configure")
    end
  end
end
|
|
|
|
describe "#destroy" do
  it "deletes resources and exits" do
    # The VM is flagged for destruction and the server record is destroyed.
    server_vm = postgres_server.vm
    expect(server_vm).to receive(:incr_destroy)
    expect(postgres_server).to receive(:destroy)

    exit_value = {"msg" => "postgres server is deleted"}
    expect { nx.destroy }.to exit(exit_value)
  end
end
|
|
|
|
describe "#restart" do
  it "restarts and exits" do
    # Both commands are expected on the VM: the postgres restart script and
    # the pgbouncer service restart.
    [
      "sudo postgres/bin/restart 16",
      "sudo systemctl restart pgbouncer@*.service"
    ].each do |command|
      expect(sshable).to receive(:cmd).with(command)
    end

    exit_value = {"msg" => "postgres server is restarted"}
    expect { nx.restart }.to exit(exit_value)
  end
end
|
|
|
|
describe "#refresh_walg_credentials" do
  it "returns nil if blob storage is not configured" do
    # The timeline reports no blob storage at all.
    expect(postgres_server.timeline).to receive(:blob_storage).and_return(nil)

    expect(nx.refresh_walg_credentials).to be_nil
  end
end
|
|
|
|
describe "#available?" do
  # Command whose output the examples stub after the health query has raised.
  let(:log_tail_cmd) { "sudo tail -n 5 /dat/16/data/pg_log/postgresql.log" }

  # Every example expects the sshable cache entry to be invalidated.
  before do
    expect(sshable).to receive(:invalidate_cache_entry)
  end

  it "returns true if health check is successful" do
    expect(postgres_server).to receive(:run_query).with("SELECT 1").and_return("1")

    expect(nx.available?).to be(true)
  end

  it "returns true if the database is in crash recovery" do
    expect(postgres_server).to receive(:run_query).with("SELECT 1").and_raise(Sshable::SshError)
    expect(sshable).to receive(:cmd).with(log_tail_cmd).and_return("redo in progress")

    expect(nx.available?).to be(true)
  end

  it "returns false otherwise" do
    expect(postgres_server).to receive(:run_query).with("SELECT 1").and_raise(Sshable::SshError)
    expect(sshable).to receive(:cmd).with(log_tail_cmd).and_return("not doing redo")

    expect(nx.available?).to be(false)
  end
end
|
|
|
|
# update_stack_lsn is called on the prog instance (nx), so the group uses the
# "#" instance-method prefix.
describe "#update_stack_lsn" do
  it "updates the lsn in the current frame" do
    frame = [{"lsn" => "hello"}]
    nx.strand.stack = frame
    expect(nx.strand).to receive(:modified!)

    nx.update_stack_lsn("update")

    # The hash assigned as the stack is expected to be changed in place.
    expect(frame.first["lsn"]).to eq("update")
  end
end
|
|
end
|