It hasn't been necessary to use create_with_id since ebc79622df, in December 2024.
I have plans to introduce:
```ruby
# Instantiates the receiver with +values+, assigns the explicitly supplied
# +id+ to the new object, and returns whatever +save_changes+ returns.
def create_with_id(id, values)
  new(values).tap { |record| record.id = id }.save_changes
end
```
This will make it easier to use the same id when creating
multiple objects. The first step is removing the existing
uses of create_with_id.
1035 lines
55 KiB
Ruby
1035 lines
55 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require_relative "../../model/spec_helper"
|
|
|
|
RSpec.describe Prog::Postgres::PostgresServerNexus do
|
|
# Prog instance under test, constructed from the strand defined below.
subject(:nx) do
  described_class.new(st)
end

# Strand created with a fixed id for the PostgresServerNexus prog, at the "start" label.
let(:st) do
  Strand.create(
    id: "0d77964d-c416-8edb-9237-7e7dd5d6fcf8",
    prog: "Postgres::PostgresServerNexus",
    label: "start"
  )
end
|
|
|
|
# Verifying double for the PostgresServer used by most examples. The nested
# timeline and vm doubles are built first and then attached to it.
let(:postgres_server) do
  blob_storage = instance_double(MinioCluster, root_certs: "certs")

  timeline = instance_double(
    PostgresTimeline,
    id: "f6644aae-9759-8ada-9aef-9b6cfccdc167",
    generate_walg_config: "walg config",
    blob_storage: blob_storage,
    aws?: false
  )

  vm = instance_double(
    Vm,
    id: "1c7d59ee-8d46-8374-9553-6144490ecec5",
    sshable: sshable,
    ephemeral_net4: "1.1.1.1",
    private_subnets: [instance_double(PrivateSubnet)]
  )

  instance_double(
    PostgresServer,
    id: "0d77964d-c416-8edb-9237-7e7dd5d6fcf8",
    ubid: "pgubid",
    timeline: timeline,
    vm: vm,
    storage_device_paths: ["/dev/vdb"]
  )
end
|
|
|
|
# Verifying double for the parent PostgresResource; its representative
# server is the postgres_server double.
let(:resource) do
  metric_destination = instance_double(PostgresMetricDestination, ubid: "pgmetricubid", url: "url", username: "username", password: "password")
  project = instance_double(Project, get_ff_aws_cloudwatch_logs: true)

  instance_double(
    PostgresResource,
    ubid: "pgresourcesubid",
    root_cert_1: "root_cert_1",
    root_cert_2: "root_cert_2",
    server_cert: "server_cert",
    server_cert_key: "server_cert_key",
    superuser_password: "dummy-password",
    version: "16",
    representative_server: postgres_server,
    metric_destinations: [metric_destination],
    ca_certificates: "root_cert_1\nroot_cert_2",
    location_id: Location::HETZNER_FSN1_ID,
    project: project
  )
end
|
|
|
|
# Bare Sshable double; each example adds the command expectations it needs.
let(:sshable) do
  instance_double(Sshable)
end

# Wire the doubles together: the prog sees the postgres_server double, which
# in turn exposes the resource double and reports that it is not a read replica.
before do
  allow(nx).to receive(:postgres_server).and_return(postgres_server)
  allow(postgres_server).to receive(:resource).and_return(resource)
  allow(postgres_server).to receive(:read_replica?).and_return(false)
end
|
|
|
|
describe ".assemble" do
  let(:user_project) { Project.create(name: "default") }

  let(:postgres_resource) {
    PostgresResource.create(
      project_id: user_project.id,
      location_id: Location::HETZNER_FSN1_ID,
      name: "pg-name",
      target_vm_size: "standard-2",
      target_storage_size_gib: 128,
      superuser_password: "dummy-password"
    )
  }

  # Location with provider "aws", shared by the AWS examples below.
  let(:aws_location) {
    Location.create(
      name: "us-west-2",
      display_name: "aws-us-west-2",
      ui_name: "aws-us-west-2",
      visible: true,
      provider: "aws",
      project_id: user_project.id
    )
  }

  it "creates postgres server and vm with sshable" do
    postgres_timeline = PostgresTimeline.create
    postgres_project = Project.create(name: "default")
    expect(Config).to receive(:postgres_service_project_id).and_return(postgres_project.id).at_least(:once)

    # Locals are named so they do not shadow the outer `st` / `postgres_server` lets.
    first_strand = described_class.assemble(resource_id: postgres_resource.id, timeline_id: postgres_timeline.id, timeline_access: "push", representative_at: Time.now)
    created_server = PostgresServer[first_strand.id]
    expect(created_server).not_to be_nil
    expect(created_server.vm).not_to be_nil
    expect(created_server.vm.sshable).not_to be_nil

    # Assembled without representative_at: the server must report "catching_up".
    second_strand = described_class.assemble(resource_id: postgres_resource.id, timeline_id: postgres_timeline.id, timeline_access: "push")
    expect(PostgresServer[second_strand.id].synchronization_status).to eq("catching_up")
  end

  it "picks correct base image for Lantern" do
    expect(PostgresResource).to receive(:[]).and_return(postgres_resource)
    expect(postgres_resource).to receive(:flavor).and_return(PostgresResource::Flavor::LANTERN).at_least(:once)
    expect(Prog::Vm::Nexus).to receive(:assemble_with_sshable).with(anything, hash_including(boot_image: "postgres16-lantern-ubuntu-2204")).and_return(instance_double(Strand, id: "62c62ddb-5b5a-4e9e-b534-e73c16f86bcb"))
    expect(PostgresServer).to receive(:create).and_return(instance_double(PostgresServer, id: "5c13fd6a-25c2-4fa4-be48-2846f127526a"))
    described_class.assemble(resource_id: postgres_resource.id, timeline_id: "91588cda-7122-4d6a-b01c-f33c30cb17d8", timeline_access: "push", representative_at: Time.now)
  end

  # NOTE(review): unlike the Lantern example, the two AWS examples below do not
  # constrain the boot_image passed to assemble_with_sshable — confirm whether
  # the expected AMI should be asserted here.
  it "picks correct base image for AWS-pg16" do
    expect(PostgresResource).to receive(:[]).and_return(postgres_resource)
    expect(postgres_resource).to receive(:location).and_return(aws_location).at_least(:once)
    expect(postgres_resource).to receive(:version).and_return("16").at_least(:once)
    expect(Prog::Vm::Nexus).to receive(:assemble_with_sshable).and_return(instance_double(Strand, id: "62c62ddb-5b5a-4e9e-b534-e73c16f86bcb"))
    expect(PostgresServer).to receive(:create).and_return(instance_double(PostgresServer, id: "5c13fd6a-25c2-4fa4-be48-2846f127526a"))
    described_class.assemble(resource_id: postgres_resource.id, timeline_id: "91588cda-7122-4d6a-b01c-f33c30cb17d8", timeline_access: "push", representative_at: Time.now)
  end

  it "picks correct base image for AWS-pg17" do
    expect(PostgresResource).to receive(:[]).and_return(postgres_resource)
    expect(postgres_resource).to receive(:version).and_return("17").at_least(:once)
    expect(postgres_resource).to receive(:location).and_return(aws_location).at_least(:once)
    expect(postgres_resource).to receive(:location_id).and_return(aws_location.id).at_least(:once)
    expect(Prog::Vm::Nexus).to receive(:assemble_with_sshable).and_return(instance_double(Strand, id: "62c62ddb-5b5a-4e9e-b534-e73c16f86bcb"))
    expect(PostgresServer).to receive(:create).and_return(instance_double(PostgresServer, id: "5c13fd6a-25c2-4fa4-be48-2846f127526a"))
    described_class.assemble(resource_id: postgres_resource.id, timeline_id: "91588cda-7122-4d6a-b01c-f33c30cb17d8", timeline_access: "push", representative_at: Time.now)
  end

  it "raises error if the version is not supported for AWS" do
    expect(PostgresResource).to receive(:[]).and_return(postgres_resource)
    expect(postgres_resource).to receive(:location).and_return(aws_location).at_least(:once)
    expect(postgres_resource).to receive(:version).and_return("18").at_least(:once)

    # Match with a regex: the quoting around the method name in NoMethodError
    # messages differs between Ruby releases, so an exact string is brittle.
    expect {
      described_class.assemble(resource_id: postgres_resource.id, timeline_id: "91588cda-7122-4d6a-b01c-f33c30cb17d8", timeline_access: "push", representative_at: Time.now)
    }.to raise_error NoMethodError, /undefined method .aws_ami_id. for nil/
  end

  it "errors out for unknown flavor" do
    expect(PostgresResource).to receive(:[]).and_return(postgres_resource)
    expect(postgres_resource).to receive(:flavor).and_return("boring_flavor").at_least(:once)

    expect {
      described_class.assemble(resource_id: postgres_resource.id, timeline_id: "91588cda-7122-4d6a-b01c-f33c30cb17d8", timeline_access: "push", representative_at: Time.now)
    }.to raise_error RuntimeError, "Unknown PostgreSQL flavor: boring_flavor"
  end
end
|
|
|
|
describe "#before_run" do
  it "hops to destroy when needed" do
    expect(nx).to receive(:when_destroy_set?).and_yield
    # The server no longer has a parent resource.
    expect(postgres_server).to receive(:resource).and_return(nil)

    expect { nx.before_run }.to hop("destroy")
  end

  it "does not hop to destroy if already in the destroy state" do
    strand = nx.strand

    expect(nx).to receive(:when_destroy_set?).and_yield
    expect(resource).to receive(:strand).and_return(nil)
    expect(strand).to receive(:label).and_return("destroy").at_least(:once)

    expect { nx.before_run }.not_to hop("destroy")
  end

  it "cancels the destroy if the server is picked up for take over" do
    resource_strand = instance_double(Strand, label: "wait")

    expect(nx).to receive(:when_destroy_set?).and_yield
    expect(resource).to receive(:strand).and_return(resource_strand)
    expect(postgres_server).to receive(:taking_over?).and_return(true)
    expect(nx).to receive(:decr_destroy)

    expect { nx.before_run }.not_to hop("destroy")
  end

  it "pops additional operations from stack" do
    strand = nx.strand
    resource_strand = instance_double(Strand, label: "destroy")

    expect(nx).to receive(:when_destroy_set?).and_yield
    expect(resource).to receive(:strand).and_return(resource_strand)
    expect(strand).to receive(:label).and_return("destroy").at_least(:once)
    # A stack count of 2 is reported, so the example expects an exit rather than a hop.
    expect(strand.stack).to receive(:count).and_return(2)

    expect { nx.before_run }.to exit({"msg" => "operation is cancelled due to the destruction of the postgres server"})
  end
end
|
|
|
|
describe "#start" do
  it "naps if vm not ready" do
    # The vm strand is still at the "prep" label.
    expect(postgres_server.vm).to receive(:strand).and_return(instance_double(Strand, label: "prep"))

    expect { nx.start }.to nap(5)
  end

  # Title matches what is asserted: the only side effect expected here is
  # incr_initial_provisioning, followed by the hop.
  it "increments initial_provisioning and hops to bootstrap_rhizome" do
    expect(postgres_server.vm).to receive(:strand).and_return(instance_double(Strand, label: "wait"))
    expect(postgres_server).to receive(:incr_initial_provisioning)

    expect { nx.start }.to hop("bootstrap_rhizome")
  end
end
|
|
|
|
describe "#bootstrap_rhizome" do
  it "buds a bootstrap rhizome process" do
    bud_frame = {"target_folder" => "postgres", "subject_id" => postgres_server.vm.id, "user" => "ubi"}

    expect(postgres_server).to receive(:primary?).and_return(true)
    expect(nx).to receive(:bud).with(Prog::BootstrapRhizome, bud_frame)

    expect { nx.bootstrap_rhizome }.to hop("wait_bootstrap_rhizome")
  end

  it "sets longer deadline for non-primary servers" do
    expect(postgres_server).to receive(:primary?).and_return(false)
    # 120 * 60 seconds, registered against the "wait" label.
    expect(nx).to receive(:register_deadline).with("wait", 120 * 60)

    expect { nx.bootstrap_rhizome }.to hop("wait_bootstrap_rhizome")
  end
end
|
|
|
|
describe "#wait_bootstrap_rhizome" do
  it "hops to mount_data_disk if there are no sub-programs running" do
    expect { nx.wait_bootstrap_rhizome }.to hop("mount_data_disk")
  end

  # Title matches the assertion below, which is a nap of 5 seconds.
  it "naps if there are sub-programs running" do
    # Child strand of st whose lease is set 10 seconds into the future.
    Strand.create(parent_id: st.id, prog: "BootstrapRhizome", label: "start", stack: [{}], lease: Time.now + 10)

    expect { nx.wait_bootstrap_rhizome }.to nap(5)
  end
end
|
|
|
|
describe "#mount_data_disk" do
  # Status probe issued by every example in this group.
  let(:format_check) { "common/bin/daemonizer --check format_disk" }

  it "formats data disk if format command is not sent yet or failed" do
    expect(postgres_server).to receive(:storage_device_paths).and_return(["/dev/vdb"])
    expect(sshable).to receive(:cmd).with("common/bin/daemonizer 'sudo mkfs --type ext4 /dev/vdb' format_disk")
    expect(sshable).to receive(:cmd).with(format_check).and_return("NotStarted")

    expect { nx.mount_data_disk }.to nap(5)
  end

  it "formats data disk correctly when there are multiple storage volumes" do
    device_paths = ["/dev/nvme1n1", "/dev/nvme2n1"]

    expect(postgres_server).to receive(:storage_device_paths).and_return(device_paths)
    # With two devices, a /dev/md0 array is created first and that is what gets formatted.
    expect(sshable).to receive(:cmd).with("sudo mdadm --create --verbose /dev/md0 --level=0 --raid-devices=2 /dev/nvme1n1 /dev/nvme2n1")
    expect(sshable).to receive(:cmd).with("common/bin/daemonizer 'sudo mkfs --type ext4 /dev/md0' format_disk")
    expect(sshable).to receive(:cmd).with(format_check).and_return("NotStarted")

    expect { nx.mount_data_disk }.to nap(5)
  end

  it "mounts data disk if format disk is succeeded and hops to configure_walg_credentials" do
    expect(sshable).to receive(:cmd).with(format_check).and_return("Succeeded")
    expect(sshable).to receive(:cmd).with("sudo mkdir -p /dat")
    expect(sshable).to receive(:cmd).with("sudo common/bin/add_to_fstab /dev/vdb /dat ext4 defaults 0 0")
    expect(sshable).to receive(:cmd).with("sudo mount /dev/vdb /dat")

    expect { nx.mount_data_disk }.to hop("configure_walg_credentials")
  end

  it "mounts data disk correctly when there are multiple storage volumes" do
    device_paths = ["/dev/nvme1n1", "/dev/nvme2n1"]

    expect(sshable).to receive(:cmd).with(format_check).and_return("Succeeded")
    expect(postgres_server).to receive(:storage_device_paths).and_return(device_paths)
    expect(sshable).to receive(:cmd).with("sudo mdadm --detail --scan | sudo tee -a /etc/mdadm/mdadm.conf")
    expect(sshable).to receive(:cmd).with("sudo update-initramfs -u")
    expect(sshable).to receive(:cmd).with("sudo mkdir -p /dat")
    expect(sshable).to receive(:cmd).with("sudo common/bin/add_to_fstab /dev/md0 /dat ext4 defaults 0 0")
    expect(sshable).to receive(:cmd).with("sudo mount /dev/md0 /dat")

    expect { nx.mount_data_disk }.to hop("configure_walg_credentials")
  end

  it "naps if script return unknown status" do
    expect(sshable).to receive(:cmd).with(format_check).and_return("Unknown")

    expect { nx.mount_data_disk }.to nap(5)
  end
end
|
|
|
|
describe "#configure_walg_credentials" do
  # Commands that write the wal-g environment file and the blob storage CA certificate.
  let(:walg_env_cmd) { "sudo -u postgres tee /etc/postgresql/wal-g.env > /dev/null" }
  let(:blob_ca_cmd) { "sudo tee /usr/lib/ssl/certs/blob_storage_ca.crt > /dev/null" }

  it "hops to initialize_empty_database if the server is primary" do
    expect(sshable).to receive(:cmd).with(walg_env_cmd, stdin: "walg config")
    expect(sshable).to receive(:cmd).with(blob_ca_cmd, stdin: "certs")
    expect(postgres_server).to receive(:primary?).and_return(true)

    expect { nx.configure_walg_credentials }.to hop("initialize_empty_database")
  end

  it "hops to initialize_database_from_backup if the server is not primary" do
    expect(sshable).to receive(:cmd).with(walg_env_cmd, stdin: "walg config")
    expect(sshable).to receive(:cmd).with(blob_ca_cmd, stdin: "certs")
    expect(postgres_server).to receive(:primary?).and_return(false)

    expect { nx.configure_walg_credentials }.to hop("initialize_database_from_backup")
  end

  it "doesn't put the blob_storage_ca if the timeline is aws" do
    expect(postgres_server.timeline).to receive(:aws?).and_return(true)
    expect(sshable).to receive(:cmd).with(walg_env_cmd, stdin: "walg config")
    # The CA certificate must not be written for an aws timeline.
    expect(sshable).not_to receive(:cmd).with(blob_ca_cmd, stdin: "certs")
    expect(postgres_server).to receive(:primary?).and_return(true)

    expect { nx.configure_walg_credentials }.to hop("initialize_empty_database")
  end
end
|
|
|
|
describe "#initialize_empty_database" do
  # Status probe issued by every example in this group.
  let(:status_check) { "common/bin/daemonizer --check initialize_empty_database" }

  it "triggers initialize_empty_database if initialize_empty_database command is not sent yet or failed" do
    expect(sshable).to receive(:cmd).with("common/bin/daemonizer 'sudo postgres/bin/initialize-empty-database 16' initialize_empty_database").twice

    # Each of these statuses is expected to trigger the command once and nap.
    %w[NotStarted Failed].each do |status|
      expect(sshable).to receive(:cmd).with(status_check).and_return(status)
      expect { nx.initialize_empty_database }.to nap(5)
    end
  end

  it "hops to refresh_certificates if initialize_empty_database command is succeeded" do
    expect(sshable).to receive(:cmd).with(status_check).and_return("Succeeded")

    expect { nx.initialize_empty_database }.to hop("refresh_certificates")
  end

  it "naps if script return unknown status" do
    expect(sshable).to receive(:cmd).with(status_check).and_return("Unknown")

    expect { nx.initialize_empty_database }.to nap(5)
  end
end
|
|
|
|
describe "#initialize_database_from_backup" do
  # Status probe issued by every example in this group.
  let(:status_check) { "common/bin/daemonizer --check initialize_database_from_backup" }

  it "triggers initialize_database_from_backup if initialize_database_from_backup command is not sent yet or failed" do
    expect(postgres_server.resource).to receive(:restore_target).and_return(Time.now).twice
    expect(postgres_server.timeline).to receive(:latest_backup_label_before_target).and_return("backup-label").twice
    expect(postgres_server).to receive(:standby?).and_return(false).twice
    expect(sshable).to receive(:cmd).with("common/bin/daemonizer 'sudo postgres/bin/initialize-database-from-backup 16 backup-label' initialize_database_from_backup").twice

    # Each of these statuses is expected to trigger the command once and nap.
    %w[NotStarted Failed].each do |status|
      expect(sshable).to receive(:cmd).with(status_check).and_return(status)
      expect { nx.initialize_database_from_backup }.to nap(5)
    end
  end

  it "hops to refresh_certificates if initialize_database_from_backup command is succeeded" do
    expect(sshable).to receive(:cmd).with(status_check).and_return("Succeeded")

    expect { nx.initialize_database_from_backup }.to hop("refresh_certificates")
  end

  it "naps if script return unknown status" do
    expect(sshable).to receive(:cmd).with(status_check).and_return("Unknown")

    expect { nx.initialize_database_from_backup }.to nap(5)
  end

  it "triggers initialize_database_from_backup with LATEST as backup_label for standbys" do
    expect(sshable).to receive(:cmd).with(status_check).and_return("NotStarted")
    expect(postgres_server).to receive(:standby?).and_return(true)
    # For a standby the backup label argument is the literal LATEST.
    expect(sshable).to receive(:cmd).with("common/bin/daemonizer 'sudo postgres/bin/initialize-database-from-backup 16 LATEST' initialize_database_from_backup")

    expect { nx.initialize_database_from_backup }.to nap(5)
  end
end
|
|
|
|
describe "#refresh_certificates" do
  # Expectations shared by the examples that push certificates: three files
  # are written via tee, then each gets the cert_readers group and mode 640.
  def expect_certificates_pushed
    expect(sshable).to receive(:cmd).with("sudo tee /etc/ssl/certs/ca.crt > /dev/null", stdin: "root_cert_1\nroot_cert_2")
    expect(sshable).to receive(:cmd).with("sudo tee /etc/ssl/certs/server.crt > /dev/null", stdin: "server_cert")
    expect(sshable).to receive(:cmd).with("sudo tee /etc/ssl/certs/server.key > /dev/null", stdin: "server_cert_key")
    expect(sshable).to receive(:cmd).with("sudo chgrp cert_readers /etc/ssl/certs/ca.crt && sudo chmod 640 /etc/ssl/certs/ca.crt")
    expect(sshable).to receive(:cmd).with("sudo chgrp cert_readers /etc/ssl/certs/server.crt && sudo chmod 640 /etc/ssl/certs/server.crt")
    expect(sshable).to receive(:cmd).with("sudo chgrp cert_readers /etc/ssl/certs/server.key && sudo chmod 640 /etc/ssl/certs/server.key")
  end

  it "waits for certificate creation by the parent resource" do
    expect(postgres_server.resource).to receive(:server_cert).and_return(nil)

    expect { nx.refresh_certificates }.to nap(5)
  end

  # Title matches the asserted hop target, configure_metrics.
  it "pushes certificates to vm and hops to configure_metrics during initial provisioning" do
    expect_certificates_pushed
    expect(nx).to receive(:refresh_walg_credentials)
    expect(nx).to receive(:when_initial_provisioning_set?).and_yield

    expect { nx.refresh_certificates }.to hop("configure_metrics")
  end

  it "hops to wait at times other than the initial provisioning" do
    expect_certificates_pushed
    # Outside initial provisioning, postgres and pgbouncer reloads are expected too.
    expect(sshable).to receive(:cmd).with("sudo -u postgres pg_ctlcluster 16 main reload")
    expect(sshable).to receive(:cmd).with("sudo systemctl reload pgbouncer@*.service")
    expect(nx).to receive(:refresh_walg_credentials)

    expect { nx.refresh_certificates }.to hop("wait")
  end
end
|
|
|
|
describe "#configure_metrics" do
  let(:metrics_config) { {interval: "30s", endpoints: ["https://localhost:9100/metrics"], metrics_dir: "/home/ubi/postgres/metrics"} }

  # Both prometheus config files are written; their contents are not checked.
  def expect_prometheus_configs_written
    expect(sshable).to receive(:cmd).with("sudo -u prometheus tee /home/prometheus/web-config.yml > /dev/null", stdin: anything)
    expect(sshable).to receive(:cmd).with("sudo -u prometheus tee /home/prometheus/prometheus.yml > /dev/null", stdin: anything)
  end

  # Exporter and prometheus services are enabled and started.
  def expect_exporters_enabled
    expect(sshable).to receive(:cmd).with("sudo systemctl enable --now postgres_exporter")
    expect(sshable).to receive(:cmd).with("sudo systemctl enable --now node_exporter")
    expect(sshable).to receive(:cmd).with("sudo systemctl enable --now prometheus")
  end

  # Exporter and prometheus services are reloaded, with a restart as fallback.
  def expect_exporters_reloaded
    expect(sshable).to receive(:cmd).with("sudo systemctl reload postgres_exporter || sudo systemctl restart postgres_exporter")
    expect(sshable).to receive(:cmd).with("sudo systemctl reload node_exporter || sudo systemctl restart node_exporter")
    expect(sshable).to receive(:cmd).with("sudo systemctl reload prometheus || sudo systemctl restart prometheus")
  end

  # The server reports +config+; the metrics directory, config.json, and the
  # systemd service/timer units are written, followed by a daemon-reload.
  # +timer_stdin+ is the matcher applied to the timer unit contents.
  def expect_metrics_units_installed(config, timer_stdin: anything)
    expect(postgres_server).to receive(:metrics_config).and_return(config)
    expect(sshable).to receive(:cmd).with("mkdir -p /home/ubi/postgres/metrics")
    expect(sshable).to receive(:cmd).with("tee /home/ubi/postgres/metrics/config.json > /dev/null", stdin: config.to_json)
    expect(sshable).to receive(:cmd).with("sudo tee /etc/systemd/system/postgres-metrics.service > /dev/null", stdin: anything)
    expect(sshable).to receive(:cmd).with("sudo tee /etc/systemd/system/postgres-metrics.timer > /dev/null", stdin: timer_stdin)
    expect(sshable).to receive(:cmd).with("sudo systemctl daemon-reload")
  end

  it "configures prometheus and metrics during initial provisioning" do
    expect(nx).to receive(:when_initial_provisioning_set?).and_yield
    expect_prometheus_configs_written
    expect_exporters_enabled

    expect_metrics_units_installed(metrics_config)
    expect(sshable).to receive(:cmd).with("sudo systemctl enable --now postgres-metrics.timer")

    expect { nx.configure_metrics }.to hop("setup_hugepages")
  end

  it "configures prometheus and metrics during initial provisioning and hops to setup_cloudwatch if timeline is AWS" do
    expect(nx).to receive(:when_initial_provisioning_set?).and_yield
    expect_prometheus_configs_written
    expect_exporters_enabled

    expect_metrics_units_installed(metrics_config)
    expect(sshable).to receive(:cmd).with("sudo systemctl enable --now postgres-metrics.timer")

    expect(postgres_server.timeline).to receive(:aws?).and_return(true)
    expect { nx.configure_metrics }.to hop("setup_cloudwatch")
  end

  it "configures prometheus and metrics and hops to wait at times other than initial provisioning" do
    expect_prometheus_configs_written
    expect_exporters_reloaded

    expect_metrics_units_installed(metrics_config)

    other_server = instance_double(PostgresServer, id: "random-id")
    expect(resource).to receive(:representative_server).and_return(other_server)
    expect { nx.configure_metrics }.to hop("wait")
  end

  it "uses default interval if not specified in config" do
    config_without_interval = {endpoints: ["https://localhost:9100/metrics"], metrics_dir: "/home/ubi/postgres/metrics"}

    expect_prometheus_configs_written
    expect_exporters_reloaded

    # With no :interval key, the timer unit is expected to contain a 15s interval.
    expect_metrics_units_installed(config_without_interval, timer_stdin: /OnUnitActiveSec=15s/)

    other_server = instance_double(PostgresServer, id: "random-id")
    expect(resource).to receive(:representative_server).and_return(other_server)
    expect { nx.configure_metrics }.to hop("wait")
  end
end
|
|
|
|
describe "#setup_cloudwatch" do
  it "hops to setup_hugepages after setting up cloudwatch" do
    # Expected sequence: create the config directory, write the config file
    # (contents unchecked), then invoke the agent control script against it.
    [
      ["sudo mkdir -p /opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.d"],
      ["sudo tee /opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.d/001-ubicloud-config.json > /dev/null", {stdin: anything}],
      ["sudo /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -c file:/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.d/001-ubicloud-config.json -s"]
    ].each do |command, options|
      if options
        expect(sshable).to receive(:cmd).with(command, **options)
      else
        expect(sshable).to receive(:cmd).with(command)
      end
    end

    expect { nx.setup_cloudwatch }.to hop("setup_hugepages")
  end
end
|
|
|
|
describe "#setup_hugepages" do
  # Daemonized unit name passed to every d_check / d_run / d_clean call below.
  let(:unit_name) { "setup_hugepages" }

  it "hops to configure if the setup succeeds" do
    expect(sshable).to receive(:d_check).with(unit_name).and_return("Succeeded")
    expect(sshable).to receive(:d_clean).with(unit_name)

    expect { nx.setup_hugepages }.to hop("configure")
  end

  it "retries the setup if it fails" do
    expect(sshable).to receive(:d_check).with(unit_name).and_return("Failed")
    expect(sshable).to receive(:d_run).with(unit_name, "sudo", "postgres/bin/setup-hugepages")

    expect { nx.setup_hugepages }.to nap(5)
  end

  it "starts the setup if it is not started" do
    expect(sshable).to receive(:d_check).with(unit_name).and_return("NotStarted")
    expect(sshable).to receive(:d_run).with(unit_name, "sudo", "postgres/bin/setup-hugepages")

    expect { nx.setup_hugepages }.to nap(5)
  end

  it "naps for 5 seconds if the setup is unknown" do
    expect(sshable).to receive(:d_check).with(unit_name).and_return("Unknown")

    expect { nx.setup_hugepages }.to nap(5)
  end
end
|
|
|
|
describe "#configure" do
|
|
it "triggers configure if configure command is not sent yet or failed" do
|
|
expect(postgres_server).to receive(:configure_hash).and_return("dummy-configure-hash").twice
|
|
expect(sshable).to receive(:cmd).with("common/bin/daemonizer 'sudo postgres/bin/configure 16' configure_postgres", stdin: JSON.generate("dummy-configure-hash")).twice
|
|
|
|
# NotStarted
|
|
expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check configure_postgres").and_return("NotStarted")
|
|
expect { nx.configure }.to nap(5)
|
|
|
|
# Failed
|
|
expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check configure_postgres").and_return("Failed")
|
|
expect { nx.configure }.to nap(5)
|
|
end
|
|
|
|
it "hops to update_superuser_password if configure command is succeeded during the initial provisioning and if the server is primary" do
|
|
expect(nx).to receive(:when_initial_provisioning_set?).and_yield
|
|
expect(sshable).to receive(:cmd).with("common/bin/daemonizer --clean configure_postgres").and_return("Succeeded")
|
|
expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check configure_postgres").and_return("Succeeded")
|
|
expect(postgres_server).to receive(:primary?).and_return(true)
|
|
expect { nx.configure }.to hop("update_superuser_password")
|
|
end
|
|
|
|
it "hops to wait_catch_up if configure command is succeeded during the initial provisioning and if the server is standby" do
|
|
expect(nx).to receive(:when_initial_provisioning_set?).and_yield
|
|
expect(sshable).to receive(:cmd).with("common/bin/daemonizer --clean configure_postgres").and_return("Succeeded")
|
|
expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check configure_postgres").and_return("Succeeded")
|
|
expect(postgres_server).to receive(:primary?).and_return(false)
|
|
expect(postgres_server).to receive(:standby?).and_return(true)
|
|
expect { nx.configure }.to hop("wait_catch_up")
|
|
end
|
|
|
|
it "hops to wait_recovery_completion if configure command is succeeded during the initial provisioning and if the server is doing pitr" do
|
|
expect(nx).to receive(:when_initial_provisioning_set?).and_yield
|
|
expect(sshable).to receive(:cmd).with("common/bin/daemonizer --clean configure_postgres").and_return("Succeeded")
|
|
expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check configure_postgres").and_return("Succeeded")
|
|
expect(postgres_server).to receive(:primary?).and_return(false)
|
|
expect(postgres_server).to receive(:standby?).and_return(false)
|
|
expect { nx.configure }.to hop("wait_recovery_completion")
|
|
end
|
|
|
|
it "hops to wait for primaries if configure command is succeeded at times other than the initial provisioning" do
  check_cmd = "common/bin/daemonizer --check configure_postgres"
  clean_cmd = "common/bin/daemonizer --clean configure_postgres"

  # The semaphore check is expected but never yields, so the
  # initial-provisioning block is not executed.
  expect(nx).to receive(:when_initial_provisioning_set?)

  expect(sshable).to receive(:cmd).with(check_cmd).and_return("Succeeded")
  expect(sshable).to receive(:cmd).with(clean_cmd).and_return("Succeeded")

  # The server is not a standby.
  expect(postgres_server).to receive(:standby?).and_return(false)

  expect { nx.configure }.to hop("wait")
end
|
|
|
|
it "hops to wait_catchup for standbys if configure command is succeeded at times other than the initial provisioning" do
  # Expected without and_yield: the initial-provisioning block does not run.
  expect(nx).to receive(:when_initial_provisioning_set?)

  %w[--clean --check].each do |flag|
    daemonizer_cmd =
      if flag == "--clean"
        "common/bin/daemonizer --clean configure_postgres"
      else
        "common/bin/daemonizer --check configure_postgres"
      end
    expect(sshable).to receive(:cmd).with(daemonizer_cmd).and_return("Succeeded")
  end

  # A standby whose synchronization status is still "catching_up".
  expect(postgres_server).to receive(:synchronization_status).and_return("catching_up")
  expect(postgres_server).to receive(:standby?).and_return(true)

  expect { nx.configure }.to hop("wait_catch_up")
end
|
|
|
|
# The description previously said "hops to wait", but the asserted label is
# "wait_catch_up", and the example runs the initial-provisioning branch.
it "hops to wait_catch_up for read replicas if configure command is succeeded during the initial provisioning" do
  expect(nx).to receive(:when_initial_provisioning_set?).and_yield

  # Both daemonizer invocations are stubbed to answer "Succeeded".
  expect(sshable).to receive(:cmd).with("common/bin/daemonizer --clean configure_postgres").and_return("Succeeded")
  expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check configure_postgres").and_return("Succeeded")

  # Not a primary and not a standby, but a read replica.
  expect(postgres_server).to receive(:primary?).and_return(false)
  expect(postgres_server).to receive(:standby?).and_return(false)
  expect(postgres_server).to receive(:read_replica?).and_return(true)

  expect { nx.configure }.to hop("wait_catch_up")
end
|
|
|
|
it "naps if script return unknown status" do
  status_cmd = "common/bin/daemonizer --check configure_postgres"

  # The status check answers with a value the other examples never use.
  expect(sshable).to receive(:cmd).with(status_cmd).and_return("Unknown")

  expect { nx.configure }.to nap(5)
end
|
|
end
|
|
|
|
describe "#update_superuser_password" do
  # Every example expects exactly one query that matches this pattern.
  before do
    expect(postgres_server).to receive(:run_query).with(/log_statement = 'none'.*\n.*SCRAM-SHA-256/)
  end

  # Shared setup for the examples where the strand already carries the
  # restart return value and the server is a primary.
  def expect_restart_already_executed
    expect(nx).to receive(:when_initial_provisioning_set?).and_yield
    expect(nx.strand).to receive(:retval).and_return({"msg" => "postgres server is restarted"})
    expect(postgres_server).to receive(:primary?).and_return(true)
  end

  it "updates password and pushes restart during the initial provisioning" do
    expect(nx).to receive(:when_initial_provisioning_set?).and_yield
    expect(nx).to receive(:push).with(described_class, {}, "restart").and_call_original

    expect { nx.update_superuser_password }.to hop("restart")
  end

  it "updates password and hops to wait during initial provisioning if restart is already executed" do
    expect_restart_already_executed
    expect(resource).to receive(:flavor).and_return(PostgresResource::Flavor::STANDARD)

    expect { nx.update_superuser_password }.to hop("wait")
  end

  it "updates password and hops to run_post_installation_script during initial provisioning for non-standard flavors if restart is already executed" do
    expect_restart_already_executed
    expect(resource).to receive(:flavor).and_return(PostgresResource::Flavor::PARADEDB)

    expect { nx.update_superuser_password }.to hop("run_post_installation_script")
  end

  it "updates password and hops to wait at times other than the initial provisioning" do
    # Expected without and_yield: the initial-provisioning block does not run.
    expect(nx).to receive(:when_initial_provisioning_set?)

    expect { nx.update_superuser_password }.to hop("wait")
  end
end
|
|
|
|
describe "#run_post_installation_script" do
  it "runs post installation script and hops wait" do
    # Only requires that the command mentions the post-installation script.
    script_pattern = /post-installation-script/
    expect(sshable).to receive(:cmd).with(script_pattern)

    expect { nx.run_post_installation_script }.to hop("wait")
  end
end
|
|
|
|
describe "#wait_catch_up" do
  # Shared setup for a server that has caught up: it is marked "ready",
  # asked to reconfigure, and the resource reports the given HA type.
  def expect_caught_up_with(ha_type)
    expect(postgres_server).to receive(:lsn_caught_up).and_return(true)
    expect(postgres_server).to receive(:update).with(synchronization_status: "ready")
    expect(postgres_server).to receive(:incr_configure)
    expect(postgres_server.resource).to receive(:ha_type).and_return(ha_type)
  end

  it "naps if the lag is too high" do
    # Two return values: one per invocation below.
    expect(postgres_server).to receive(:lsn_caught_up).and_return(false, false)

    2.times { expect { nx.wait_catch_up }.to nap(30) }
  end

  it "sets the synchronization_status and hops to wait_synchronization for sync replication" do
    expect_caught_up_with(PostgresResource::HaType::SYNC)

    expect { nx.wait_catch_up }.to hop("wait_synchronization")
  end

  it "sets the synchronization_status and hops to wait for async replication" do
    expect_caught_up_with(PostgresResource::HaType::ASYNC)

    expect { nx.wait_catch_up }.to hop("wait")
  end

  it "hops to wait if replica and caught up" do
    expect(postgres_server).to receive(:lsn_caught_up).and_return(true)
    expect(postgres_server).to receive(:read_replica?).and_return(true)

    expect { nx.wait_catch_up }.to hop("wait")
  end
end
|
|
|
|
describe "#wait_synchronization" do
  it "hops to wait if sync replication is established" do
    # First call answers "quorum", second answers "sync"; both lead to a hop.
    expect(postgres_server).to receive(:run_query).and_return("quorum", "sync")

    2.times { expect { nx.wait_synchronization }.to hop("wait") }
  end

  it "naps if sync replication is not established" do
    # First call answers an empty string, second answers "async"; both lead to a nap.
    expect(postgres_server).to receive(:run_query).and_return("", "async")

    2.times { expect { nx.wait_synchronization }.to nap(30) }
  end
end
|
|
|
|
describe "#wait_recovery_completion" do
  let(:in_recovery_query) { "SELECT pg_is_in_recovery()" }
  let(:replay_pause_query) { "SELECT pg_get_wal_replay_pause_state()" }

  # Expectations shared by the two examples that end on a new timeline:
  # the wal-g config and blob storage certificates are written over SSH,
  # a timeline strand is assembled, and the server is pointed at it with
  # "push" access before being saved.
  def expect_switch_to_new_timeline
    new_timeline_id = "375b1399-ec21-8eda-8859-2faee6ff6613"

    expect(sshable).to receive(:cmd).with("sudo -u postgres tee /etc/postgresql/wal-g.env > /dev/null", stdin: "walg config")
    expect(sshable).to receive(:cmd).with("sudo tee /usr/lib/ssl/certs/blob_storage_ca.crt > /dev/null", stdin: "certs")

    expect(Prog::Postgres::PostgresTimelineNexus).to receive(:assemble).and_return(instance_double(Strand, id: new_timeline_id))
    expect(postgres_server).to receive(:timeline_id=).with(new_timeline_id)
    expect(postgres_server).to receive(:timeline_access=).with("push")
    expect(postgres_server).to receive(:save_changes)
  end

  it "naps if it is still in recovery and wal replay is not paused" do
    expect(postgres_server).to receive(:run_query).with(in_recovery_query).and_return("t")
    expect(postgres_server).to receive(:run_query).with(replay_pause_query).and_return("not paused")

    expect { nx.wait_recovery_completion }.to nap(5)
  end

  it "naps if it cannot connect to database due to recovery" do
    # The error carries the message used to signal that recovery has not
    # reached a consistent state yet.
    recovery_error = Sshable::SshError.new("", nil, "Consistent recovery state has not been yet reached.", nil, nil)
    expect(postgres_server).to receive(:run_query).with(in_recovery_query).and_raise(recovery_error)

    expect { nx.wait_recovery_completion }.to nap(5)
  end

  # Description corrected from "due a problem other than to continueing recovery".
  it "raises error if it cannot connect to database due to a problem other than continuing recovery" do
    unrelated_error = Sshable::SshError.new("", nil, "Bogus", nil, nil)
    expect(postgres_server).to receive(:run_query).with(in_recovery_query).and_raise(unrelated_error)

    expect { nx.wait_recovery_completion }.to raise_error(Sshable::SshError)
  end

  it "stops wal replay and switches to new timeline if it is still in recovery but wal replay is paused" do
    expect(postgres_server).to receive(:run_query).with(in_recovery_query).and_return("t")
    expect(postgres_server).to receive(:run_query).with(replay_pause_query).and_return("paused")
    expect(postgres_server).to receive(:run_query).with("SELECT pg_wal_replay_resume()")
    expect_switch_to_new_timeline

    expect { nx.wait_recovery_completion }.to hop("configure")
  end

  it "switches to new timeline if the recovery is completed" do
    expect(postgres_server).to receive(:run_query).with(in_recovery_query).and_return("f")
    expect_switch_to_new_timeline

    expect { nx.wait_recovery_completion }.to hop("configure")
  end
end
|
|
|
|
describe "#wait" do
  # Nap duration asserted whenever no semaphore causes a hop.
  let(:idle_nap) { 6 * 60 * 60 }

  it "naps" do
    expect { nx.wait }.to nap(idle_nap)
  end

  it "hops to fence if fence is set" do
    expect(nx).to receive(:when_fence_set?).and_yield
    expect { nx.wait }.to hop("fence")
  end

  # The next two descriptions name the exact semaphore; both previously
  # read "if take_over is set".
  it "hops to prepare_for_unplanned_take_over if unplanned_take_over is set" do
    expect(nx).to receive(:when_unplanned_take_over_set?).and_yield
    expect { nx.wait }.to hop("prepare_for_unplanned_take_over")
  end

  it "hops to prepare_for_planned_take_over if planned_take_over is set" do
    expect(nx).to receive(:when_planned_take_over_set?).and_yield
    expect { nx.wait }.to hop("prepare_for_planned_take_over")
  end

  it "hops to refresh_certificates if refresh_certificates is set" do
    expect(nx).to receive(:when_refresh_certificates_set?).and_yield
    expect { nx.wait }.to hop("refresh_certificates")
  end

  it "hops to update_superuser_password if update_superuser_password is set" do
    expect(nx).to receive(:when_update_superuser_password_set?).and_yield
    expect { nx.wait }.to hop("update_superuser_password")
  end

  it "hops to unavailable if checkup is set and the server is not available" do
    expect(nx).to receive(:when_checkup_set?).and_yield
    expect(nx).to receive(:available?).and_return(false)
    expect { nx.wait }.to hop("unavailable")
  end

  it "naps if checkup is set but the server is available" do
    expect(nx).to receive(:when_checkup_set?).and_yield
    expect(nx).to receive(:available?).and_return(true)
    expect { nx.wait }.to nap(idle_nap)
  end

  it "hops to configure_metrics if configure_metrics is set" do
    expect(nx).to receive(:when_configure_metrics_set?).and_yield
    expect { nx.wait }.to hop("configure_metrics")
  end

  it "hops to configure if configure is set" do
    expect(nx).to receive(:when_configure_set?).and_yield
    expect { nx.wait }.to hop("configure")
  end

  it "decrements and calls refresh_walg_credentials if refresh_walg_credentials is set" do
    expect(nx).to receive(:when_refresh_walg_credentials_set?).and_yield
    expect(nx).to receive(:decr_refresh_walg_credentials)
    expect(nx).to receive(:refresh_walg_credentials)
    expect { nx.wait }.to nap(idle_nap)
  end

  it "pushes restart if restart is set" do
    expect(nx).to receive(:when_restart_set?).and_yield
    expect(nx).to receive(:push).with(described_class, {}, "restart").and_call_original
    expect { nx.wait }.to hop("restart")
  end

  it "promotes" do
    expect(nx).to receive(:when_promote_set?).and_yield
    expect(nx).to receive(:switch_to_new_timeline)
    expect { nx.wait }.to hop("taking_over")
  end

  describe "read replica" do
    # Every example here runs against a read replica whose resource has a parent.
    before do
      expect(postgres_server).to receive(:read_replica?).and_return(true)
      expect(postgres_server.resource).to receive(:parent).and_return(true)
    end

    it "checks if it was already lagging and the lag continues, if so, starts recycling" do
      expect(postgres_server).to receive(:lsn_caught_up).and_return(false)
      expect(postgres_server).to receive(:current_lsn).and_return("1/A")

      # The stored LSN equals the current one, so the reported diff is zero.
      expect(nx.strand).to receive(:stack).and_return([{"lsn" => "1/A"}]).at_least(:once)
      expect(postgres_server).to receive(:lsn_diff).with("1/A", "1/A").and_return(0)
      expect(postgres_server).to receive(:recycle_set?).and_return(false)
      expect(postgres_server).to receive(:incr_recycle)
      expect { nx.wait }.to nap(60)
    end

    it "does not increment recycle if it is incremented already" do
      expect(postgres_server).to receive(:lsn_caught_up).and_return(false)
      expect(postgres_server).to receive(:current_lsn).and_return("1/A")

      expect(nx.strand).to receive(:stack).and_return([{"lsn" => "1/A"}]).at_least(:once)
      expect(postgres_server).to receive(:lsn_diff).with("1/A", "1/A").and_return(0)
      # recycle is already set, so it must not be incremented again.
      expect(postgres_server).to receive(:recycle_set?).and_return(true)
      expect(postgres_server).not_to receive(:incr_recycle)
      expect { nx.wait }.to nap(60)
    end

    it "checks if it wasn't already lagging but the lag exists, if so, update the stack and nap" do
      expect(postgres_server).to receive(:lsn_caught_up).and_return(false)
      expect(postgres_server).to receive(:current_lsn).and_return("1/A")

      # The frame holds no "lsn" entry yet.
      expect(nx.strand).to receive(:stack).and_return([{}]).at_least(:once)
      expect(nx).to receive(:update_stack_lsn).with("1/A")
      expect { nx.wait }.to nap(900)
    end

    it "checks if there is no lag, simply naps" do
      expect(postgres_server).to receive(:lsn_caught_up).and_return(true)
      expect { nx.wait }.to nap(60)
    end

    it "checks if there was a lag, and it still exist but we are progressing, so, we update the stack and nap" do
      expect(postgres_server).to receive(:lsn_caught_up).and_return(false)
      expect(postgres_server).to receive(:current_lsn).and_return("1/A")

      # The stored LSN differs from the current one, and the reported diff is 1.
      expect(nx.strand).to receive(:stack).and_return([{"lsn" => "1/9"}]).at_least(:once)
      expect(postgres_server).to receive(:lsn_diff).with("1/A", "1/9").and_return(1)
      expect(nx).to receive(:decr_recycle)
      expect(nx).to receive(:update_stack_lsn).with("1/A")
      expect { nx.wait }.to nap(900)
    end
  end
end
|
|
|
|
describe "#unavailable" do
  # A resource double that reports no ongoing failover.
  let(:resource_without_failover) { instance_double(PostgresResource, ongoing_failover?: false) }

  # Every example expects the server's resource to be looked up exactly once.
  before do
    expect(postgres_server).to receive(:resource).and_return(resource_without_failover)
  end

  it "hops to wait if the server is available" do
    expect(postgres_server).to receive(:trigger_failover).and_return(false)
    expect(nx).to receive(:available?).and_return(true)

    expect { nx.unavailable }.to hop("wait")
  end

  it "buds restart if the server is not available" do
    expect(postgres_server).to receive(:trigger_failover).and_return(false)
    expect(nx).to receive(:available?).and_return(false)
    expect(nx).to receive(:bud).with(described_class, {}, :restart)

    expect { nx.unavailable }.to nap(5)
  end

  it "does not bud restart if there is already one restart going on" do
    # A child strand labelled "restart" already exists under this strand.
    Strand.create(parent_id: st.id, prog: "Postgres::PostgresServerNexus", label: "restart", stack: [{}], lease: Time.now + 10)
    expect(postgres_server).to receive(:trigger_failover).and_return(false)

    expect { nx.unavailable }.to nap(5)

    restart_strands = Strand.where(prog: "Postgres::PostgresServerNexus", label: "restart")
    expect(restart_strands.count).to eq 1
  end

  it "trigger_failover succeeds, naps 0" do
    expect(postgres_server).to receive(:trigger_failover).and_return(true)

    expect { nx.unavailable }.to nap(0)
  end
end
|
|
|
|
describe "#fence" do
  it "runs checkpoints and perform lockout" do
    checkpoint_sql = "CHECKPOINT; CHECKPOINT; CHECKPOINT;"
    ssh_commands = [
      "sudo postgres/bin/lockout 16",
      "sudo pg_ctlcluster 16 main stop -m smart"
    ]

    expect(nx).to receive(:decr_fence)
    expect(postgres_server).to receive(:run_query).with(checkpoint_sql)
    # Each SSH command is expected exactly once.
    ssh_commands.each { |ssh_cmd| expect(sshable).to receive(:cmd).with(ssh_cmd) }

    expect { nx.fence }.to nap(6 * 60 * 60)
  end
end
|
|
|
|
describe "#prepare_for_unplanned_take_over" do
  let(:old_representative) { instance_double(PostgresServer) }
  let(:immediate_stop_cmd) { "sudo pg_ctlcluster 16 main stop -m immediate" }

  # Both examples share this setup: the resource hands out the current
  # representative server, whose VM is reached through the shared sshable,
  # and that server is expected to be marked for destruction.
  before do
    expect(postgres_server.resource).to receive(:representative_server).and_return(old_representative)
    expect(nx).to receive(:decr_unplanned_take_over)
    expect(old_representative).to receive(:vm).and_return(instance_double(Vm, sshable: sshable))
    expect(old_representative).to receive(:incr_destroy)
  end

  it "stops postgres in representative server and destroys it" do
    expect(sshable).to receive(:cmd).with(immediate_stop_cmd)

    expect { nx.prepare_for_unplanned_take_over }.to hop("taking_over")
  end

  it "handles SSH connection errors gracefully and continues with destroy" do
    # The stop command fails over SSH; the hop is still expected.
    expect(sshable).to receive(:cmd).with(immediate_stop_cmd).and_raise(Sshable::SshError.new("", "", "", "", ""))

    expect { nx.prepare_for_unplanned_take_over }.to hop("taking_over")
  end
end
|
|
|
|
describe "#prepare_for_planned_take_over" do
  it "starts fencing on representative server" do
    current_primary = instance_double(PostgresServer)

    # The resource's representative server is the one asked to fence.
    expect(postgres_server.resource).to receive(:representative_server).and_return(current_primary)
    expect(current_primary).to receive(:incr_fence)
    expect(nx).to receive(:decr_planned_take_over)

    expect { nx.prepare_for_planned_take_over }.to hop("wait_fencing_of_old_primary")
  end
end
|
|
|
|
describe "#wait_fencing_of_old_primary" do
  let(:old_primary) { instance_double(PostgresServer) }

  it "naps immediately if fence is set" do
    expect(postgres_server.resource).to receive(:representative_server).and_return(old_primary)
    # The fence semaphore is still set on the old primary.
    expect(old_primary).to receive(:fence_set?).and_return(true)

    expect { nx.wait_fencing_of_old_primary }.to nap(0)
  end

  it "destroys old primary and hops to taking_over when fence is not set" do
    # The representative server may be looked up more than once on this path.
    expect(postgres_server.resource).to receive(:representative_server).and_return(old_primary).at_least(:once)
    expect(old_primary).to receive(:fence_set?).and_return(false)
    expect(old_primary).to receive(:incr_destroy)

    expect { nx.wait_fencing_of_old_primary }.to hop("taking_over")
  end
end
|
|
|
|
describe "#taking_over" do
  let(:promote_check_cmd) { "common/bin/daemonizer --check promote_postgres" }

  it "triggers promote if promote command is not sent yet or failed" do
    # One status per invocation below; the promote command is expected after each.
    expect(sshable).to receive(:cmd).with(promote_check_cmd).and_return("NotStarted", "Failed")
    expect(sshable).to receive(:cmd).with("common/bin/daemonizer 'sudo pg_ctlcluster 16 main promote' promote_postgres").twice

    2.times { expect { nx.taking_over }.to nap(0) }
  end

  it "updates the metadata and hops to configure if promote command is succeeded" do
    follower = instance_double(PostgresServer, primary?: false)

    expect(sshable).to receive(:cmd).with(promote_check_cmd).and_return("Succeeded")

    # The promoted server is updated to push access and marked "ready".
    expect(postgres_server).to receive(:update).with(timeline_access: "push", representative_at: anything, synchronization_status: "ready")
    expect(postgres_server).to receive(:primary?).and_return(true)
    expect(postgres_server.resource).to receive(:incr_refresh_dns_record)

    # The other server is put back into "catching_up".
    expect(follower).to receive(:update).with(synchronization_status: "catching_up")

    # Both servers receive the same three semaphores.
    [postgres_server, follower].each do |server|
      %i[incr_configure incr_configure_metrics incr_restart].each do |semaphore|
        expect(server).to receive(semaphore)
      end
    end

    expect(postgres_server.resource).to receive(:servers).at_least(:once).and_return([postgres_server, follower])

    expect { nx.taking_over }.to hop("configure")
  end

  it "naps if script return unknown status" do
    expect(sshable).to receive(:cmd).with(promote_check_cmd).and_return("Unknown")

    expect { nx.taking_over }.to nap(5)
  end

  describe "read_replica" do
    it "updates the representative server, refreshes dns and destroys the old representative_server and hops to configure when read_replica" do
      # Capture the clock before stubbing it so the expected timestamp is exact.
      frozen_now = Time.now
      expect(postgres_server).to receive(:read_replica?).and_return(true)
      expect(Time).to receive(:now).and_return(frozen_now)

      expect(postgres_server).to receive(:update).with(representative_at: frozen_now)
      expect(postgres_server.resource).to receive(:incr_refresh_dns_record)
      expect(postgres_server.resource).to receive(:servers).at_least(:once).and_return([postgres_server])
      expect(postgres_server).to receive(:incr_configure_metrics)

      expect { nx.taking_over }.to hop("configure")
    end
  end
end
|
|
|
|
describe "#destroy" do
  it "deletes resources and exits" do
    exit_payload = {"msg" => "postgres server is deleted"}

    # The server record is destroyed and its VM is marked for destruction.
    expect(postgres_server).to receive(:destroy)
    expect(postgres_server.vm).to receive(:incr_destroy)

    expect { nx.destroy }.to exit(exit_payload)
  end
end
|
|
|
|
describe "#restart" do
  it "restarts and exits" do
    # Each restart command is expected exactly once over SSH.
    [
      "sudo postgres/bin/restart 16",
      "sudo systemctl restart pgbouncer@*.service"
    ].each { |restart_cmd| expect(sshable).to receive(:cmd).with(restart_cmd) }

    expect { nx.restart }.to exit({"msg" => "postgres server is restarted"})
  end
end
|
|
|
|
describe "#refresh_walg_credentials" do
  # Description typo fixed: "configures" -> "configured".
  it "returns nil if blob storage is not configured" do
    # The timeline reports no blob storage at all.
    expect(postgres_server.timeline).to receive(:blob_storage).and_return(nil)

    expect(nx.refresh_walg_credentials).to be_nil
  end
end
|
|
|
|
describe "#available?" do
  let(:health_query) { "SELECT 1" }
  let(:log_tail_cmd) { "sudo tail -n 5 /dat/16/data/pg_log/postgresql.log" }

  # Every example expects the sshable cache entry to be invalidated once.
  before do
    expect(sshable).to receive(:invalidate_cache_entry)
  end

  it "returns true if health check is successful" do
    expect(postgres_server).to receive(:run_query).with(health_query).and_return("1")

    expect(nx.available?).to be(true)
  end

  it "returns true if the database is in crash recovery" do
    # The query fails, and the log tail contains "redo in progress".
    expect(postgres_server).to receive(:run_query).with(health_query).and_raise(Sshable::SshError)
    expect(sshable).to receive(:cmd).with(log_tail_cmd).and_return("redo in progress")

    expect(nx.available?).to be(true)
  end

  it "returns false otherwise" do
    # The query fails, and the log tail does not contain that phrase.
    expect(postgres_server).to receive(:run_query).with(health_query).and_raise(Sshable::SshError)
    expect(sshable).to receive(:cmd).with(log_tail_cmd).and_return("not doing redo")

    expect(nx.available?).to be(false)
  end
end
|
|
|
|
# Named with "#" because update_stack_lsn is called on an instance (nx),
# matching the other groups in this file; "." denotes a class method.
describe "#update_stack_lsn" do
  it "updates the lsn in the current frame" do
    stack = [{"lsn" => "hello"}]
    nx.strand.stack = stack

    # The strand must be flagged as modified.
    expect(nx.strand).to receive(:modified!)

    nx.update_stack_lsn("update")

    # The first frame of the assigned stack now carries the new LSN.
    expect(stack.first["lsn"]).to eq("update")
  end
end
|
|
end
|