mirror of
https://github.com/ubicloud/ubicloud.git
synced 2025-10-06 14:51:57 +08:00
The Converge prog is now also responsible for matching the current Postgres version to the desired version. If there is a mismatch (current < desired), the Converge prog is launched. Roughly, the Converge prog does the following: 1. Provisions new servers. In case of upgrades, it only provisions up to one new standby if no existing standby is suitable for upgrades. 2. Waits for the required servers to be ready. 3. Waits for the maintenance window to start. 4. Fences the primary server and launches pg_upgrade. 5. If the upgrade is successful, replaces the current primary with the candidate standby. If the upgrade fails, we delete the candidate standby and unfence the primary to bring the database back. During the upgrade, health checking is effectively disabled, because auto-recovery conflicts with the several restarts of various versions on the candidate.
269 lines
14 KiB
Ruby
269 lines
14 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require_relative "../../model/spec_helper"
|
|
|
|
RSpec.describe Prog::Postgres::PostgresTimelineNexus do
|
|
# Prog under test, driven by a Strand with a fixed arbitrary UUID so runs
# are deterministic.
subject(:nx) { described_class.new(Strand.new(id: "8148ebdf-66b8-8ed0-9c2f-8cfe93f5aa77")) }

# Fully stubbed timeline: a minio-backed (aws?: false) timeline with dummy
# credentials and a canned bucket policy, so no real blob storage is touched.
let(:postgres_timeline) {
  instance_double(
    PostgresTimeline,
    id: "b253669e-1cf5-8ada-9337-5fc319690838",
    ubid: "ptp99pd7gwyp4jcvnzgrsd443g",
    blob_storage: instance_double(MinioCluster, url: "https://blob-endpoint", root_certs: "certs"),
    blob_storage_endpoint: "https://blob-endpoint",
    blob_storage_client: instance_double(Minio::Client),
    access_key: "dummy-access-key",
    secret_key: "dummy-secret-key",
    blob_storage_policy: {"Version" => "2012-10-17", "Statement" => [{"Action" => ["s3:GetBucketLocation"], "Effect" => "Allow", "Principal" => {"AWS" => ["*"]}, "Resource" => ["arn:aws:s3:::test"], "Sid" => ""}]},
    aws?: false
  )
}

before do
  # Every example operates on the stubbed timeline above.
  allow(nx).to receive(:postgres_timeline).and_return(postgres_timeline)
end
|
|
|
|
# Guard clauses and persistence behavior of .assemble.
describe ".assemble" do
  it "throws an exception if parent is not found" do
    # A parent_id that matches no existing timeline must be rejected.
    expect {
      described_class.assemble(location_id: Location::HETZNER_FSN1_ID, parent_id: "69c0f4cd-99c1-8ed0-acfe-7b013ce2fa0b")
    }.to raise_error RuntimeError, "No existing parent"
  end

  it "throws an exception if location is not found" do
    expect {
      described_class.assemble(location_id: nil)
    }.to raise_error RuntimeError, "No existing location"
  end

  it "creates postgres timeline" do
    strand = described_class.assemble(location_id: Location::HETZNER_FSN1_ID)

    # The strand id doubles as the timeline's primary key.
    expect(PostgresTimeline[strand.id]).not_to be_nil
  end

  it "creates postgres timeline with blob storage when it exists" do
    service_project = Project.create(name: "mc-project")
    expect(Config).to receive(:minio_service_project_id).and_return(service_project.id).at_least(:once)
    expect(Config).to receive(:postgres_service_project_id).and_return(service_project.id)
    minio_cluster = Prog::Minio::MinioClusterNexus.assemble(service_project.id, "minio", Location::HETZNER_FSN1_ID, "minio-admin", 100, 1, 1, 1, "standard-2").subject

    strand = described_class.assemble(location_id: Location::HETZNER_FSN1_ID)

    # The newly assembled timeline must be wired to the pre-existing cluster.
    expect(PostgresTimeline[strand.id].blob_storage_id).to eq(minio_cluster.id)
  end
end
|
|
|
|
describe "#before_run" do
  it "hops to destroy when needed" do
    expect(nx).to receive(:when_destroy_set?).and_yield
    expect { nx.before_run }.to hop("destroy")
  end

  it "does not hop to destroy if already in the destroy state" do
    expect(nx).to receive(:when_destroy_set?).and_yield
    # Already at "destroy": hopping again would restart the destroy flow.
    expect(nx.strand).to receive(:label).and_return("destroy")
    expect { nx.before_run }.not_to hop("destroy")
  end
end
|
|
|
|
describe "#start" do
  let(:admin_blob_storage_client) { instance_double(Minio::Client) }

  describe "when blob storage is minio" do
    it "creates user and policies and hops" do
      # The cluster's admin credentials are used to provision a dedicated
      # user plus bucket policy for this timeline.
      expect(postgres_timeline).to receive(:blob_storage).and_return(instance_double(MinioCluster, url: "https://blob-endpoint", root_certs: "certs", admin_user: "admin", admin_password: "secret")).at_least(:once)
      expect(Minio::Client).to receive(:new).with(endpoint: "https://blob-endpoint", access_key: "admin", secret_key: "secret", ssl_ca_data: "certs").and_return(admin_blob_storage_client)
      expect(admin_blob_storage_client).to receive(:admin_add_user).with(postgres_timeline.access_key, postgres_timeline.secret_key).and_return(200)
      expect(admin_blob_storage_client).to receive(:admin_policy_add).with(postgres_timeline.ubid, postgres_timeline.blob_storage_policy).and_return(200)
      expect(admin_blob_storage_client).to receive(:admin_policy_set).with(postgres_timeline.ubid, postgres_timeline.access_key).and_return(200)
      expect { nx.start }.to hop("setup_bucket")
    end
  end

  describe "when blob storage is aws s3" do
    it "creates user and policies and hops" do
      expect(postgres_timeline).to receive(:aws?).and_return(true)
      expect(postgres_timeline).to receive(:location).and_return(instance_double(Location, name: "us-west-2", location_credential: instance_double(LocationCredential, access_key: "access-key", secret_key: "secret-key"))).at_least(:once)
      # Stubbed IAM client: user/policy creation succeed and a fresh access
      # key pair is returned.
      client = Aws::IAM::Client.new(stub_responses: true)
      expect(Aws::IAM::Client).to receive(:new).and_return(client)
      client.stub_responses(:create_user)
      client.stub_responses(:create_policy)
      client.stub_responses(:attach_user_policy)
      client.stub_responses(:create_access_key, access_key: {access_key_id: "access-key", secret_access_key: "secret-key", user_name: "username", status: "Active"})
      # The new key pair is persisted and the leader is signaled to refresh
      # its wal-g credentials.
      expect(postgres_timeline).to receive(:update).with(access_key: "access-key", secret_key: "secret-key").and_return(postgres_timeline)
      expect(postgres_timeline).to receive(:leader).and_return(instance_double(PostgresServer, strand: instance_double(Strand, label: "wait"))).at_least(:once)
      expect(postgres_timeline.leader).to receive(:incr_refresh_walg_credentials)
      expect { nx.start }.to hop("setup_bucket")
    end
  end

  it "hops without creating bucket if blob storage is not configured" do
    expect(postgres_timeline).to receive(:blob_storage).and_return(nil)
    expect(nx).not_to receive(:setup_blob_storage)
    expect { nx.start }.to hop("wait_leader")
  end
end
|
|
|
|
describe "#setup_bucket" do
  it "hops to wait_leader if bucket is created" do
    expect(postgres_timeline).to receive(:create_bucket).and_return(true)
    expect(postgres_timeline).to receive(:set_lifecycle_policy).and_return(true)
    expect { nx.setup_bucket }.to hop("wait_leader")
  end

  it "naps if aws and the key is not available" do
    expect(postgres_timeline).to receive(:aws?).and_return(true)
    expect(postgres_timeline).to receive(:location).and_return(instance_double(Location, name: "us-west-2", location_credential: instance_double(LocationCredential, access_key: "access-key", secret_key: "secret-key"))).at_least(:once)
    iam_client = Aws::IAM::Client.new(stub_responses: true)
    expect(Aws::IAM::Client).to receive(:new).and_return(iam_client)
    # IAM lists a key that differs from the timeline's, so the prog naps
    # until the freshly created key becomes visible.
    iam_client.stub_responses(:list_access_keys, access_key_metadata: [{access_key_id: "access-key"}])
    expect(postgres_timeline).to receive(:access_key).and_return("not-access-key")
    expect { nx.setup_bucket }.to nap(1)
  end

  it "hops to wait_leader if aws and the key is available" do
    expect(postgres_timeline).to receive(:aws?).and_return(true)
    expect(nx).to receive(:aws_access_key_is_available?).and_return(true)
    expect(postgres_timeline).to receive(:create_bucket).and_return(true)
    expect(postgres_timeline).to receive(:set_lifecycle_policy).and_return(true)
    expect { nx.setup_bucket }.to hop("wait_leader")
  end
end
|
|
|
|
describe "#wait_leader" do
  # Build a PostgresServer double whose strand is parked at +label+.
  def leader_with_label(label)
    instance_double(PostgresServer, strand: instance_double(Strand, label: label))
  end

  it "naps if leader not ready" do
    expect(postgres_timeline).to receive(:leader).and_return(leader_with_label("start")).twice
    expect { nx.wait_leader }.to nap(5)
  end

  it "hops if leader is ready" do
    expect(postgres_timeline).to receive(:leader).and_return(leader_with_label("wait")).twice
    expect { nx.wait_leader }.to hop("wait")
  end
end
|
|
|
|
describe "#wait" do
  it "naps if blob storage is not configured" do
    expect(postgres_timeline).to receive(:leader).and_return("something")
    expect(postgres_timeline).to receive(:backups).and_return([])
    expect(postgres_timeline).to receive(:blob_storage).and_return(nil)
    expect { nx.wait }.to nap(20 * 60)
  end

  it "self-destructs if there's no leader, no backups and the timeline is old enough" do
    expect(postgres_timeline).to receive(:leader).and_return(nil)
    # 11 days old — beyond the retention window used by the prog, so the
    # leaderless timeline is garbage-collected.
    expect(postgres_timeline).to receive(:created_at).and_return(Time.now - 11 * 24 * 60 * 60)
    expect(Clog).to receive(:emit).with(/Self-destructing timeline/)
    expect { nx.wait }.to hop("destroy")
  end

  it "avoids API calls backups if there is no leader" do
    expect(postgres_timeline).to receive(:leader).and_return(nil)
    expect(postgres_timeline).to receive(:created_at).and_return(Time.now - 6 * 24 * 60 * 60).twice
    # Listing backups hits the blob storage API; with no leader it must not
    # be called at all.
    expect(postgres_timeline).not_to receive(:backups)
    expect(postgres_timeline).to receive(:need_backup?).and_return(false)

    expect { nx.wait }.to nap(20 * 60)
  end

  it "hops to take_backup if backup is needed" do
    expect(postgres_timeline).to receive(:need_backup?).and_return(true)
    # Minimal stand-in for a blob-storage backup entry.
    backup = Struct.new(:last_modified)
    expect(postgres_timeline).to receive(:backups).and_return([instance_double(backup, last_modified: Time.now - 3 * 24 * 60 * 60)])
    expect(postgres_timeline).to receive(:leader).and_return(instance_double(PostgresServer))
    expect { nx.wait }.to hop("take_backup")
  end

  it "creates a missing backup page if last completed backup is older than 2 days" do
    expect(postgres_timeline).to receive(:need_backup?).and_return(false)
    backup = Struct.new(:last_modified)
    # A 3-day-old backup is stale and must raise an operator-facing page.
    expect(postgres_timeline).to receive(:backups).and_return([instance_double(backup, last_modified: Time.now - 3 * 24 * 60 * 60)])
    expect(postgres_timeline).to receive(:leader).and_return(instance_double(PostgresServer))
    expect { nx.wait }.to nap(20 * 60)
    expect(Page.active.count).to eq(1)
  end

  it "resolves the missing page if last completed backup is more recent than 2 days" do
    expect(postgres_timeline).to receive(:need_backup?).and_return(false)
    backup = Struct.new(:last_modified)
    # A 1-day-old backup is fresh enough: any open page gets resolved.
    expect(postgres_timeline).to receive(:backups).and_return([instance_double(backup, last_modified: Time.now - 1 * 24 * 60 * 60)])
    expect(postgres_timeline).to receive(:leader).and_return(instance_double(PostgresServer))
    page = instance_double(Page)
    expect(page).to receive(:incr_resolve)
    expect(Page).to receive(:from_tag_parts).and_return(page)

    expect { nx.wait }.to nap(20 * 60)
  end

  it "naps if there is nothing to do" do
    expect(postgres_timeline).to receive(:need_backup?).and_return(false)
    backup = Struct.new(:last_modified)
    expect(postgres_timeline).to receive(:backups).and_return([instance_double(backup, last_modified: Time.now - 1 * 24 * 60 * 60)])
    expect(postgres_timeline).to receive(:leader).and_return(instance_double(PostgresServer))

    expect { nx.wait }.to nap(20 * 60)
  end
end
|
|
|
|
describe "#take_backup" do
  it "hops to wait if backup is not needed" do
    expect(postgres_timeline).to receive(:need_backup?).and_return(false)
    expect { nx.take_backup }.to hop("wait")
  end

  it "takes backup if it is needed" do
    expect(postgres_timeline).to receive(:need_backup?).and_return(true)

    # The backup command is daemonized on the leader's VM over SSH, with the
    # resource's Postgres version interpolated into the command line.
    leader_sshable = instance_double(Sshable)
    expect(leader_sshable).to receive(:cmd).with("common/bin/daemonizer 'sudo postgres/bin/take-backup 16' take_postgres_backup")
    leader_vm = instance_double(Vm, sshable: leader_sshable)
    leader_resource = instance_double(PostgresResource, version: "16")
    leader = instance_double(PostgresServer, resource: leader_resource, vm: leader_vm)
    expect(postgres_timeline).to receive(:leader).and_return(leader).at_least(:once)

    # Starting a backup records the start timestamp on the timeline.
    expect(postgres_timeline).to receive(:latest_backup_started_at=)
    expect(postgres_timeline).to receive(:save_changes)

    expect { nx.take_backup }.to hop("wait")
  end
end
|
|
|
|
describe "#destroy" do
  let(:admin_blob_storage_client) { instance_double(Minio::Client) }

  it "completes destroy even if dns zone and blob_storage are not configured" do
    expect(postgres_timeline).to receive(:blob_storage).and_return(nil)
    expect(postgres_timeline).to receive(:destroy)
    expect { nx.destroy }.to exit({"msg" => "postgres timeline is deleted"})
  end

  describe "when blob storage is minio" do
    it "destroys blob storage and postgres timeline" do
      expect(postgres_timeline).to receive(:blob_storage).and_return(instance_double(MinioCluster, url: "https://blob-endpoint", root_certs: "certs", admin_user: "admin", admin_password: "secret")).at_least(:once)
      expect(postgres_timeline).to receive(:destroy)

      # Teardown removes the per-timeline minio user and policy using the
      # cluster's admin credentials.
      expect(Minio::Client).to receive(:new).with(endpoint: postgres_timeline.blob_storage_endpoint, access_key: "admin", secret_key: "secret", ssl_ca_data: "certs").and_return(admin_blob_storage_client)
      expect(admin_blob_storage_client).to receive(:admin_remove_user).with(postgres_timeline.access_key).and_return(200)
      expect(admin_blob_storage_client).to receive(:admin_policy_remove).with(postgres_timeline.ubid).and_return(200)
      expect { nx.destroy }.to exit({"msg" => "postgres timeline is deleted"})
    end
  end

  describe "when blob storage is aws s3" do
    before do
      expect(postgres_timeline).to receive(:aws?).and_return(true)
      expect(postgres_timeline).to receive(:location).and_return(instance_double(Location, name: "us-west-2", location_credential: instance_double(LocationCredential, access_key: "access-key", secret_key: "secret-key"))).at_least(:once)
    end

    it "destroys blob storage and postgres timeline" do
      # Stubbed IAM teardown: detach policies, delete access keys, then
      # delete the user itself.
      client = Aws::IAM::Client.new(stub_responses: true)
      expect(Aws::IAM::Client).to receive(:new).and_return(client)
      client.stub_responses(:delete_user)
      client.stub_responses(:list_attached_user_policies, attached_policies: [{policy_arn: "arn:aws:iam::aws:policy/AmazonS3FullAccess"}])
      client.stub_responses(:delete_policy)
      client.stub_responses(:list_access_keys, access_key_metadata: [{access_key_id: "access-key"}])
      client.stub_responses(:delete_access_key)
      expect(postgres_timeline).to receive(:destroy)
      expect { nx.destroy }.to exit({"msg" => "postgres timeline is deleted"})
    end
  end
end
|
|
end
|