Files
ubicloud/prog/postgres/postgres_timeline_nexus.rb
Furkan Sahin 063d8c24d7 Add safeguards so that the API calls to AWS do not fail
We used to let a bunch of API calls fail and retry until they succeeded.
That's not a correctness issue, since we know they would go through
eventually; however, it's not good behaviour and it dirties the logs.
Therefore, we are adding safeguards that wait before performing certain
operations.
2025-07-04 13:24:22 +02:00

160 lines
5.6 KiB
Ruby

# frozen_string_literal: true
require "forwardable"
require "aws-sdk-iam"
class Prog::Postgres::PostgresTimelineNexus < Prog::Base
  subject_is :postgres_timeline

  extend Forwardable
  def_delegators :postgres_timeline, :blob_storage_client

  # Creates the timeline record (optionally branching off a parent timeline)
  # together with the strand that drives this prog. Fails fast when the
  # referenced parent or location does not exist.
  def self.assemble(location_id:, parent_id: nil)
    fail "No existing parent" if parent_id && PostgresTimeline[parent_id].nil?

    location = Location[location_id]
    fail "No existing location" unless location

    DB.transaction do
      timeline = PostgresTimeline.create_with_id(
        parent_id: parent_id,
        access_key: SecureRandom.hex(16),
        secret_key: SecureRandom.hex(32),
        blob_storage_id: MinioCluster.first(project_id: Config.postgres_service_project_id, location_id: location.id)&.id,
        location_id: location.id
      )
      Strand.create(prog: "Postgres::PostgresTimelineNexus", label: "start") { it.id = timeline.id }
    end
  end

  # Divert to destroy as soon as the destroy semaphore is set, unless the
  # strand is already running the destroy label.
  def before_run
    when_destroy_set? do
      hop_destroy unless strand.label == "destroy"
    end
  end

  label def start
    # With no blob storage configured there is nothing to provision; skip
    # straight to waiting on the leader.
    hop_wait_leader unless postgres_timeline.blob_storage

    setup_blob_storage
    hop_setup_bucket
  end

  label def setup_bucket
    # On AWS the freshly created access key may not be visible yet; nap until
    # it shows up so the bucket calls below do not fail.
    nap 1 if postgres_timeline.aws? && !aws_access_key_is_available?

    # Create bucket for the timeline
    postgres_timeline.create_bucket
    postgres_timeline.set_lifecycle_policy
    hop_wait_leader
  end

  label def wait_leader
    hop_destroy if postgres_timeline.leader.nil?

    nap 5 unless postgres_timeline.leader.strand.label == "wait"
    hop_wait
  end

  label def wait
    nap 20 * 60 if postgres_timeline.blob_storage.nil?

    # For the purpose of missing backup pages, we act like the very first backup
    # is taken at the creation, which ensures that we would get a page if and only
    # if no backup is taken for 2 days.
    last_backup_at = postgres_timeline.backups.map(&:last_modified).max || postgres_timeline.created_at
    if postgres_timeline.leader && last_backup_at < Time.now - 2 * 24 * 60 * 60 # 2 days
      Prog::PageNexus.assemble("Missing backup at #{postgres_timeline}!", ["MissingBackup", postgres_timeline.id], postgres_timeline.ubid)
    else
      Page.from_tag_parts("MissingBackup", postgres_timeline.id)&.incr_resolve
    end

    hop_take_backup if postgres_timeline.need_backup?

    nap 20 * 60
  end

  label def take_backup
    # It is possible that we already started backup but crashed before saving
    # the state to database. Since backup taking is an expensive operation,
    # we check if backup is truly needed.
    hop_wait unless postgres_timeline.need_backup?

    postgres_timeline.leader.vm.sshable.cmd("common/bin/daemonizer 'sudo postgres/bin/take-backup #{postgres_timeline.leader.resource.version}' take_postgres_backup")
    postgres_timeline.latest_backup_started_at = Time.now
    postgres_timeline.save_changes
    hop_wait
  end

  label def destroy
    decr_destroy

    destroy_blob_storage if postgres_timeline.blob_storage
    postgres_timeline.destroy
    pop "postgres timeline is deleted"
  end

  # Removes this timeline's credentials from blob storage, dispatching to the
  # AWS IAM path or the MinIO admin API depending on the backend.
  def destroy_blob_storage
    return destroy_aws_s3 if postgres_timeline.aws?

    admin_client.admin_remove_user(postgres_timeline.access_key)
    admin_client.admin_policy_remove(postgres_timeline.ubid)
  end

  # Tears down the IAM user created for this timeline: detach and delete every
  # attached policy, delete every access key, then delete the user itself.
  def destroy_aws_s3
    user = postgres_timeline.ubid
    iam_client.list_attached_user_policies(user_name: user).attached_policies.each do |attached|
      iam_client.detach_user_policy(user_name: user, policy_arn: attached.policy_arn)
      iam_client.delete_policy(policy_arn: attached.policy_arn)
    end
    iam_client.list_access_keys(user_name: user).access_key_metadata.each do |metadata|
      iam_client.delete_access_key(user_name: user, access_key_id: metadata.access_key_id)
    end
    iam_client.delete_user(user_name: user)
  end

  def setup_blob_storage
    return setup_aws_s3 if postgres_timeline.aws?

    # Setup user keys and policy for the timeline
    admin_client.admin_add_user(postgres_timeline.access_key, postgres_timeline.secret_key)
    admin_client.admin_policy_add(postgres_timeline.ubid, postgres_timeline.blob_storage_policy)
    admin_client.admin_policy_set(postgres_timeline.ubid, postgres_timeline.access_key)
  end

  # Creates the IAM user, policy and access key for this timeline, persists the
  # issued key pair, and asks the leader to refresh its WAL-G credentials.
  def setup_aws_s3
    user = postgres_timeline.ubid
    iam_client.create_user(user_name: user)
    policy = iam_client.create_policy(policy_name: user, policy_document: postgres_timeline.blob_storage_policy.to_json)
    iam_client.attach_user_policy(user_name: user, policy_arn: policy.policy.arn)
    issued = iam_client.create_access_key(user_name: user)
    postgres_timeline.update(access_key: issued.access_key.access_key_id, secret_key: issued.access_key.secret_access_key)
    postgres_timeline.leader.incr_refresh_walg_credentials
  end

  # True when the access key stored on the timeline is listed on the IAM user,
  # i.e. it is safe to start using the key.
  def aws_access_key_is_available?
    iam_client.list_access_keys(user_name: postgres_timeline.ubid).access_key_metadata.any? { |key| key.access_key_id == postgres_timeline.access_key }
  end

  def iam_client
    @iam_client ||= begin
      credential = postgres_timeline.location.location_credential
      Aws::IAM::Client.new(access_key_id: credential.access_key, secret_access_key: credential.secret_key, region: postgres_timeline.location.name)
    end
  end

  def admin_client
    @admin_client ||= Minio::Client.new(
      endpoint: postgres_timeline.blob_storage_endpoint,
      access_key: postgres_timeline.blob_storage.admin_user,
      secret_key: postgres_timeline.blob_storage.admin_password,
      ssl_ca_data: postgres_timeline.blob_storage.root_certs
    )
  end
end