Files
ubicloud/rhizome/postgres/bin/configure
shikharbhardwaj 043e5272d0 Add bin/metrics-collector to collect and buffer metrics
This change adds a systemd-timer triggered script to collect
Prometheus-style metrics from a configured list of endpoints on the
metrics producer side (eg. Postgres server). These will be written to
the metrics storage DB (VictoriaMetrics) by bin/monitor. The buffer
exists so that if certain parts of the stack (either monitor or
VictoriaMetrics) are unavailable, we are able to catch up and recover
without losing data. With the default buffer size (20MiB), we can
tolerate about 30 minutes of downtime for PG metrics.

To add this functionality to other resources, one can configure the
systemd-timer in a similar manner as done in this change for
PostgresServer and provide the configuration as part of the
`metrics_config` method, like this:

{
  # Array of endpoints to collect metrics from
  endpoints: [],

  # Maximum size of the pending buffer in MiB
  max_pending_buffer_size_mib: 20,

  # Interval for collecting metrics in seconds or as a time span string
  interval: "15s",

  # Additional label names and values to be added to the collected metrics
  additional_labels: {"foo": "bar"}
}
2025-04-29 03:14:49 +05:30

145 lines
5.0 KiB
Ruby
Executable File

#!/bin/env ruby
# frozen_string_literal: true
require "json"
require_relative "../../common/lib/util"
require_relative "../lib/pgbouncer_setup"
# Exactly one positional argument is required. It is stored in `v` and
# interpolated below into the /etc/postgresql/<v>/main paths and the
# pg_ctlcluster command.
arg_count = ARGV.count
fail "Wrong number of arguments. Expected 1, Given #{arg_count}" unless arg_count == 1

v = ARGV.first

# The configuration payload is read from standard input as a JSON document.
configure_hash = JSON.parse($stdin.read)
# Update /etc/hosts
# The file is made of fixed loopback / IPv6 entries followed by whatever the
# payload carries under "hosts" (interpolated as-is; nil renders as an empty
# line).
extra_hosts = configure_hash["hosts"]
safe_write_to_file("/etc/hosts", <<-HOSTS)
127.0.0.1 localhost
::1 ip6-localhost ip6-loopback
fe00::0 ip6-localnet
ff00::0 ip6-mcastprefix
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters
ff02::3 ip6-allhosts
#{extra_hosts}
HOSTS
# Update postgresql.conf
# Each entry of the payload's "configs" hash is rendered as a `name = value`
# line and the result is written to the 001-service.conf drop-in.
# The block parameters are deliberately not called `v`: the outer `v` (the
# script argument) is interpolated into the target path right below, and
# shadowing it inside the block made the two easy to confuse.
configs = configure_hash["configs"].map { |name, value| "#{name} = #{value}" }.join("\n")
safe_write_to_file("/etc/postgresql/#{v}/main/conf.d/001-service.conf", configs)
# Update pg_hba.conf
# For every entry in "private_subnets", emit one SCRAM rule for its "net4"
# value followed by one for its "net6" value, in payload order.
subnet_rules = configure_hash["private_subnets"].flat_map { |subnet|
  %w[net4 net6].map { |family| "host all all #{subnet[family]} scram-sha-256" }
}
private_subnets = subnet_rules.join("\n")

# Static rules with the generated private-subnet rules spliced in between
# the local/monitoring rules and the replication/public rules.
pg_hba_body = <<-PG_HBA
# PostgreSQL Client Authentication Configuration File
# ===================================================
#
# Refer to the "Client Authentication" section in the PostgreSQL
# documentation for a complete description of this file.
# TYPE DATABASE USER ADDRESS METHOD
# Database administrative login by Unix domain socket
local all postgres peer map=system2postgres
local all pgbouncer peer map=system2pgbouncer
# "local" is for Unix domain socket connections only
local all all peer
# IPv4 local connections:
host all all 127.0.0.1/32 scram-sha-256
# IPv6 local connections:
host all all ::1/128 scram-sha-256
# Allow replication connections from localhost, by a user with the
# replication privilege.
local replication all peer
host replication all 127.0.0.1/32 scram-sha-256
host replication all ::1/128 scram-sha-256
# Allow connections from localhost with ubi_monitoring OS user as
# ubi_monitoring database user. This will be used by postgres_exporter
# to scrape metrics and expose them to prometheus.
local all ubi_monitoring peer
# Allow connections from private subnet with SCRAM authentication
#{private_subnets}
# Allow replication connection using special replication user for
# HA standbys
hostssl replication ubi_replication all cert map=standby2replication
# Allow connections from public internet with SCRAM authentication
host all all all scram-sha-256
PG_HBA
safe_write_to_file("/etc/postgresql/#{v}/main/pg_hba.conf", pg_hba_body)
# Update pg_ident.conf
# The payload's "identity" value is placed in the SYSTEM-USERNAME column of
# the standby2replication map, which maps it to the ubi_replication user.
replication_identity = configure_hash["identity"]
safe_write_to_file("/etc/postgresql/#{v}/main/pg_ident.conf", <<-PG_IDENT)
# PostgreSQL User Name Maps
# =========================
#
# Refer to the PostgreSQL documentation, chapter "Client
# Authentication" for a complete description.
# MAPNAME SYSTEM-USERNAME PG-USERNAME
system2postgres postgres postgres
system2pgbouncer postgres pgbouncer
system2postgres ubi postgres
standby2replication #{replication_identity} ubi_replication
PG_IDENT
# Reload the postmaster to apply changes; the shell `||` falls back to a
# restart when the reload command exits non-zero.
r("pg_ctlcluster #{v} main reload || pg_ctlcluster #{v} main restart")

# Run the PgBouncer setup with the version argument, the configured
# max_connections and the requested pgbouncer_instances value.
max_connections = configure_hash["configs"]["max_connections"]
pgbouncer_instances = configure_hash["pgbouncer_instances"]
PgBouncerSetup.new(v, max_connections, pgbouncer_instances).setup
# Configure metrics collector
# Save the metrics configuration to a file.
# `fetch("metrics_config", {})` only falls back to {} when the key is absent.
# A payload carrying `"metrics_config": null` would yield nil, write "null"
# to config.json and then raise NoMethodError on the interval lookup below,
# after part of the setup had already been applied. Treat null like a
# missing key instead.
metrics_config = configure_hash["metrics_config"] || {}
r "mkdir -p /home/ubi/postgres/metrics"
safe_write_to_file("/home/ubi/postgres/metrics/config.json", metrics_config.to_json)
# The collector service runs as the ubi user, so hand the directory and the
# config file written above over to that user.
r "chown -R ubi:ubi /home/ubi/postgres/metrics"

# Create systemd service for metrics collection.
# The unit is a oneshot that invokes the collector with the metrics directory
# and the config file path as its two arguments.
metrics_service = <<-SERVICE
[Unit]
Description=PostgreSQL Metrics Collection
After=postgresql.service
[Service]
Type=oneshot
User=ubi
ExecStart=/home/ubi/postgres/bin/metrics-collector /home/ubi/postgres/metrics /home/ubi/postgres/metrics/config.json
StandardOutput=journal
StandardError=journal
SERVICE
safe_write_to_file("/etc/systemd/system/postgres-metrics.service", metrics_service)

# Create systemd timer for metrics collection.
# The repeat interval comes from the config's "interval" key and defaults to
# "15s" when that key is missing or null.
metrics_interval = metrics_config["interval"] || "15s"
metrics_timer = <<-TIMER
[Unit]
Description=Run PostgreSQL Metrics Collection Periodically
[Timer]
OnBootSec=30s
OnUnitActiveSec=#{metrics_interval}
AccuracySec=1s
[Install]
WantedBy=timers.target
TIMER
safe_write_to_file("/etc/systemd/system/postgres-metrics.timer", metrics_timer)

# Enable and start the timer. daemon-reload comes first so systemd picks up
# the unit files written above.
# NOTE(review): `systemctl start` does nothing for a timer that is already
# active - confirm that a changed interval takes effect on re-configuration
# without an explicit restart of the timer.
r "systemctl daemon-reload"
r "systemctl enable postgres-metrics.timer"
r "systemctl start postgres-metrics.timer"