Files
ubicloud/lib/metrics.rb
shikharbhardwaj 37bf6ba5e5 Add Postgres metrics UI
This commit adds the UI components for Postgres metrics, behind a
feature flag. The charts are built on top of Apache ECharts [1].
After some prototyping and reading source code across d3.js, ApexCharts
and ECharts, I decided to pick ECharts as it provides a good balance of
flexibility, performance and out-of-the-box features, with a
navigable codebase and documentation.

For this version, I have added the metrics as another card on the
Postgres overview page. But this page is already pretty overcrowded.
In a future version, I would like to split the metrics section to a
separate place to clean things up and also avoid unnecessary requests to
fetch metrics on each Postgres server overview visit.

[1]: https://github.com/apache/echarts/
2025-05-13 12:20:27 +05:30

162 lines
6.5 KiB
Ruby

# frozen_string_literal: true
# Static catalogue of the metric charts and the query expressions that back
# them.
#
# Every query string contains the literal text `$ubicloud_resource_id` (it is
# NOT Ruby interpolation) and filters on `ubicloud_resource_role="primary"`.
# NOTE(review): `$ubicloud_resource_id` is presumably a placeholder that the
# caller substitutes before sending the query to the metrics backend --
# confirm against the call site.
module Metrics
  # One plotted line of a chart.
  #
  # labels - Hash of static labels for the line (e.g. `{name: "Received"}`);
  #          empty when the definition supplies no name for the line.
  # query  - String query expression (PromQL-style syntax) producing the line.
  TimeSeries = Data.define(:labels, :query)

  # One chart.
  #
  # name        - String title of the chart.
  # description - String short explanation of the chart.
  # unit        - String unit of the values, or nil when there is none.
  # series      - Array of TimeSeries drawn on the chart.
  MetricDefinition = Data.define(:name, :description, :unit, :series)

  # Metric definitions for Postgres resources, keyed by metric identifier.
  #
  # Data instances are only shallowly immutable, so the hash itself, every
  # `series` array and every `labels` hash are frozen explicitly; this keeps
  # the shared constant from being mutated by accident at runtime.
  POSTGRES_METRICS = {
    cpu_usage: MetricDefinition.new(
      name: "CPU Usage",
      description: "Percentage of CPU used by the system",
      unit: "%",
      series: [
        TimeSeries.new(
          labels: {}.freeze,
          query: "(1 - sum(avg(rate(node_cpu_seconds_total{mode=\"idle\", ubicloud_resource_id=\"$ubicloud_resource_id\", ubicloud_resource_role=\"primary\"}[1m])))) * 100"
        )
      ].freeze
    ),
    load_average: MetricDefinition.new(
      name: "Load Average",
      description: "System load average over different time periods",
      unit: nil,
      series: [
        TimeSeries.new(
          labels: {name: "1 minute"}.freeze,
          query: "sum(node_load1{ubicloud_resource_id=\"$ubicloud_resource_id\", ubicloud_resource_role=\"primary\"})"
        ),
        TimeSeries.new(
          labels: {name: "5 minutes"}.freeze,
          query: "sum(node_load5{ubicloud_resource_id=\"$ubicloud_resource_id\", ubicloud_resource_role=\"primary\"})"
        ),
        TimeSeries.new(
          labels: {name: "15 minutes"}.freeze,
          query: "sum(node_load15{ubicloud_resource_id=\"$ubicloud_resource_id\", ubicloud_resource_role=\"primary\"})"
        )
      ].freeze
    ),
    memory_usage: MetricDefinition.new(
      name: "Memory Usage",
      description: "Total memory usage vs cache & buffers",
      unit: "%",
      series: [
        TimeSeries.new(
          labels: {name: "Used Memory"}.freeze,
          query: "sum((1 - (node_memory_MemAvailable_bytes{ubicloud_resource_id=\"$ubicloud_resource_id\", ubicloud_resource_role=\"primary\"} / node_memory_MemTotal_bytes{ubicloud_resource_id=\"$ubicloud_resource_id\", ubicloud_resource_role=\"primary\"})) * 100)"
        ),
        TimeSeries.new(
          labels: {name: "Cache & Buffers"}.freeze,
          query: "sum((node_memory_Cached_bytes{ubicloud_resource_id=\"$ubicloud_resource_id\", ubicloud_resource_role=\"primary\"} + node_memory_Buffers_bytes{ubicloud_resource_id=\"$ubicloud_resource_id\", ubicloud_resource_role=\"primary\"}) / node_memory_MemTotal_bytes{ubicloud_resource_id=\"$ubicloud_resource_id\", ubicloud_resource_role=\"primary\"} * 100)"
        )
      ].freeze
    ),
    disk_usage: MetricDefinition.new(
      name: "Disk Usage",
      description: "Disk space utilization",
      unit: "%",
      series: [
        TimeSeries.new(
          labels: {name: "Used Space"}.freeze,
          query: "100 - (sum(node_filesystem_avail_bytes{mountpoint=\"/dat\", ubicloud_resource_id=\"$ubicloud_resource_id\", ubicloud_resource_role=\"primary\"} / node_filesystem_size_bytes{mountpoint=\"/dat\", ubicloud_resource_id=\"$ubicloud_resource_id\", ubicloud_resource_role=\"primary\"}) * 100)"
        )
      ].freeze
    ),
    network_traffic: MetricDefinition.new(
      name: "Network Traffic",
      description: "Incoming and outgoing network traffic",
      unit: "bytes/s",
      series: [
        TimeSeries.new(
          labels: {name: "Received"}.freeze,
          query: "sum(rate(node_network_receive_bytes_total{ubicloud_resource_id=\"$ubicloud_resource_id\", ubicloud_resource_role=\"primary\"}[1m]))"
        ),
        TimeSeries.new(
          labels: {name: "Transmitted"}.freeze,
          query: "sum(rate(node_network_transmit_bytes_total{ubicloud_resource_id=\"$ubicloud_resource_id\", ubicloud_resource_role=\"primary\"}[1m]))"
        )
      ].freeze
    ),
    connection_count: MetricDefinition.new(
      name: "Connection Count",
      description: "Database activity metrics",
      unit: "count",
      series: [
        TimeSeries.new(
          labels: {name: "Active"}.freeze,
          query: "sum(pg_stat_activity_count{state=\"active\", ubicloud_resource_id=\"$ubicloud_resource_id\", ubicloud_resource_role=\"primary\"})"
        ),
        TimeSeries.new(
          labels: {name: "Total"}.freeze,
          query: "sum(pg_stat_activity_count{ubicloud_resource_id=\"$ubicloud_resource_id\", ubicloud_resource_role=\"primary\"})"
        )
      ].freeze
    ),
    cache_hit_ratio: MetricDefinition.new(
      name: "Cache Hit Ratio",
      description: "Percentage of cache hits vs reads",
      unit: "%",
      series: [
        TimeSeries.new(
          labels: {}.freeze,
          query: "sum(rate(pg_stat_database_blks_hit{ubicloud_resource_id=\"$ubicloud_resource_id\", ubicloud_resource_role=\"primary\"}[1m])) / (sum(rate(pg_stat_database_blks_hit{ubicloud_resource_id=\"$ubicloud_resource_id\", ubicloud_resource_role=\"primary\"}[1m])) + sum(rate(pg_stat_database_blks_read{ubicloud_resource_id=\"$ubicloud_resource_id\", ubicloud_resource_role=\"primary\"}[1m]))) * 100"
        )
      ].freeze
    ),
    operation_throughput: MetricDefinition.new(
      name: "Operation Throughput",
      description: "Fetch, insert, update, delete operations per second",
      unit: "ops/s",
      series: [
        TimeSeries.new(
          labels: {name: "Fetch"}.freeze,
          query: "sum(rate(pg_stat_database_tup_fetched{ubicloud_resource_id=\"$ubicloud_resource_id\", ubicloud_resource_role=\"primary\"}[1m]))"
        ),
        TimeSeries.new(
          labels: {name: "Insert"}.freeze,
          query: "sum(rate(pg_stat_database_tup_inserted{ubicloud_resource_id=\"$ubicloud_resource_id\", ubicloud_resource_role=\"primary\"}[1m]))"
        ),
        TimeSeries.new(
          labels: {name: "Update"}.freeze,
          query: "sum(rate(pg_stat_database_tup_updated{ubicloud_resource_id=\"$ubicloud_resource_id\", ubicloud_resource_role=\"primary\"}[1m]))"
        ),
        TimeSeries.new(
          labels: {name: "Delete"}.freeze,
          query: "sum(rate(pg_stat_database_tup_deleted{ubicloud_resource_id=\"$ubicloud_resource_id\", ubicloud_resource_role=\"primary\"}[1m]))"
        )
      ].freeze
    ),
    deadlocks: MetricDefinition.new(
      name: "Deadlocks",
      description: "Deadlocks per second",
      unit: "deadlocks/s",
      series: [
        TimeSeries.new(
          labels: {}.freeze,
          query: "sum(rate(pg_stat_database_deadlocks{ubicloud_resource_id=\"$ubicloud_resource_id\", ubicloud_resource_role=\"primary\"}[1m]))"
        )
      ].freeze
    ),
    database_size: MetricDefinition.new(
      name: "Database Size",
      description: "Top 5 databases by size",
      unit: "bytes",
      series: [
        TimeSeries.new(
          labels: {}.freeze,
          # Excludes the template0/template1 databases before ranking by size.
          query: "topk(5, sum(pg_database_size_bytes{ubicloud_resource_id=\"$ubicloud_resource_id\", ubicloud_resource_role=\"primary\", datname!~\"template0|template1\"}) by (datname))"
        )
      ].freeze
    )
  }.freeze
end