This commit updates bin/monitor to scan for resources which have metrics export capability (currently only PG servers) and periodically export their metrics and store them in VictoriaMetrics. Each resource which exports metrics should include `MetricsTargetMethods`, which contains the config and a common algorithm to fetch scraped data from a given list of endpoints. Roughly, the logic looks like this: Producer side (PG server): A systemd-backed timer executes `postgres/bin/metrics-collector`, which scrapes the list of endpoints and stores the results on disk every 15s (default interval). This location is the `pending` buffer, which can grow up to 120 entries by default, providing recovery for 30 minutes of outages in other parts of the system. Consumer side (bin/monitor): Monitor periodically connects to each metrics target resource to fetch up to 4 scrapes at a time, writes them to VictoriaMetrics, and moves the successfully exported scrapes to the `done` buffer, which is cleaned up periodically by the `metrics-collector`. Currently, we only create a single global VictoriaMetrics instance for simplicity, although it can be configured to be location-aware.
127 lines
4.7 KiB
Ruby
127 lines
4.7 KiB
Ruby
# spec/lib/metrics_target_methods_spec.rb
|
|
# frozen_string_literal: true
|
|
|
|
require "spec_helper"
|
|
require "metrics_target_methods"
|
|
|
|
# Bare host class used by the examples in this file: its only job is to mix
# in MetricsTargetMethods so the module's methods can be called on an instance.
class TestClass
  include MetricsTargetMethods
end
|
|
|
|
# Specs for the MetricsTargetMethods mixin, exercised through TestClass with
# the SSH session and the VictoriaMetrics client replaced by verifying doubles.
RSpec.describe MetricsTargetMethods do
  let(:test_instance) { TestClass.new }
  let(:mock_ssh_session) { instance_double(Net::SSH::Connection::Session) }
  # Session hash in the shape the examples pass to the mixin's methods.
  let(:session) { {ssh_session: mock_ssh_session} }
  let(:mock_tsdb_client) { instance_double(VictoriaMetrics::Client) }
  let(:metrics_dir) { "/home/ubi/metrics" }

  describe "#metrics_config" do
    it "returns the default configuration" do
      defaults = test_instance.metrics_config

      expect(defaults).to be_a(Hash)
      expect(defaults[:endpoints]).to eq([])
      expect(defaults[:max_file_retention]).to eq(120)
      expect(defaults[:interval]).to eq("15s")
      # NOTE(review): a non-empty label set is an unusual "default" -- confirm
      # this matches what MetricsTargetMethods#metrics_config really returns.
      expect(defaults[:additional_labels]).to eq({foo: "bar"})
      expect(defaults[:metrics_dir]).to eq("/home/ubi/metrics")
    end
  end

  describe "#export_metrics" do
    context "when scrape results are empty" do
      before do
        # Nothing was scraped, so there is nothing to import or acknowledge.
        allow(test_instance).to receive(:scrape_endpoints).and_return([])
        allow(Clog).to receive(:emit)
      end

      it "does not call import_prometheus or mark_pending_scrapes_as_done" do
        expect(mock_tsdb_client).not_to receive(:import_prometheus)
        expect(test_instance).not_to receive(:mark_pending_scrapes_as_done)

        test_instance.export_metrics(session: session, tsdb_client: mock_tsdb_client)
      end
    end

    context "when scrape results exist" do
      let(:time) { Time.now }
      # Two scrapes ten seconds apart; the later one carries `time`.
      let(:scrape_result_a) { VictoriaMetrics::Client::Scrape.new(time: time - 10, samples: "metric1{} 1") }
      let(:scrape_result_b) { VictoriaMetrics::Client::Scrape.new(time: time, samples: "metric2{} 2") }
      let(:scrape_results) { [scrape_result_a, scrape_result_b] }

      before do
        allow(test_instance).to receive(:scrape_endpoints).and_return(scrape_results)
        allow(Clog).to receive(:emit)
        allow(test_instance).to receive(:mark_pending_scrapes_as_done)
      end

      it "does not call import_prometheus or mark_pending_scrapes_as_done if tsdb_client is nil" do
        expect(mock_tsdb_client).not_to receive(:import_prometheus)
        expect(test_instance).not_to receive(:mark_pending_scrapes_as_done)

        test_instance.export_metrics(session: session, tsdb_client: nil)
      end

      it "imports all scrapes and marks them as done" do
        # Each scrape is expected to be imported with the configured labels.
        expect(mock_tsdb_client).to receive(:import_prometheus).with(scrape_result_a, {foo: "bar"})
        expect(mock_tsdb_client).to receive(:import_prometheus).with(scrape_result_b, {foo: "bar"})
        # The acknowledgement is expected with `time`, the later scrape's timestamp.
        expect(test_instance).to receive(:mark_pending_scrapes_as_done).with(session, time)

        test_instance.export_metrics(session: session, tsdb_client: mock_tsdb_client)
      end
    end
  end

  describe "#scrape_endpoints" do
    # Newline-separated listing of two timestamp-named scrape files.
    let(:file_list) { "2023-01-01T12-00-00-000000000.prom\n2023-01-01T12-15-00-000000000.prom" }
    let(:file_content) { "metric{} 1" }
    # Overridden per context to simulate a failing `cat`.
    let(:status_hash) { {exit_code: 0} }

    before do
      allow(mock_ssh_session).to receive(:exec!).with(/ls.*done/).and_return(file_list)
      allow(mock_ssh_session).to receive(:exec!).with(/cat.*done/, status: anything) do |_command, opts|
        # Report the exit code through the caller-supplied status hash, then
        # hand back the file body as the command output.
        opts[:status][:exit_code] = status_hash[:exit_code]
        file_content
      end
    end

    context "when files can be read successfully" do
      it "returns the expected scrapes" do
        scrapes = test_instance.scrape_endpoints(session)

        expect(scrapes.length).to eq(2)
        scrapes.each do |scrape|
          expect(scrape).to be_a(VictoriaMetrics::Client::Scrape)
          expect(scrape.samples).to eq(file_content)
        end
      end
    end

    context "when files cannot be read" do
      let(:status_hash) { {exit_code: 1} }

      it "filters out failed scrapes" do
        scrapes = test_instance.scrape_endpoints(session)

        expect(scrapes).to be_empty
      end
    end
  end

  describe "#mark_pending_scrapes_as_done" do
    # NOTE(review): Time.new uses the local zone; the marker below presumably
    # relies on the method formatting the time without a zone conversion -- verify.
    let(:time) { Time.new(2023, 1, 1, 12, 0, 0) }
    let(:time_marker) { "2023-01-01T12-00-00-000000000" }

    # NOTE(review): the example name says "move", but the asserted command
    # deletes (rm) files from done/ -- confirm which behavior is intended.
    it "executes the correct command to move files" do
      expected_command = "ls #{metrics_dir}/done | sort | awk '$0 <= \"#{time_marker}\"' | xargs -I{} rm #{metrics_dir}/done/{}"
      expect(mock_ssh_session).to receive(:exec!).with(expected_command)

      test_instance.mark_pending_scrapes_as_done(session, time)
    end
  end

  describe "#metrics_dir" do
    it "returns the escaped metrics directory path" do
      # Swap in a config whose directory contains spaces to exercise escaping.
      allow(test_instance).to receive(:metrics_config).and_return({metrics_dir: "/path with spaces"})

      expect(test_instance.metrics_dir).to eq("/path\\ with\\ spaces")
    end
  end
end
|