Files
ubicloud/spec/prog/learn_cpu_spec.rb
Hadi Moshayedi 4e35b475a1 Fix learning total_dies in arm64.
We used to always determine the number of CPU dies by counting unique
values in `/sys/devices/system/cpu/cpu*/topology/die_id` files. This
method works on x64 systems but is ineffective on ARM64.

The `topology_die_id` function is defined for x64 architectures in
`arch/x86/include/asm/topology.h` but is not implemented for ARM64 in
`arch/arm64/include/asm/topology.h`.

In Linux kernel 5.15 (used in Ubuntu 22.04), the `die_id` attribute is
exposed with a value of -1 if `topology_die_id` is not defined for the
architecture. Therefore, the `die_id` file for ARM64 consistently has
the value -1, causing our method of counting unique values to always
return 1 on Ubuntu 22.04, regardless of the actual number of dies. This
can cause issues if the number of sockets is more than 1, since our code
assumes `total_dies` is a multiple of `total_sockets`.

Starting with Linux kernel 5.17, a change was introduced [1] to expose
the `die_id` file only if `topology_die_id` is defined for the
architecture. Consequently, in Linux kernel 6.8 (used in Ubuntu 24.04),
this file is absent for ARM64 systems. As a result, our method of
counting unique values now produces 0 on Ubuntu 24.04.

Given the lack of a straightforward way to determine the number of dies
on ARM64 systems, this patch sets `total_dies` equal to `total_sockets`,
assuming one die per socket.

[1] https://github.com/torvalds/linux/commit/2c4dcd7
2024-11-22 12:08:50 -08:00

104 lines
2.9 KiB
Ruby

# frozen_string_literal: true
require_relative "../model/spec_helper"
RSpec.describe Prog::LearnCpu do
  subject(:lc) { described_class.new(Strand.new) }

  # Stand-in for the SSH handle of the prog under test. Examples attach their
  # own `cmd` expectations; any command they did not declare fails the example.
  let(:sshable) { instance_double(Sshable) }

  # lscpu-style JSON for 8 logical CPUs spread over 2 sockets, 2 cores per
  # socket and 2 CPUs per core. Core ids repeat across sockets, so arriving at
  # 4 cores requires counting cores per socket rather than distinct core ids.
  let(:lscpu_two_socket_json) do
    <<~JSON
      {
      "cpus": [
      {
      "cpu": 0,
      "socket": 0,
      "core": 0
      },{
      "cpu": 1,
      "socket": 0,
      "core": 0
      },{
      "cpu": 2,
      "socket": 0,
      "core": 1
      },{
      "cpu": 3,
      "socket": 0,
      "core": 1
      },{
      "cpu": 4,
      "socket": 1,
      "core": 0
      },{
      "cpu": 5,
      "socket": 1,
      "core": 0
      },{
      "cpu": 6,
      "socket": 1,
      "core": 1
      },{
      "cpu": 7,
      "socket": 1,
      "core": 1
      }
      ]
      }
    JSON
  end

  # Topology expected from the fixture above. `total_dies` is 0 at this stage;
  # the "#start" example shows the final value being taken from `count_dies`.
  let(:lscpu_topology) do
    Prog::LearnCpu::CpuTopology.new(total_cpus: 8, total_cores: 4, total_dies: 0, total_sockets: 2)
  end

  describe "#get_arch" do
    before { allow(lc).to receive(:sshable).and_return(sshable) }

    it "returns the architecture" do
      expect(sshable).to receive(:cmd).with("common/bin/arch").and_return("x64")

      expect(lc.get_arch).to eq("x64")
    end

    it "fails when there's an unexpected architecture" do
      expect(sshable).to receive(:cmd).with("common/bin/arch").and_return("s390x")

      expect { lc.get_arch }.to raise_error(RuntimeError, "BUG: unexpected CPU architecture")
    end
  end

  describe "#get_topology" do
    before { allow(lc).to receive(:sshable).and_return(sshable) }

    it "returns the CPU topology" do
      expect(sshable).to receive(:cmd).with("/usr/bin/lscpu -Jye").and_return(lscpu_two_socket_json)

      expect(lc.get_topology).to eq(lscpu_topology)
    end
  end

  describe "#count_dies" do
    before { allow(lc).to receive(:sshable).and_return(sshable) }

    it "returns the number of dies" do
      # Four die_id readings with two distinct values.
      die_ids = "0\n1\n0\n1\n"
      expect(sshable).to receive(:cmd).with("cat /sys/devices/system/cpu/cpu*/topology/die_id").and_return(die_ids)

      expect(lc.count_dies(arch: "x64", total_sockets: 2)).to eq(2)
    end

    it "returns the number of sockets when on arm64" do
      # No `cmd` expectation is declared here, so the strict double would fail
      # this example if the die_id files were read on arm64.
      expect(lc.count_dies(arch: "arm64", total_sockets: 2)).to eq(2)
    end
  end

  describe "#start" do
    it "pops with cpu info" do
      # Stub every collaborator so only the assembly of the exit payload is exercised.
      allow(lc).to receive_messages(
        get_arch: "x64",
        get_topology: lscpu_topology,
        count_dies: 2
      )

      expect { lc.start }.to exit(arch: "x64", total_cpus: 8, total_cores: 4, total_dies: 2, total_sockets: 2)
    end
  end
end