Files
ubicloud/prog/aws/instance.rb
Enes Cakir 8e6f32039b Enable transparent cache in AWS runners
We use dnsmasq to resolve our transparent cache domain to the local IP
address on our runners. However, AWS runners do not use dnsmasq for
networking setup.

Therefore, we use /etc/hosts to resolve the transparent cache domain to
the local IP address.
2025-08-19 13:59:15 +03:00

255 lines
7.5 KiB
Ruby

# frozen_string_literal: true
class Prog::Aws::Instance < Prog::Base
subject_is :vm, :aws_instance
# Entry label: creates the IAM role (role_name, i.e. the VM name) with a trust
# policy that lets the EC2 service assume it, then calls hop_create_role_policy.
label def start
  # Single trust statement: only ec2.amazonaws.com may call sts:AssumeRole.
  ec2_trust_statement = {
    Effect: "Allow",
    Principal: {Service: "ec2.amazonaws.com"},
    Action: "sts:AssumeRole"
  }
  trust_policy = {Version: "2012-10-17", Statement: [ec2_trust_statement]}.to_json

  # The IAM errors listed in ignore_invalid_entity (e.g. the role already
  # exists) are logged and skipped rather than raised.
  ignore_invalid_entity do
    iam_client.create_role({role_name: role_name, assume_role_policy_document: trust_policy})
  end

  hop_create_role_policy
end
# Creates the IAM policy (policy_name) that grants CloudWatch Logs access to
# this VM's "auth" and "postgresql" log groups, then calls
# hop_attach_role_policy.
label def create_role_policy
  # Allows creating log groups/streams and pushing events into them.
  write_logs_statement = {
    Effect: "Allow",
    Action: [
      "logs:CreateLogStream",
      "logs:PutLogEvents",
      "logs:CreateLogGroup"
    ],
    Resource: [
      "arn:aws:logs:*:*:log-group:/#{vm.name}/auth:log-stream:*",
      "arn:aws:logs:*:*:log-group:/#{vm.name}/postgresql:log-stream:*"
    ]
  }

  # Allows listing the streams of the same two log groups.
  describe_streams_statement = {
    Effect: "Allow",
    Action: "logs:DescribeLogStreams",
    Resource: [
      "arn:aws:logs:*:*:log-group:/#{vm.name}/auth:*",
      "arn:aws:logs:*:*:log-group:/#{vm.name}/postgresql:*"
    ]
  }

  policy_json = {
    Version: "2012-10-17",
    Statement: [write_logs_statement, describe_streams_statement]
  }.to_json

  # The IAM errors listed in ignore_invalid_entity are logged and skipped.
  ignore_invalid_entity do
    iam_client.create_policy({policy_name: policy_name, policy_document: policy_json})
  end

  hop_attach_role_policy
end
# Attaches the CloudWatch policy found by cloudwatch_policy to the VM's role,
# then calls hop_create_instance_profile.
label def attach_role_policy
  ignore_invalid_entity do
    # The ARN lookup stays inside the block, exactly where the original evaluated it.
    policy_arn = cloudwatch_policy.arn
    iam_client.attach_role_policy({role_name: role_name, policy_arn: policy_arn})
  end

  hop_create_instance_profile
end
# Creates the IAM instance profile (instance_profile_name), then calls
# hop_add_role_to_instance_profile.
label def create_instance_profile
  # The IAM errors listed in ignore_invalid_entity are logged and skipped.
  ignore_invalid_entity { iam_client.create_instance_profile({instance_profile_name: instance_profile_name}) }

  hop_add_role_to_instance_profile
end
# Adds the VM's role to its instance profile, then calls
# hop_wait_instance_profile_created.
label def add_role_to_instance_profile
  # The IAM errors listed in ignore_invalid_entity are logged and skipped.
  ignore_invalid_entity do
    profile_membership = {instance_profile_name: instance_profile_name, role_name: role_name}
    iam_client.add_role_to_instance_profile(profile_membership)
  end

  hop_wait_instance_profile_created
end
# Checks that the instance profile can be fetched from IAM. If IAM reports
# NoSuchEntity it calls `nap 1`; otherwise it calls hop_create_instance.
label def wait_instance_profile_created
  begin
    iam_client.get_instance_profile({instance_profile_name: instance_profile_name})
  rescue Aws::IAM::Errors::NoSuchEntity
    # Profile not visible yet; the code relies on nap not falling through to
    # the hop below (presumably it raises a flow-control exception).
    nap 1
  end

  hop_create_instance
end
# Launches the EC2 instance for this VM and records it as an AwsInstance row.
#
# The instance boots with a user-data script that creates vm.unix_user with
# passwordless sudo, installs the VM's public SSH keys for that user and locks
# the default "ubuntu" account. It then calls hop_wait_instance_created.
label def create_instance
  sshable_keys = vm.sshable.keys.map(&:public_key)
  project_keys = vm.project.get_ff_vm_public_ssh_keys || []
  public_keys = (sshable_keys + project_keys).join("\n")

  # Define user data script to set a custom username
  user_data = <<~USER_DATA
    #!/bin/bash
    custom_user="#{vm.unix_user}"
    if [ ! -d /home/$custom_user ]; then
    # Create the custom user
    adduser $custom_user --disabled-password --gecos ""
    # Add the custom user to the sudo group
    usermod -aG sudo $custom_user
    # disable password for the custom user
    echo "$custom_user ALL=(ALL:ALL) NOPASSWD:ALL" | sudo tee /etc/sudoers.d/$custom_user
    # Set up SSH access for the custom user
    mkdir -p /home/$custom_user/.ssh
    cp /home/ubuntu/.ssh/authorized_keys /home/$custom_user/.ssh/
    chown -R $custom_user:$custom_user /home/$custom_user/.ssh
    chmod 700 /home/$custom_user/.ssh
    chmod 600 /home/$custom_user/.ssh/authorized_keys
    fi
    echo #{public_keys.shellescape} > /home/$custom_user/.ssh/authorized_keys
    usermod -L ubuntu
  USER_DATA

  # Normally we use dnsmasq to resolve our transparent cache domain to local IP, but we use /etc/hosts for AWS runners
  if vm.unix_user == "runneradmin"
    user_data += "\necho \"#{vm.private_ipv4} ubicloudhostplaceholder.blob.core.windows.net\" >> /etc/hosts"
  end

  # Encrypted gp3 root volume sized from the VM's boot storage volume.
  boot_volume = {
    device_name: "/dev/sda1",
    ebs: {
      encrypted: true,
      delete_on_termination: true,
      iops: 3000,
      volume_size: vm.vm_storage_volumes_dataset.where(:boot).get(:size_gib),
      volume_type: "gp3",
      throughput: 125
    }
  }

  # Attach the VM's first NIC's AWS network interface as device 0.
  primary_nic = {
    network_interface_id: vm.nics.first.nic_aws_resource.network_interface_id,
    device_index: 0
  }

  dns_name_options = {
    hostname_type: "ip-name",
    enable_resource_name_dns_a_record: false,
    enable_resource_name_dns_aaaa_record: false
  }

  # Drop blank lines and "# ..." comment lines from the script before encoding;
  # the "#!/bin/bash" line survives because it has no space after the "#".
  encoded_user_data = Base64.encode64(user_data.gsub(/^(\s*# .*)?\n/, ""))

  params = {
    image_id: vm.boot_image, # AMI ID
    instance_type: Option.aws_instance_type_name(vm.family, vm.vcpus),
    block_device_mappings: [boot_volume],
    network_interfaces: [primary_nic],
    private_dns_name_options: dns_name_options,
    min_count: 1,
    max_count: 1,
    user_data: encoded_user_data,
    tag_specifications: Util.aws_tag_specifications("instance", vm.name),
    iam_instance_profile: {name: instance_profile_name},
    client_token: vm.id
  }

  launch_result = begin
    client.run_instances(params)
  rescue Aws::EC2::Errors::InvalidParameterValue => e
    # EC2 rejected the instance profile name: nap and try again. Any other
    # invalid parameter is re-raised.
    nap 1 if e.message.include?("Invalid IAM Instance Profile name")
    raise
  end

  instance = launch_result.instances.first
  subnet_id = instance.network_interfaces.first.subnet_id
  # The availability zone id is read from the subnet the instance landed in.
  az_id = client.describe_subnets(subnet_ids: [subnet_id]).subnets.first.availability_zone_id

  AwsInstance.create_with_id(
    vm.id,
    instance_id: instance.instance_id,
    az_id: az_id,
    ipv4_dns_name: instance.public_dns_name
  )

  hop_wait_instance_created
end
# Waits until the EC2 instance is "running", then records its addresses.
#
# Once running, the public IPv4 is stored as an AssignedVmAddress and as the
# sshable host, the VM row gets cores (vcpus / 2), allocated_at and
# ephemeral_net6, and the prog pops "vm created".
label def wait_instance_created
  filters = [
    {name: "instance-id", values: [aws_instance.instance_id]},
    {name: "tag:Ubicloud", values: ["true"]}
  ]

  # A filtered DescribeInstances call can come back with no reservations when
  # the freshly launched instance is not visible yet. Treat that like "not
  # running" and nap, instead of raising NoMethodError on nil.
  instance = client.describe_instances({filters: filters}).reservations.first&.instances&.first

  # The code below relies on nap not returning (presumably it raises a
  # flow-control exception), so it only runs for a running instance.
  nap 1 unless instance&.dig(:state, :name) == "running"

  public_ipv4 = instance.dig(:network_interfaces, 0, :association, :public_ip)
  public_ipv6 = instance.dig(:network_interfaces, 0, :ipv_6_addresses, 0, :ipv_6_address)

  AssignedVmAddress.create(dst_vm_id: vm.id, ip: public_ipv4)
  vm.sshable&.update(host: public_ipv4)
  vm.update(cores: vm.vcpus / 2, allocated_at: Time.now, ephemeral_net6: public_ipv6)

  pop "vm created"
end
# Terminates the EC2 instance (if an AwsInstance record exists) and deletes
# that record, then calls hop_cleanup_roles.
label def destroy
  if (instance_record = aws_instance)
    begin
      client.terminate_instances(instance_ids: [instance_record.instance_id])
    rescue Aws::EC2::Errors::InvalidInstanceIDNotFound
      # EC2 does not know this instance id; nothing to terminate.
    end

    instance_record.destroy
  end

  hop_cleanup_roles
end
# Removes the IAM resources created for the VM: the instance profile, the
# CloudWatch policy (when it can be found) and the role. Pops "vm destroyed".
#
# Every IAM call is wrapped separately in ignore_invalid_entity, so one call
# hitting an already-missing entity does not stop the others.
label def cleanup_roles
  profile_membership = {instance_profile_name: instance_profile_name, role_name: role_name}
  ignore_invalid_entity { iam_client.remove_role_from_instance_profile(profile_membership) }
  ignore_invalid_entity { iam_client.delete_instance_profile({instance_profile_name: instance_profile_name}) }

  if (policy = cloudwatch_policy)
    # Detach before delete, in the same order as before.
    ignore_invalid_entity { iam_client.detach_role_policy({role_name: role_name, policy_arn: policy.arn}) }
    ignore_invalid_entity { iam_client.delete_policy({policy_arn: policy.arn}) }
  end

  ignore_invalid_entity { iam_client.delete_role({role_name: role_name}) }

  pop "vm destroyed"
end
# Memoized client taken from the VM location's credential; the prog uses it
# for the EC2 calls (run_instances, describe_instances, ...).
def client
  return @client if @client

  @client = vm.location.location_credential.client
end
# Memoized IAM client taken from the VM location's credential.
def iam_client
  return @iam_client if @iam_client

  @iam_client = vm.location.location_credential.iam_client
end
# Finds the customer-managed ("Local" scope) IAM policy named policy_name.
#
# IAM returns list_policies results in pages, so every page is searched until
# a match is found; looking only at the first page would miss the policy once
# the account holds more policies than fit in one page.
#
# Returns the matching policy object (memoized once found), or nil when no
# policy has that name.
def cloudwatch_policy
  return @cloudwatch_policy if @cloudwatch_policy

  response = iam_client.list_policies(scope: "Local")
  # SDK responses for paginated operations expose each_page; fall back to the
  # single response for objects that do not (e.g. simple test doubles).
  pages = response.respond_to?(:each_page) ? response.each_page : [response]

  pages.each do |page|
    match = page.policies.find { |p| p.policy_name == policy_name }
    # Returning here stops the iteration, so no further pages are requested.
    return @cloudwatch_policy = match if match
  end

  nil
end
# Name of the VM's CloudWatch agent IAM policy: "<vm name>-cw-agent-policy".
def policy_name = "#{vm.name}-cw-agent-policy"
# The IAM role is named exactly like the VM.
def role_name = vm.name
# Name of the VM's IAM instance profile: "<vm name>-instance-profile".
def instance_profile_name = "#{vm.name}-instance-profile"
# Runs the given block and returns its value. Four IAM errors are swallowed
# and logged through Clog instead of being raised: invalid instance profile
# name, invalid role name, no such entity, and entity already exists.
# Any other exception propagates to the caller.
def ignore_invalid_entity
  begin
    yield
  rescue Aws::IAM::Errors::InvalidInstanceProfileName,
    Aws::IAM::Errors::InvalidRoleName,
    Aws::IAM::Errors::NoSuchEntity,
    Aws::IAM::Errors::EntityAlreadyExists => error
    Clog.emit("Entity does not exist or already exists") do
      Util.exception_to_hash(error)
    end
  end
end
end