ubicloud/routes/runtime/github.rb
Enes Cakir 6fbb7ae37b Allow access to all caches from all scopes if feature flag enabled
We aim to maintain the same security measures as the official GitHub
Actions runners.

Jobs can only access cache entries from their own branch or the default
branch.

> Access restrictions provide cache isolation and security by creating
> a logical boundary between different branches or tags. Workflow runs
> can restore caches created in either the current branch or the
> default branch (usually main). [^1]

Some customers want access to all caches from all scopes. If a customer
prefers not to isolate cache entries and accepts the trade-off that any
branch can then restore any cache, this PR allows that behind a feature
flag.
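
As a rough sketch of the rule being relaxed (names here are illustrative
and not from the codebase):

```ruby
def allowed_scopes(head_branch:, default_branch:, access_all_scopes: false)
  return :all if access_all_scopes # feature flag on: no scope boundary

  # default behavior: only the job's own branch and the default branch
  [head_branch, default_branch].compact.uniq
end

allowed_scopes(head_branch: "feature-x", default_branch: "main")
# => ["feature-x", "main"]
allowed_scopes(head_branch: "feature-x", default_branch: "main", access_all_scopes: true)
# => :all
```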

I believe it's reasonable not to enforce isolation for private
repositories if all contributors are trusted.

If more customers request this feature, we can consider adding a toggle
on the settings page.

[^1]: https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/caching-dependencies-to-speed-up-workflows#restrictions-for-accessing-a-cache
2024-12-24 18:05:20 +03:00

# frozen_string_literal: true

class Clover
  hash_branch(:runtime_prefix, "github") do |r|
    if (runner = GithubRunner[vm_id: @vm.id]).nil? || (repository = runner.repository).nil?
      fail CloverError.new(400, "InvalidRequest", "invalid JWT format or claim in Authorization header")
    end

    repository.setup_blob_storage unless repository.access_key

    # getCacheEntry
    r.get "cache" do
      keys, version = r.params["keys"]&.split(","), r.params["version"]
      fail CloverError.new(400, "InvalidRequest", "Wrong parameters") if keys.nil? || keys.empty? || version.nil?

      dataset = repository.cache_entries_dataset.exclude(committed_at: nil).where(version: version)

      unless repository.installation.project.get_ff_access_all_cache_scopes
        # Clients can send multiple keys, and we look for caches in multiple scopes.
        # We prioritize scope over key, returning the cache for the first matching
        # key in the head branch scope, followed by the first matching key in the
        # default branch scope.
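        # For example, with scopes = ["feature-x", "main"] and keys = ["npm-abc", "npm-"],
        # an entry in the "feature-x" scope matching only "npm-" is preferred over an
        # entry in the "main" scope matching "npm-abc".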
        scopes = [runner.workflow_job&.dig("head_branch") || get_scope_from_github(runner, r.params["runId"]), repository.default_branch]
        scopes.compact!
        scopes.uniq!
        dataset = dataset.where(scope: scopes)
          .order(Sequel.case(scopes.map.with_index { |scope, idx| [{scope:}, idx] }.to_h, scopes.length))
      end

      entry = dataset
        .where(key: keys)
        .order_append(Sequel.case(keys.map.with_index { |key, idx| [{key:}, idx] }.to_h, keys.length))
        .first

      # GitHub cache supports prefix match if the key doesn't match exactly.
      # From their docs:
      #   When a key doesn't match directly, the action searches for keys
      #   prefixed with the restore key. If there are multiple partial matches
      #   for a restore key, the action returns the most recently created cache.
      #
      # We still prioritize scope over key in this case, and if there are
      # multiple prefix matches for a key, this chooses the most recent.
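      # For example, restore key "npm-" matches both "npm-abc" and "npm-def";
      # within the same scope, whichever entry was created most recently wins.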
      entry ||= dataset
        .grep(:key, keys.map { |key| "#{DB.dataset.escape_like(key)}%" })
        .order_append(Sequel.case(keys.map.with_index { |key, idx| [Sequel.like(:key, "#{DB.dataset.escape_like(key)}%"), idx] }.to_h, keys.length), Sequel.desc(:created_at))
        .first

      fail CloverError.new(204, "NotFound", "No cache entry") if entry.nil?

      entry.update(last_accessed_at: Time.now, last_accessed_by: runner.id)
      signed_url = repository.url_presigner.presigned_url(:get_object, bucket: repository.bucket_name, key: entry.blob_key, expires_in: 900)

      {
        scope: entry.scope,
        cacheKey: entry.key,
        cacheVersion: entry.version,
        creationTime: entry.created_at,
        archiveLocation: signed_url
      }
    end
r.on "caches" do
# listCache
r.get true do
key = r.params["key"]
fail CloverError.new(204, "NotFound", "No cache entry") if key.nil?
scopes = [runner.workflow_job&.dig("head_branch"), repository.default_branch].compact
entries = repository.cache_entries_dataset
.exclude(committed_at: nil)
.where(key: key, scope: scopes)
.order(:version).all
{
totalCount: entries.count,
artifactCaches: entries.map do
{
scope: _1.scope,
cacheKey: _1.key,
cacheVersion: _1.version,
creationTime: _1.created_at
}
end
}
end

      # reserveCache
      r.post true do
        key = r.params["key"]
        version = r.params["version"]
        size = r.params["cacheSize"]&.to_i
        fail CloverError.new(400, "InvalidRequest", "Wrong parameters") if key.nil? || version.nil?

        unless (scope = runner.workflow_job&.dig("head_branch") || get_scope_from_github(runner, r.params["runId"]))
          Clog.emit("The runner does not have a workflow job") { {no_workflow_job: {ubid: runner.ubid, repository_ubid: repository.ubid}} }
          fail CloverError.new(400, "InvalidRequest", "No workflow job data available")
        end

        if size && size > GithubRepository::CACHE_SIZE_LIMIT
          fail CloverError.new(400, "InvalidRequest", "The cache size is over the 10GB limit")
        end

        entry, upload_id = nil, nil
        DB.transaction do
          begin
            entry = GithubCacheEntry.create_with_id(repository_id: runner.repository.id, key: key, version: version, size: size, scope: scope, created_by: runner.id)
          rescue Sequel::ValidationFailed, Sequel::UniqueConstraintViolation
            fail CloverError.new(409, "AlreadyExists", "A cache entry for #{scope} scope already exists with #{key} key and #{version} version.")
          end
          upload_id = repository.blob_storage_client.create_multipart_upload(bucket: repository.bucket_name, key: entry.blob_key).upload_id
          entry.update(upload_id: upload_id)
        end

        # If size is not provided, the client didn't tell us the size of the
        # cache. In that case, we use GithubRepository::CACHE_SIZE_LIMIT as
        # the size so enough upload parts get presigned.
        size ||= GithubRepository::CACHE_SIZE_LIMIT
        max_chunk_size = 32 * 1024 * 1024 # 32MB
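        # e.g. a 100 MiB cache needs (100 * 1024 * 1024).fdiv(max_chunk_size).ceil == 4 part URLs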
        presigned_urls = (1..size.fdiv(max_chunk_size).ceil).map do
          repository.url_presigner.presigned_url(:upload_part, bucket: repository.bucket_name, key: entry.blob_key, upload_id: upload_id, part_number: _1, expires_in: 900)
        end

        {
          uploadId: upload_id,
          presignedUrls: presigned_urls,
          chunkSize: max_chunk_size
        }
      end

      # commitCache
      r.post "commit" do
        etags = r.params["etags"]
        upload_id = r.params["uploadId"]
        size = r.params["size"].to_i
        fail CloverError.new(400, "InvalidRequest", "Wrong parameters") if etags.nil? || etags.empty? || upload_id.nil? || size == 0

        entry = GithubCacheEntry[repository_id: repository.id, upload_id: upload_id, committed_at: nil]
        fail CloverError.new(204, "NotFound", "No cache entry") if entry.nil? || (entry.size && entry.size != size)
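        # S3-style part numbers are 1-based and must follow the order of the
        # presigned URLs handed out by reserveCache, hence the index + 1 below.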

        begin
          repository.blob_storage_client.complete_multipart_upload({
            bucket: repository.bucket_name,
            key: entry.blob_key,
            upload_id: upload_id,
            multipart_upload: {parts: etags.map.with_index { {part_number: _2 + 1, etag: _1} }}
          })
        rescue Aws::S3::Errors::InvalidPart, Aws::S3::Errors::NoSuchUpload => ex
          Clog.emit("could not complete multipart upload") { {failed_multipart_upload: {ubid: runner.ubid, repository_ubid: repository.ubid, exception: Util.exception_to_hash(ex)}} }
          fail CloverError.new(400, "InvalidRequest", "Wrong parameters")
        end

        updates = {committed_at: Time.now}
        # If the size could not be set during reserveCache, set it here.
        updates[:size] = size if entry.size.nil?
        entry.update(updates)

        {}
      end
    end
  end
end