diff --git a/hack/calculate-cache.sh b/hack/calculate-cache.sh new file mode 100755 index 000000000000..7f7d51e23197 --- /dev/null +++ b/hack/calculate-cache.sh @@ -0,0 +1,299 @@ +#!/usr/bin/env bash +# This script calculates the expected content size, actual cached size, and cache-keys used in caching method before and after +# implementation in https://github.com/lima-vm/lima/pull/2508 +# +# Answer to the question in https://github.com/lima-vm/lima/pull/2508#discussion_r1699798651 + +# usage: [DEBUG=1] ./hack/calculate-cache.sh +# DEBUG=1 will save the collected information in .calculate-cache-collected-info-{before,after}.yaml +# +# This script does: +# 1. extracts `runs_on` and `template` from workflow file (.github/workflows/test.yml) +# 2. check each template for image and nerdctl +# 3. detect size of image and nerdctl (responses from remote are cached for faster iteration) +# save the response in .calculate-cache-response-cache.yaml +# 4. print content size, actual cache size (if available), by cache key +# +# The major differences for reducing cache usage are as follows: +# - it is now cached `~/.cache/lima/download/by-url-sha256/$sha256` instead of caching `~/.cache/lima/download` +# - the cache keys are now based on the image digest and nerdctl digest instead of the template file's hash +# - enables the use of cache regardless of the operating system used to execute CI. +# +# The script requires the following commands: +# - gh: GitHub CLI. +# Using to get the cache information +# - jq: Command-line JSON processor +# Parse the workflow file and print runs-on and template. +# Parse output from gh cache list +# Calculate the expected content size, actual cached size, and cache-keys used. +# - limactl: lima CLI. +# Using to validate the template file for getting nerdctl location and digest. +# - sha256sum: Print or check SHA256 (256-bit) checksums +# - xxd: make a hexdump or do the reverse. +# Using to simulate the 'hashFile()' function in the workflow. +# - yq: Command-line YAML processor. +# Parse the template file for image and nerdctl location, digest, and size. +# Parse the cache response file for the cache. +# Convert the collected information to JSON. + +set -u -o pipefail + +required_commands=(gh jq limactl sha256sum xxd yq) +for cmd in "${required_commands[@]}"; do + if ! command -v "${cmd}" &>/dev/null; then + echo "${cmd} is required. Please install it" >&2 + exit 1 + fi +done + +# current workflow uses x86_64 only +arch=x86_64 + +LIMA_HOME=$(mktemp -d) +export LIMA_HOME + +# parse the workflow file and print runs-on and template +# e.g. +# ```console +# $ print_runs_on_template_from_workflow .github/workflows/test.yml +# macos-12 templates/default.yaml +# ubuntu-24.04 templates/alpine.yaml +# ubuntu-24.04 templates/debian.yaml +# ubuntu-24.04 templates/fedora.yaml +# ubuntu-24.04 templates/archlinux.yaml +# ubuntu-24.04 templates/opensuse.yaml +# ubuntu-24.04 templates/experimental/net-user-v2.yaml +# ubuntu-24.04 templates/experimental/9p.yaml +# ubuntu-24.04 templates/docker.yaml +# ubuntu-24.04 templates/../hack/test-templates/alpine-9p-writable.yaml +# ubuntu-24.04 templates/../hack/test-templates/test-misc.yaml +# macos-12 templates/vmnet.yaml +# macos-12 https://raw.githubusercontent.com/lima-vm/lima/v0.15.1/examples/ubuntu-lts.yaml +# macos-13 templates/experimental/vz.yaml +# macos-13 templates/fedora.yaml +# ``` +function print_runs_on_template_from_workflow() { + yq -o=j "$1" | jq -r ' + "./.github/actions/setup_cache_for_template" as $action | + "\\$\\{\\{\\s*(?\\S*)\\s*\\}\\}" as $pattern | + .jobs | map_values( + ."runs-on" as $runs_on | + { + template: .steps | map_values(select(.uses == $action)) | first |.with.template, + matrix: .strategy.matrix + } | select(.template) | + . + { path: .template | (if test($pattern) then sub(".*\($pattern).*";"\(.path)")|split(".") else null end) } | + ( + .template as $template| + if .path then + getpath(.path)|map(. as $item|$template|sub($pattern;$item)) + else + [$template] + end + ) | map("\($runs_on)\t\(.)") + + ) | flatten |.[] + ' +} + +# returns the OS name from the runner equivalent to the expression `${{ runner.os }}` in the workflow +# e.g. +# ```console +# $ runner_os_from_runner "macos-12" +# macOS +# $ runner_os_from_runner "ubuntu-24.04" +# Linux +# ``` +function runner_os_from_runner() { + # shellcheck disable=SC2249 + case "$1" in + macos*) + echo macOS + ;; + ubuntu*) + echo Linux + ;; + esac +} + +# check the remote location and return the http code and size. +# The result is cached in .calculate-cache-response-cache.yaml +# e.g. +# ```console +# $ check_location "https://cloud-images.ubuntu.com/releases/24.04/release-20240725/ubuntu-24.04-server-cloudimg-amd64.img" +# 200 585498624 +# ``` +function check_location() { + location="$1" + readonly cache_file="./.calculate-cache-response-cache.yaml" + # check response_cache.yaml for the cache + if [[ -f ${cache_file} ]]; then + cached=$(yq -e eval ".[\"${location}\"]" "${cache_file}" 2>/dev/null) && echo "${cached}" && return + else + touch "${cache_file}" + fi + http_code_and_size=$(curl -sIL -w "%{http_code} %header{Content-Length}" "${location}" -o /dev/null) + yq eval ".[\"${location}\"] = \"${http_code_and_size}\"" -i "${cache_file}" + echo "${http_code_and_size}" +} + +# print image location, digest, size, hash, containerd, containerd_location, containerd_digest, containerd_size from the template +# e.g. +# ```console +# $ print_location_digest_size_hash_from_template "templates/default.yaml" +# https://cloud-images.ubuntu.com/releases/24.04/release-20240725/ubuntu-24.04-server-cloudimg-amd64.img sha256:d2377667ea95222330ca2287817403c85178dad397e9fed768a9b4aec79d2a7f 585498624 49aa50a4872ded07ebf657c0eaf9e44ecc0c174d033a97c537ecd270f35b462f true https://github.com/containerd/nerdctl/releases/download/v1.7.6/nerdctl-full-1.7.6-linux-amd64.tar.gz sha256:2c841e097fcfb5a1760bd354b3778cb695b44cd01f9f271c17507dc4a0b25606 237465717 +# ``` +function print_location_digest_size_hash_from_template() { + readonly template=$1 + case "${template}" in + http*) + template_yaml=$(curl -sSL "${template}") + ;; + *) + template_yaml=$(<"${template}") + ;; + esac + readonly yq_filter=" + [ + .images | map(select(.arch == \"${arch}\")) | [.[0,1].location, .[0,1].digest], + .containerd|[.system or .user], + .containerd.archives | map(select(.arch == \"${arch}\")) | [.[0].location, .[0].digest] + ]|flatten|.[] + " + if command -v limactl &>/dev/null; then + parsed=$(limactl validate <(echo "${template_yaml}") --fill 2>/dev/null | yq eval "${yq_filter}") + else + parsed=$(yq eval "${yq_filter}" <<<"${template_yaml}") + fi + # macOS earlier than 15.0 uses bash 3.2.57, which does not support readarray -t + # readarray -t arr <<<"${parsed}" + while IFS= read -r line; do arr+=("${line}"); done <<<"${parsed}" + readonly locations=("${arr[@]:0:2}") digests=("${arr[@]:2:2}") + readonly containerd="${arr[4]}" containerd_location="${arr[5]}" containerd_digest="${arr[6]}" + declare location digest size hash + for ((i = 0; i < ${#locations[@]}; i++)); do + [[ ${locations[i]} != null ]] || continue + http_code_and_size=$(check_location "${locations[i]}") + read -r http_code size <<<"${http_code_and_size}" + if [[ ${http_code} -eq 200 ]]; then + location=${locations[i]} + digest=${digests[i]} + break + fi + done + if [[ -z ${location} ]]; then + echo "Failed to get the image location for ${template}" >&2 + return 1 + fi + hash=$(sha256sum <<<"${template_yaml}" | cut -d' ' -f1 | xxd -r -p | sha256sum | cut -d' ' -f1) + declare containerd_size + containerd_http_code_and_size=$(check_location "${containerd_location}") + read -r _containerd_http_code containerd_size <<<"${containerd_http_code_and_size}" + echo "${location} ${digest} ${size} ${hash} ${containerd} ${containerd_location} ${containerd_digest} ${containerd_size}" +} + +# format first column to MiB +# e.g. +# ```console +# $ echo 585498624 | size_to_mib +# 558.38 MiB +# ``` +function size_to_mib() { + awk ' + function mib(size) { return sprintf("%7.2f MiB", size / 1024 / 1024) } + int($1)>0{ $1=" "mib($1) } + int($2)>0{ $2=mib($2) } + int($2)==0 && NF>1{ $2="<>" } + { print } + ' +} + +# actual_cache_sizes=$(gh cache list --json key,createdAt,sizeInBytes|jq '[.[]|{"key":.key,"value":.sizeInBytes}]|from_entries') +# e.g. +# ```console +# $ echo "${actual_cache_sizes}" +# { +# "Linux-1c3b2791d52735d916dc44767c745c2319eb7cae74af71bbf45ddb268f42fc1d": 810758533, +# "Linux-231c66957fc2cdb18ea10e63f60770049026e29051ecd6598fc390b60d6a4fa6": 633036717, +# "Linux-3b906d46fa532e3bc348c35fc8e7ede6c69f0b27032046ee2cbb56d4022d1146": 574242367, +# "Linux-69a547b760dbf1650007ed541408474237bc611704077214adcac292de556444": 70310855, +# "Linux-7782f8b4ff8cd378377eb79f8d61c9559b94bbd0c11d19eb380ee7bda19af04e": 494141177, +# "Linux-8812aedfe81b4456d421645928b493b1f2f88aff04b7f3171207492fd44cd189": 812730766, +# "Linux-caa7d8af214d55ad8902e82d5918e61573f3d6795d2b5ad9a35305e26fa0e6a9": 754723892, +# "Linux-colima-v0.6.5": 226350335, +# "Linux-de83bce0608d787e3c68c7a31c5fab2b6d054320fd7bf633a031845e2ee03414": 810691197, +# "Linux-eb88a19dfcf2fb98278e7c7e941c143737c6d7cd8950a88f58e04b4ee7cef1bc": 570625794, +# "Linux-f88f0b3b678ff6432386a42bdd27661133c84a36ad29f393da407c871b0143eb": 68490954, +# "golangci-lint.cache-Linux-2850-74615231540133417fd618c72e37be92c5d3b3ad": 2434144, +# "macOS-231c66957fc2cdb18ea10e63f60770049026e29051ecd6598fc390b60d6a4fa6": 633020464, +# "macOS-49aa50a4872ded07ebf657c0eaf9e44ecc0c174d033a97c537ecd270f35b462f": 813179462, +# "macOS-8f37f663956af5f743f0f99ab973729b6a02f200ebfac7a3a036eff296550732": 810756770, +# "macOS-ef5509b5d4495c8c3590442ee912ad1c9a33f872dc4a29421c524fc1e2103b59": 813179476, +# "macOS-upgrade-v0.15.1": 1157814690, +# "setup-go-Linux-ubuntu20-go-1.23.0-02756877dbcc9669bb904e42e894c63aa9801138db94426a90a2d554f2705c52": 1015518352, +# "setup-go-Linux-ubuntu20-go-1.23.0-6bce2eefc6111ace836de8bb322432c072805737d5f3c5a3d47d2207a05f50df": 936433302, +# "setup-go-Linux-ubuntu24-go-1.22.6-02756877dbcc9669bb904e42e894c63aa9801138db94426a90a2d554f2705c52": 1090001859, +# "setup-go-Linux-ubuntu24-go-1.23.0-02756877dbcc9669bb904e42e894c63aa9801138db94426a90a2d554f2705c52": 526146768, +# "setup-go-Windows-go-1.23.0-02756877dbcc9669bb904e42e894c63aa9801138db94426a90a2d554f2705c52": 1155374040, +# "setup-go-Windows-go-1.23.0-6bce2eefc6111ace836de8bb322432c072805737d5f3c5a3d47d2207a05f50df": 1056433137, +# "setup-go-macOS-go-1.23.0-02756877dbcc9669bb904e42e894c63aa9801138db94426a90a2d554f2705c52": 1060919942, +# "setup-go-macOS-go-1.23.0-6bce2eefc6111ace836de8bb322432c072805737d5f3c5a3d47d2207a05f50df": 982139209 +# } +actual_cache_sizes=$( + gh cache list --json key,createdAt,sizeInBytes \ + --jq 'sort_by(.createdAt)|reverse|unique_by(.key)|sort_by(.key)|map({"key":.key,"value":.sizeInBytes})|from_entries' +) + +workflows=( + .github/workflows/test.yml +) + +# shellcheck disable=SC2016 +echo "=> compare expected content size, actual cached size, and cache-keys used before and after the change in https://github.com/lima-vm/lima/pull/2508" +# iterate over before and after +for cache_method in before after; do + echo "==> ${cache_method}" + echo "content-size actual-size cache-key" + output_yaml=$( + for workflow in "${workflows[@]}"; do + print_runs_on_template_from_workflow "${workflow}" + done | while IFS=$'\t' read -r runner template; do + runner_os=$(runner_os_from_runner "${runner}") + location_digest_size_hash=$(print_location_digest_size_hash_from_template "${template}") || continue + read -r location digest size hash containerd containerd_location containerd_digest containerd_size <<<"${location_digest_size_hash}" + if [[ ${cache_method} != after ]]; then + key=${runner_os}-${hash} + elif [[ ${digest} == null ]]; then + key=image:$(basename "${location}")-url-sha256:$(echo -n "${location}" | sha256sum | cut -d' ' -f1) + else + key=image:$(basename "${location}")-${digest} + fi + if [[ ${containerd} == true ]]; then + if [[ ${cache_method} != after ]]; then + # previous caching method packages the containerd archive with the image + size=$((size + containerd_size)) + else + # new caching method packages the containerd archive separately + containerd_key=containerd:$(basename "${containerd_location}")-${containerd_digest} + printf -- "- key: %s\n template: %s\n location: %s\n digest: %s\n size: %s\n" \ + "${containerd_key}" "${template}" "${containerd_location}" "${containerd_digest}" "${containerd_size}" + fi + fi + printf -- "- key: %s\n template: %s\n location: %s\n digest: %s\n size: %s\n" \ + "${key}" "${template}" "${location}" "${digest}" "${size}" + done + ) + output_json=$(yq -o=j . <<<"${output_yaml}") + + # print size key + jq --argjson actual_size "${actual_cache_sizes}" -r 'unique_by(.key)|sort_by(.key)|.[]|[.size, $actual_size[.key] // 0, .key]|@tsv' <<<"${output_json}" | size_to_mib + # total + echo "------------" + jq '[unique_by(.key)|.[]|.size]|add' <<<"${output_json}" | size_to_mib + # save the collected information as yaml if DEBUG is set + if [[ -n ${DEBUG:+1} ]]; then + cat <<<"${output_yaml}" >".calculate-cache-collected-info-${cache_method}.yaml" + echo "Saved the collected information in .calculate-cache-collected-info-${cache_method}.yaml" + fi + echo "" +done