Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[devbin] Generate cleanup policies for GCP artifact registry #13489

Merged
merged 12 commits into from
Aug 29, 2023
27 changes: 27 additions & 0 deletions build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3829,3 +3829,30 @@ steps:
- default_ns
scopes:
- deploy
# Consistency check for the GCP Artifact Registry cleanup policy.
# Re-runs devbin/generate_gcp_ar_cleanup_policy.py inside the ci-utils image and
# diffs its stdout against the committed infra/gcp-broad/gcp-ar-cleanup-policy.txt;
# any difference makes `diff` exit non-zero and fails the step.
# The inputs copy in everything the generator reads (build.yaml,
# ci/test/resources/build.yaml, docker/third-party/images.txt) plus the script
# itself and the expected output file.
- kind: runImage
name: test_gcp_ar_cleanup_policies
resources:
memory: standard
cpu: '0.25'
image:
valueFrom: ci_utils_image.image
script: |
set -ex
cd /io/repo/
python3 devbin/generate_gcp_ar_cleanup_policy.py | diff - infra/gcp-broad/gcp-ar-cleanup-policy.txt
inputs:
- from: /repo/build.yaml
to: /io/repo/build.yaml
- from: /repo/devbin
to: /io/repo/devbin
- from: /repo/infra
to: /io/repo/infra
- from: /repo/docker
to: /io/repo/docker
- from: /repo/ci/test/resources
to: /io/repo/ci/test/resources
clouds:
- gcp
dependsOn:
- ci_utils_image
- default_ns
149 changes: 149 additions & 0 deletions devbin/generate_gcp_ar_cleanup_policy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
import abc
import json
import yaml

from typing import List, Optional


class CleanupPolicy(abc.ABC):
    """Common interface of all cleanup rules: each one renders itself as a dict."""

    @abc.abstractmethod
    def to_dict(self):
        """Return this rule as a plain dict; concrete subclasses must override.

        The base implementation does nothing and returns None.
        """


class DeletePolicy(CleanupPolicy):
    """A cleanup rule whose action type is 'Delete'.

    The rule is described by a name, a tag state and a set of optional,
    keyword-only filters that are copied into the rule's 'condition'.
    """

    def __init__(
        self,
        name: str,
        tag_state: str,
        *,
        tag_prefixes: Optional[List[str]] = None,
        version_name_prefixes: Optional[List[str]] = None,
        package_name_prefixes: Optional[List[str]] = None,
        older_than: Optional[str] = None,
        newer_than: Optional[str] = None,
    ):
        """Store the rule name, tag state and optional filters unchanged."""
        self.name, self.tag_state = name, tag_state
        self.tag_prefixes = tag_prefixes
        self.version_name_prefixes = version_name_prefixes
        self.package_name_prefixes = package_name_prefixes
        self.older_than, self.newer_than = older_than, newer_than

    def to_dict(self):
        """Return the rule as a dict with 'name', 'action' and 'condition' keys."""
        condition = {'tagState': self.tag_state}

        # Prefix filters are written whenever they were supplied (an empty
        # list is still written); only None leaves the key out.
        prefix_filters = (
            ('tagPrefixes', self.tag_prefixes),
            ('versionNamePrefixes', self.version_name_prefixes),
            ('packageNamePrefixes', self.package_name_prefixes),
        )
        for key, prefixes in prefix_filters:
            if prefixes is not None:
                condition[key] = prefixes

        # Age bounds are written only when truthy, so both None and '' are
        # left out.
        age_bounds = (
            ('olderThan', self.older_than),
            ('newerThan', self.newer_than),
        )
        for key, bound in age_bounds:
            if bound:
                condition[key] = bound

        return {
            'name': self.name,
            'action': {'type': 'Delete'},
            'condition': condition,
        }


class ConditionalKeepPolicy(CleanupPolicy):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just a KeepPolicy, right? Or, otherwise, the above is a ConditionalDeletePolicy?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just used their nomenclature in the docs

def __init__(
    self,
    name: str,
    tag_state: str,
    *,
    tag_prefixes: Optional[List[str]] = None,
    version_name_prefixes: Optional[List[str]] = None,
    package_name_prefixes: Optional[List[str]] = None,
    older_than: Optional[str] = None,
    newer_than: Optional[str] = None,
):
    """Store the rule name, tag state and optional filters unchanged.

    Everything after ``tag_state`` is keyword-only and defaults to None.
    """
    self.name, self.tag_state = name, tag_state
    # Prefix filters, each an optional list of strings.
    self.tag_prefixes = tag_prefixes
    self.version_name_prefixes = version_name_prefixes
    self.package_name_prefixes = package_name_prefixes
    # Optional age bounds, kept as the strings the caller passed.
    self.older_than, self.newer_than = older_than, newer_than

def to_dict(self):
    """Return the rule as a dict with 'name', 'action' (type 'Keep') and 'condition'."""
    condition = {'tagState': self.tag_state}

    # Prefix filters are written whenever they were supplied (an empty list
    # is still written); only None leaves the key out.
    condition.update(
        (key, prefixes)
        for key, prefixes in (
            ('tagPrefixes', self.tag_prefixes),
            ('versionNamePrefixes', self.version_name_prefixes),
            ('packageNamePrefixes', self.package_name_prefixes),
        )
        if prefixes is not None
    )

    # Age bounds are written only when truthy, so both None and '' are left out.
    condition.update(
        (key, bound)
        for key, bound in (
            ('olderThan', self.older_than),
            ('newerThan', self.newer_than),
        )
        if bound
    )

    return {
        'name': self.name,
        'action': {'type': 'Keep'},
        'condition': condition,
    }


class MostRecentVersionKeepPolicy(CleanupPolicy):
    """A 'Keep' rule expressed through a 'mostRecentVersions' section.

    It carries a list of package-name prefixes and a keep count instead of a
    'condition' block.
    """

    def __init__(self, name: str, package_name_prefixes: List[str], keep_count: int):
        """Store the rule name, the package-name prefixes and the keep count."""
        self.name = name
        self.package_name_prefixes = package_name_prefixes
        self.keep_count = keep_count

    def to_dict(self):
        """Return the rule as a dict with 'name', 'action' and 'mostRecentVersions' keys."""
        most_recent_versions = {
            'packageNamePrefixes': self.package_name_prefixes,
            'keepCount': self.keep_count,
        }
        return {
            'name': self.name,
            'action': {'type': 'Keep'},
            'mostRecentVersions': most_recent_versions,
        }


third_party_images_fp = 'docker/third-party/images.txt'


def _package_of(image_ref):
    """Return the package part of an image reference, without tag or digest.

    Examples: 'python:3.9' -> 'python', 'envoyproxy/envoy:v1.22.3' ->
    'envoyproxy/envoy', 'jupyter/scipy-notebook' -> 'jupyter/scipy-notebook',
    'localhost:5000/img:1' -> 'localhost:5000/img'.
    """
    # A digest, when present, follows '@' and itself contains a ':'.
    name = image_ref.partition('@')[0]
    # Only a ':' after the last '/' introduces a tag; an earlier ':' belongs
    # to a registry host:port and must stay part of the package name.
    tag_sep = name.find(':', name.rfind('/') + 1)
    return name if tag_sep == -1 else name[:tag_sep]


# Unique package names listed in images.txt, in first-seen order.
third_party_packages = []
with open(third_party_images_fp, 'r') as f:
    for image in f:
        image = image.strip()
        # Blank lines (e.g. a trailing newline at end of file) carry no image;
        # a bare `image.split(':')` used to raise ValueError on them, and on
        # any entry that has no tag.
        if not image:
            continue
        package = _package_of(image)
        if package not in third_party_packages:
            third_party_packages.append(package)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we keep only the tags listed there?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess that means we need #-of-package policies, which is currently 11.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I grabbed all of the tags if they exist, but I'm concerned because these are tag prefix matches so it's not 100% specific to the package.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think if we can't do something exact we should do nothing at all. Better to not have a false sense of security.


# Image names collected from the `publishAs` field of the build configs;
# starts empty and is extended by the scrape_build_yaml calls further down.
deploy_packages = []


def scrape_build_yaml(file_path: str):
    """Return the distinct `publishAs` names of all `buildImage2` steps in a build config.

    The file at ``file_path`` is parsed as YAML and its top-level ``steps``
    list is scanned in order. Names are returned in first-seen order, without
    duplicates.
    """
    with open(file_path, 'r') as config_file:
        raw_config = config_file.read()
    steps = yaml.safe_load(raw_config.strip())['steps']

    published = []
    for step in steps:
        # Only image-building steps publish an image name.
        if step['kind'] != 'buildImage2':
            continue
        image_name = step['publishAs']
        if image_name in published:
            continue
        published.append(image_name)
    return published


# Gather published image names from the main build config and the CI test
# build config.
for build_yaml_path in ('build.yaml', 'ci/test/resources/build.yaml'):
    deploy_packages += scrape_build_yaml(build_yaml_path)

# Drop names that appear in both configs, then sort both package lists so the
# generated output does not depend on the order in which names were found.
deploy_packages = sorted(set(deploy_packages))
third_party_packages.sort()

# The cleanup rules, listed in the order in which they are serialized.
policies = [
# Untagged image versions: deleted with no age condition.
DeletePolicy('delete_untagged', 'untagged'),
# Dev and test images, selected by tag prefix, with older_than='3d'
# (presumably three days; confirm against the Artifact Registry duration format).
DeletePolicy('delete_dev', 'tagged', tag_prefixes=['dev-'], older_than='3d'),
DeletePolicy('delete_test_pr', 'tagged', tag_prefixes=['test-pr-'], older_than='3d'),
DeletePolicy('delete_test_deploy', 'tagged', tag_prefixes=['test-deploy-'], older_than='3d'),
# Build caches. NOTE(review): 'cache-' is itself a prefix of 'cache-pr-', so
# if these are prefix matches the 30d rule also covers PR caches; the 7d rule
# is the stricter of the two for those tags.
DeletePolicy('delete_pr_cache', 'tagged', tag_prefixes=['cache-pr-'], older_than='7d'),
DeletePolicy('delete_cache', 'tagged', tag_prefixes=['cache-'], older_than='30d'),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are the names required to be unique?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I sort above.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm referring to the names of the policies themselves; delete_cache appears twice.

# Keep rules. Both select packages by name prefix, using the sorted
# third-party and deploy package lists built above.
ConditionalKeepPolicy('keep_third_party', 'any', package_name_prefixes=third_party_packages),
MostRecentVersionKeepPolicy('keep_most_recent_deploy', package_name_prefixes=deploy_packages, keep_count=10),
]

# Replace each policy object with its plain-dict form so the list can be
# serialized as JSON.
policies = [p.to_dict() for p in policies]

# Write the policy document to stdout with 4-space indentation.
print(json.dumps(policies, indent=4))
1 change: 0 additions & 1 deletion docker/third-party/images.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ envoyproxy/envoy:v1.22.3
ghost:3.0-alpine
google/cloud-sdk:305.0.0-slim
grafana/grafana:9.1.4
jupyter/scipy-notebook
jupyter/scipy-notebook:c094bb7219f9
moby/buildkit:v0.8.3-rootless
python:3.9
Expand Down
139 changes: 139 additions & 0 deletions infra/gcp-broad/gcp-ar-cleanup-policy.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
[
{
"name": "delete_untagged",
"action": {
"type": "Delete"
},
"condition": {
"tagState": "untagged"
}
},
{
"name": "delete_dev",
"action": {
"type": "Delete"
},
"condition": {
"tagState": "tagged",
"tagPrefixes": [
"dev-"
],
"olderThan": "3d"
}
},
{
"name": "delete_test_pr",
"action": {
"type": "Delete"
},
"condition": {
"tagState": "tagged",
"tagPrefixes": [
"test-pr-"
],
"olderThan": "3d"
}
},
{
"name": "delete_test_deploy",
"action": {
"type": "Delete"
},
"condition": {
"tagState": "tagged",
"tagPrefixes": [
"test-deploy-"
],
"olderThan": "3d"
}
},
{
"name": "delete_pr_cache",
"action": {
"type": "Delete"
},
"condition": {
"tagState": "tagged",
"tagPrefixes": [
"cache-pr-"
],
"olderThan": "7d"
}
},
{
"name": "delete_cache",
"action": {
"type": "Delete"
},
"condition": {
"tagState": "tagged",
"tagPrefixes": [
"cache-"
],
"olderThan": "30d"
}
},
{
"name": "keep_third_party",
"action": {
"type": "Keep"
},
"condition": {
"tagState": "any",
"packageNamePrefixes": [
"alpine",
"debian",
"envoyproxy/envoy",
"ghost",
"google/cloud-sdk",
"grafana/grafana",
"jupyter/scipy-notebook",
"moby/buildkit",
"python",
"redis",
"ubuntu"
]
}
},
{
"name": "keep_most_recent_deploy",
"action": {
"type": "Keep"
},
"mostRecentVersions": {
"packageNamePrefixes": [
"admin-pod",
"auth",
"base",
"batch",
"batch-worker",
"blog_nginx",
"ci",
"ci-hello",
"ci-utils",
"create_certs_image",
"curl",
"git-make-bash",
"hail-buildkit",
"hail-dev",
"hail-run",
"hail-ubuntu",
"hailgenetics/hail",
"hailgenetics/hailtop",
"hailgenetics/vep-grch37-85",
"linting",
"monitoring",
"netcat",
"notebook",
"notebook_nginx",
"query-build",
"test-ci-utils",
"test_hello_create_certs_image",
"volume",
"website",
"workdir"
],
"keepCount": 10
}
}
]