Skip to content

Commit

Permalink
Add new integration to delete unused AMIs
Browse files Browse the repository at this point in the history
part of APPSRE-7272

design-doc in: docs/app-sre/design-docs/aws-ami-cleanup.md

Signed-off-by: Rafa Porres Molina <[email protected]>
  • Loading branch information
rporres committed May 23, 2023
1 parent a6fb791 commit 251d133
Show file tree
Hide file tree
Showing 12 changed files with 898 additions and 3 deletions.
Empty file.
257 changes: 257 additions & 0 deletions reconcile/aws_ami_cleanup/integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
import logging
import re
from collections.abc import (
Callable,
Iterable,
Mapping,
)
from datetime import (
datetime,
timedelta,
)
from typing import (
Any,
Optional,
)

from botocore.exceptions import ClientError
from mypy_boto3_ec2 import EC2Client
from pydantic import (
BaseModel,
Field,
)

from reconcile import queries
from reconcile.gql_definitions.aws_ami_cleanup.asg_namespaces import (
ASGImageGitV1,
ASGImageStaticV1,
NamespaceTerraformProviderResourceAWSV1,
NamespaceTerraformResourceASGV1,
NamespaceV1,
)
from reconcile.gql_definitions.aws_ami_cleanup.asg_namespaces import (
query as query_asg_namespaces,
)
from reconcile.typed_queries.app_interface_vault_settings import (
get_app_interface_vault_settings,
)
from reconcile.utils import gql
from reconcile.utils.aws_api import AWSApi
from reconcile.utils.defer import defer
from reconcile.utils.parse_dhms_duration import dhms_to_seconds
from reconcile.utils.secret_reader import create_secret_reader
from reconcile.utils.terrascript_aws_client import TerrascriptClient as Terrascript

# Name of this integration; passed to Terrascript in run() and reused in MANAGED_TAG.
QONTRACT_INTEGRATION = "aws_ami_cleanup"
# Tag in the AWS "Key"/"Value" spelling that names this integration.
# NOTE(review): not referenced by the code visible in this module - confirm usage.
MANAGED_TAG = {"Key": "managed_by_integration", "Value": QONTRACT_INTEGRATION}


class CannotCompareTagsError(Exception):
    """Raised when an app-interface AMI defines more tags than an AWS AMI carries."""


class AmiTag(BaseModel):
    """A single AMI tag, modelled as an immutable key/value pair.

    Instances can be built from the AWS spelling (``Key``/``Value``) as well as
    from the field names (``key``/``value``).
    """

    key: str = Field(alias="Key")
    value: str = Field(alias="Value")

    class Config:
        # Frozen models are immutable and hashable, which lets tags live in sets.
        frozen = True
        # Accept the field names in addition to the "Key"/"Value" aliases.
        allow_population_by_field_name = True


class AWSAmi(BaseModel):
    """An AMI found in an AWS account: its id, creation date and tags."""

    # Creation timestamp of the image.
    creation_date: datetime
    # AWS image id.
    image_id: str
    # Tags attached to the image.
    tags: set[AmiTag]

    class Config:
        # Instances are immutable once built.
        frozen = True


class AIAmi(BaseModel):
    """An AMI referenced by an ASG resource defined in app-interface."""

    # Identifier of the ASG resource that references the image.
    identifier: str
    # Tags that select the image.
    tags: set[AmiTag]

    class Config:
        # Instances are immutable once built.
        frozen = True


def get_aws_amis(
    aws_api: AWSApi,
    ec2_client: EC2Client,
    owner: str,
    regex: str,
    age_in_seconds: int,
    utc_now: datetime,
    region: str,
) -> list[AWSAmi]:
    """Return the tagged AMIs of ``owner`` that match ``regex`` and are old enough.

    Args:
        aws_api: object whose ``paginate`` method lists the images.
        ec2_client: EC2 client handed to ``aws_api.paginate``.
        owner: value used for the ``Owners`` filter of ``describe_images``.
        regex: pattern searched (``re.search`` semantics) in the image name.
        age_in_seconds: minimum age an image must have to be returned.
        utc_now: reference time used to compute the age of every image.
        region: accepted for interface compatibility; not used in the body.

    Returns:
        One ``AWSAmi`` per image that matches the pattern, is at least
        ``age_in_seconds`` old and has at least one tag.
    """
    listed_images = aws_api.paginate(
        ec2_client, "describe_images", "Images", {"Owners": [owner]}
    )

    name_pattern = re.compile(regex)
    selected = []
    for image in listed_images:
        if name_pattern.search(image["Name"]) is None:
            continue

        created_at = datetime.strptime(image["CreationDate"], "%Y-%m-%dT%H:%M:%S.%fZ")

        # Images younger than the requested age are kept out of the result.
        minimum_age = timedelta(seconds=age_in_seconds)
        if utc_now - created_at < minimum_age:
            continue

        # Untagged images are never returned.
        raw_tags = image.get("Tags")
        if not raw_tags:
            continue

        selected.append(
            AWSAmi(
                image_id=image["ImageId"],
                tags={AmiTag(**raw_tag) for raw_tag in raw_tags},
                creation_date=created_at,
            )
        )

    return selected


def get_region(
    cleanup: Mapping[str, Any],
    account: Mapping[str, Any],
) -> str:
    """Pick the region to search for a cleanup entry.

    The region set on the cleanup entry wins; when it is missing or empty the
    account's ``resourcesDefaultRegion`` is used instead.

    Raises:
        ValueError: the chosen region is not listed in the account's
            ``supportedDeploymentRegions``.
    """
    region = cleanup.get("region")
    if not region:
        region = account["resourcesDefaultRegion"]

    if region in account["supportedDeploymentRegions"]:
        return region

    raise ValueError(f"region {region} is not supported in {account['name']}")


def get_app_interface_amis(
    namespaces: Optional[list[NamespaceV1]], ts: Terrascript
) -> list[AIAmi]:
    """Collect the AMIs referenced by ASG resources in app-interface.

    Args:
        namespaces: namespaces to inspect; ``None`` is treated as empty.
        ts: client whose ``get_commit_sha`` supplies the tag value for
            git-based images.

    Returns:
        One ``AIAmi`` per ASG resource, carrying the resource identifier and
        the set of tags built from its image entries.
    """
    collected = []
    for namespace in namespaces or []:
        aws_providers = (
            external
            for external in namespace.external_resources or []
            if isinstance(external, NamespaceTerraformProviderResourceAWSV1)
        )
        for provider in aws_providers:
            asg_resources = (
                resource
                for resource in provider.resources
                if isinstance(resource, NamespaceTerraformResourceASGV1)
            )
            for asg in asg_resources:
                image_tags = set()
                for image in asg.image:
                    if isinstance(image, ASGImageGitV1):
                        # Git images are identified by the commit sha of their ref.
                        commit_sha = ts.get_commit_sha(image.dict(by_alias=True))
                        image_tags.add(AmiTag(key=image.tag_name, value=commit_sha))
                    elif isinstance(image, ASGImageStaticV1):
                        image_tags.add(AmiTag(key=image.tag_name, value=image.value))

                collected.append(AIAmi(identifier=asg.identifier, tags=image_tags))

    return collected


def check_aws_ami_in_use(
    aws_ami: AWSAmi, app_interface_amis: list[AIAmi]
) -> Optional[str]:
    """Tell whether an AWS AMI is used by an ASG defined in app-interface.

    The app-interface AMIs are checked in order; the first one whose tags are
    all present on the AWS AMI is considered the user of the image.

    Returns:
        The identifier of the matching app-interface AMI, or ``None`` when no
        entry matches.

    Raises:
        CannotCompareTagsError: an app-interface AMI checked before any match
            has more tags than the AWS AMI.
    """
    for candidate in app_interface_amis:
        if len(aws_ami.tags) < len(candidate.tags):
            raise CannotCompareTagsError(
                f"{candidate.identifier} AI AMI has more tags than {aws_ami.image_id} AWS AMI"
            )

        all_tags_present = candidate.tags.issubset(aws_ami.tags)
        if all_tags_present:
            return candidate.identifier

    return None


@defer
def run(dry_run: bool, thread_pool_size: int, defer: Optional[Callable] = None) -> None:
    """Deregister old AMIs that no app-interface ASG references.

    For every AWS account with cleanup entries of provider ``ami``, the AMIs
    owned by the account that match the entry's regex and age are listed.
    Images matched by an app-interface ASG, or whose tags cannot be compared,
    are left alone; every other image is deregistered.

    Args:
        dry_run: forwarded as ``DryRun`` to ``deregister_image``.
        thread_pool_size: forwarded to the Terrascript client.
        defer: callback registrar provided by the ``@defer`` decorator.
    """
    cleanup_accounts = [
        candidate
        for candidate in queries.get_aws_accounts(terraform_state=True, cleanup=True)
        if candidate.get("cleanup")
    ]

    vault_settings = get_app_interface_vault_settings()

    ts = Terrascript(
        QONTRACT_INTEGRATION,
        "",
        thread_pool_size,
        cleanup_accounts,
        settings=vault_settings.dict(by_alias=True),
    )

    asg_namespaces = (
        query_asg_namespaces(query_func=gql.get_api().query).namespaces or []
    )
    app_interface_amis = get_app_interface_amis(asg_namespaces, ts)

    aws_api = AWSApi(
        1,
        cleanup_accounts,
        secret_reader=create_secret_reader(use_vault=vault_settings.vault),
        init_users=False,
    )
    if defer:  # defer is provided by the method decorator. this makes just mypy happy
        defer(aws_api.cleanup)

    utc_now = datetime.utcnow()
    for account in cleanup_accounts:
        ami_cleanups = (
            entry for entry in account["cleanup"] if entry["provider"] == "ami"
        )
        for cleanup in ami_cleanups:
            region = get_region(cleanup, account)
            regex = cleanup["regex"]
            age_in_seconds = dhms_to_seconds(cleanup["age"])

            session = aws_api.get_session(account["name"])
            ec2_client = aws_api.get_session_client(session, "ec2", region)

            candidates = get_aws_amis(
                aws_api=aws_api,
                ec2_client=ec2_client,
                owner=account["uid"],
                regex=regex,
                age_in_seconds=age_in_seconds,
                utc_now=utc_now,
                region=region,
            )

            for aws_ami in candidates:
                # An image whose tags cannot be compared is never deleted.
                try:
                    used_by = check_aws_ami_in_use(aws_ami, app_interface_amis)
                except CannotCompareTagsError as e:
                    logging.error(e)
                    continue

                if used_by:
                    logging.info(
                        "Discarding ami %s as it is used in app-interface in %s",
                        aws_ami.image_id,
                        used_by,
                    )
                    continue

                logging.info(
                    "Deleting image %s with creation date %s",
                    aws_ami.image_id,
                    aws_ami.creation_date,
                )

                try:
                    ec2_client.deregister_image(
                        ImageId=aws_ami.image_id, DryRun=dry_run
                    )
                except ClientError as e:
                    # Errors mentioning DryRunOperation are logged and ignored;
                    # any other client error is propagated.
                    if "DryRunOperation" not in str(e):
                        raise
                    logging.info(e)
9 changes: 9 additions & 0 deletions reconcile/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -966,6 +966,15 @@ def aws_ami_share(ctx):
run_integration(reconcile.aws_ami_share, ctx.obj)


# CLI command that runs the aws_ami_cleanup integration.
@integration.command(short_help="Cleanup old and unused AMIs.")
@threaded()
@click.pass_context
def aws_ami_cleanup(ctx, thread_pool_size):
    # thread_pool_size is presumably supplied by the @threaded() option - confirm.
    # The integration module is imported inside the command body, not at module level.
    import reconcile.aws_ami_cleanup.integration

    # Hand the module, the click context object and the pool size to the runner.
    run_integration(reconcile.aws_ami_cleanup.integration, ctx.obj, thread_pool_size)


@integration.command(
short_help="Generate AWS ECR image pull secrets and store them in Vault."
)
Expand Down
Empty file.
33 changes: 33 additions & 0 deletions reconcile/gql_definitions/aws_ami_cleanup/asg_namespaces.gql
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# qenerate: plugin=pydantic_v1

# Lists every namespace with its external resources. For AWS terraform
# providers, the ASG resources are expanded with their identifier and image
# definitions.
query ASGNamespaces {
  namespaces: namespaces_v1 {
    name
    externalResources {
      provider
      provisioner {
        name
      }
      # Only AWS terraform providers expose the resources selected below.
      ... on NamespaceTerraformProviderResourceAWS_v1 {
        resources {
          provider
          # Only ASG resources carry an identifier and image entries here.
          ... on NamespaceTerraformResourceASG_v1 {
            identifier
            image {
              provider
              # Image described by a git repository url and ref.
              ... on ASGImageGit_v1 {
                tag_name
                url
                ref
              }
              # Image described by a fixed tag value.
              ... on ASGImageStatic_v1 {
                tag_name
                value
              }
            }
          }
        }
      }
    }
  }
}
Loading

0 comments on commit 251d133

Please sign in to comment.