Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added a script to drain and kill a marathon app as gracefully as possible #143

Merged
merged 6 commits into from
Jan 21, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions paasta_tools/bounce_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,4 +370,21 @@ def downthenup_bounce(
"tasks_to_drain": set.union(set(), *old_app_live_tasks.values()),
}


@register_bounce_method('down')
def down_bounce(
    new_config,
    new_app_running,
    happy_new_tasks,
    old_app_live_tasks,
):
    """
    Stops old apps, doesn't start any new apps.
    Used for the graceful_app_drain script.

    :param new_config: unused by this bounce method.
    :param new_app_running: unused by this bounce method.
    :param happy_new_tasks: unused by this bounce method.
    :param old_app_live_tasks: a mapping whose values are collections of
                               live tasks; every task in every value is
                               selected for draining.
    :return: a dict with "create_app" set to False (never start a new app)
             and "tasks_to_drain" set to the union of all old live tasks.
    """
    return {
        "create_app": False,
        # set(*values) raises TypeError as soon as there is more than one
        # old app, because set() accepts at most one argument. Union the
        # task collections instead, starting from an empty set so that an
        # empty mapping still yields an empty set.
        "tasks_to_drain": set.union(set(), *old_app_live_tasks.values()),
    }

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
99 changes: 99 additions & 0 deletions paasta_tools/graceful_app_drain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/usr/bin/env python

import argparse
import sys
import time

import service_configuration_lib
from paasta_tools import marathon_tools
from paasta_tools import drain_lib
from paasta_tools import bounce_lib
from paasta_tools.utils import decompose_job_id
from paasta_tools.setup_marathon_job import do_bounce, get_old_live_draining_tasks
from paasta_tools.utils import load_system_paasta_config


def parse_args():
    """Parse the command line for the graceful app drain script.

    Accepts one positional argument, ``appname``, and an optional
    ``-d/--soa-dir`` which defaults to
    ``service_configuration_lib.DEFAULT_SOA_DIR``.

    :return: the argparse namespace, with ``appname`` and ``soa_dir`` set.
    """
    description = (
        "This script attempts to gracefully drain and kill a marathon app.\n"
        "It is intended for use in emergencies when the regular bounce script can't proceed,\n"
        "and needs to kill a specific app to get going."
    )
    arg_parser = argparse.ArgumentParser(description=description)

    # Positional: the marathon app to drain.
    arg_parser.add_argument('appname', help="the app that will be drained")

    # Optional override for where the soa configs are read from.
    soa_dir_options = {
        'dest': "soa_dir",
        'metavar': "SOA_DIR",
        'default': service_configuration_lib.DEFAULT_SOA_DIR,
        'help': "define a different soa config directory",
    }
    arg_parser.add_argument('-d', '--soa-dir', **soa_dir_options)

    return arg_parser.parse_args()


def main():
    """Drain and kill a single marathon app, polling until it is gone.

    Reads the app name and soa dir from the command line, exits with
    status 1 if marathon does not report the app as running, and otherwise
    repeatedly runs the 'down' bounce method against the app until
    marathon no longer reports it as running.
    """
    args = parse_args()
    # Strip any leading slashes from the supplied app name.
    full_appid = args.appname.lstrip('/')
    soa_dir = args.soa_dir
    marathon_config = marathon_tools.load_marathon_config()
    client = marathon_tools.get_marathon_client(
        url=marathon_config.get_url(),
        user=marathon_config.get_username(),
        passwd=marathon_config.get_password(),
    )

    if not marathon_tools.is_app_id_running(app_id=full_appid, client=client):
        print("Couldn't find an app named {0}".format(full_appid))
        sys.exit(1)

    # Only the first two components of the decomposed job id are used here.
    # NOTE(review): the '--' -> '_' replacement presumably undoes the
    # escaping applied when the marathon app id was composed -- confirm.
    service, instance, _, __ = (s.replace('--', '_') for s in decompose_job_id(full_appid))
    complete_config = marathon_tools.create_complete_config(service, instance, marathon_config)
    cluster = load_system_paasta_config().get_cluster()
    service_instance_config = marathon_tools.load_marathon_service_config(
        service=service,
        instance=instance,
        cluster=cluster,
        soa_dir=soa_dir,
    )
    nerve_ns = service_instance_config.get_nerve_namespace()
    service_namespace_config = marathon_tools.load_service_namespace_config(service=service, namespace=nerve_ns)
    drain_method = drain_lib.get_drain_method(
        service_instance_config.get_drain_method(service_namespace_config),
        service=service,
        instance=instance,
        nerve_ns=nerve_ns,
        drain_method_params=service_instance_config.get_drain_method_params(service_namespace_config),
    )

    bounce_func = bounce_lib.get_bounce_method_func('down')

    while marathon_tools.is_app_id_running(app_id=full_appid, client=client):
        # Re-fetch the app on every pass so the live/draining task split
        # is recomputed from the app's current state.
        app_to_kill = client.get_app(full_appid)
        old_app_live_tasks, old_app_draining_tasks = get_old_live_draining_tasks([app_to_kill], drain_method)
        do_bounce(
            bounce_func=bounce_func,
            drain_method=drain_method,
            config=complete_config,
            new_app_running='',
            happy_new_tasks=[],
            old_app_live_tasks=old_app_live_tasks,
            old_app_draining_tasks=old_app_draining_tasks,
            serviceinstance="{0}.{1}".format(service, instance),
            bounce_method='down',
            service=service,
            cluster=cluster,
            instance=instance,
            marathon_jobid=full_appid,
            client=client,
            soa_dir=soa_dir,
        )

        # Iterate every 10 seconds, like the normal bounce, instead of
        # spinning; tell the user so they know why nothing seems to happen.
        # print() is used in function-call form to match the other calls in
        # this function (the bare print statement is a syntax error on
        # Python 3).
        print("Sleeping for 10 seconds to give the tasks time to drain")
        time.sleep(10)

    print("Sucessfully killed {0}".format(full_appid))

# Run the drain only when executed as a script, not when imported.
if __name__ == "__main__":
    main()