Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added experimental manual mesos task reconciliation script (orphan killer) #238

Merged
merged 2 commits into from
Feb 5, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions paasta_tools/contrib/kill_orphaned_docker_containers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/env python
import argparse

from docker import Client

from paasta_tools import mesos_tools
from paasta_tools.utils import get_docker_host


def parse_args():
    """Parse the command-line options of the orphaned-container killer.

    Returns:
        The ``argparse.Namespace`` produced by the parser. Its ``force``
        attribute is True only when ``-f``/``--force`` was given; the
        default (False) means dry-run.
    """
    parser = argparse.ArgumentParser(
        # Adjacent string literals are concatenated into ONE string. A comma
        # between them would turn the description into a tuple, which makes
        # argparse raise while rendering --help.
        description=(
            'Cross references running containers with task ids from the mesos slave'
            ' and optionally kills them.'
        ),
    )
    parser.add_argument(
        '-f', '--force',
        # A boolean switch: without store_true argparse would demand a value
        # after --force.
        action='store_true',
        default=False,
        help="Actually kill the containers. (defaults to dry-run)",
    )
    return parser.parse_args()


def get_running_task_ids_from_mesos_slave():
    """Collect the ids of executors on the local slave that have a running task.

    The local slave state is fetched through
    ``mesos_tools.get_local_slave_state()``. Every executor of every framework
    is examined, and an executor is kept when at least one of its tasks
    reports the state ``TASK_RUNNING``.

    Returns:
        A list with the ``"id"`` value of each such executor.
    """
    slave_state = mesos_tools.get_local_slave_state()
    running_ids = []
    for framework in slave_state.get('frameworks'):
        for executor in framework.get('executors', []):
            task_states = [task[u'state'] for task in executor.get('tasks', [])]
            if u'TASK_RUNNING' in task_states:
                running_ids.append(executor["id"])
    return running_ids
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Return a set here so that this runs in linear time instead of quadratic time.

Line 50 checks, for every container, whether its id is in the running task ids. Checking membership in a list is O(n), while checking membership in a set is O(1).



def get_running_mesos_docker_containers(client):
    """Return the containers listed by ``client`` whose first name contains "mesos-".

    Args:
        client: object whose ``containers()`` method returns container
            records; each record must expose a non-empty ``"Names"`` list.

    Returns:
        A list of the matching container records, in the order they were listed.
    """
    mesos_containers = []
    for candidate in client.containers():
        first_name = candidate["Names"][0]
        if "mesos-" in first_name:
            mesos_containers.append(candidate)
    return mesos_containers


def get_docker_client():
    """Build a docker ``Client`` whose base URL is the value of ``get_docker_host()``."""
    return Client(base_url=get_docker_host())


def main():
    """Report, and with --force kill, mesos docker containers that look orphaned.

    A container is considered orphaned when the mesos task id found in its
    environment is not among the ids returned by
    ``get_running_task_ids_from_mesos_slave()``. Without ``--force`` nothing
    is killed; the script only prints what it would do.
    """
    args = parse_args()
    docker_client = get_docker_client()
    running_mesos_task_ids = get_running_task_ids_from_mesos_slave()
    running_mesos_docker_containers = get_running_mesos_docker_containers(docker_client)
    # print(x) with a single argument produces the same output as the
    # Python 2 statement form `print x`.
    print(running_mesos_task_ids)

    # Build the lookup set once so each membership test below is O(1)
    # instead of a scan over the whole list.
    running_task_id_lookup = frozenset(running_mesos_task_ids)

    for container in running_mesos_docker_containers:
        container_name = container["Names"][0]
        mesos_task_id = mesos_tools.get_mesos_id_from_container(container=container, client=docker_client)
        print(mesos_task_id)

        if mesos_task_id is None:
            # No task id could be read from the container environment. That
            # is "unknown", not "orphaned", so never kill on that basis.
            print("Skipping %s. (could not determine its mesos task id)" % container_name)
            continue

        if mesos_task_id in running_task_id_lookup:
            print("Not killing %s. (%s)" % (container_name, mesos_task_id))
        elif args.force:
            print("Killing %s. (%s)" % (container_name, mesos_task_id))
            docker_client.kill(container)
        else:
            print("Would kill %s. (%s) Re-run with --force to actually kill it." % (container_name, mesos_task_id))


# Run the orphan killer only when this file is executed as a script.
if "__main__" == __name__:
    main()
31 changes: 31 additions & 0 deletions paasta_tools/mesos_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,3 +376,34 @@ def slave_passes_blacklist(slave, blacklist):
if attributes.get(location_type) == location:
return False
return True


def get_container_id_for_mesos_id(client, mesos_task_id):
    """Return the id of the running container that belongs to ``mesos_task_id``.

    Every container reported by ``client.containers()`` is inspected, and its
    ``Config.Env`` entries are compared with ``MESOS_TASK_ID=<mesos_task_id>``.

    :param client: docker client providing ``containers()`` and
        ``inspect_container()``
    :param mesos_task_id: the mesos task id to look for
    :returns: the ``Id`` of the first matching container, or None when no
        container carries that task id
    """
    wanted_env_var = 'MESOS_TASK_ID=%s' % mesos_task_id
    for container in client.containers():
        info = client.inspect_container(container)
        # Env can be None when a container has no environment at all.
        env_vars = info['Config']['Env'] or []
        # Compare whole entries: a substring test would let task id "foo"
        # also match "MESOS_TASK_ID=foobar" and select the wrong container.
        if wanted_env_var in env_vars:
            # Stop at the first hit so later containers are neither inspected
            # nor able to overwrite the result.
            return info['Id']
    return None


def get_mesos_id_from_container(container, client):
    """Read the mesos task id from a container's environment.

    The container is inspected through ``client.inspect_container`` and its
    ``Config.Env`` entries are scanned in order. The first entry that starts
    with ``MESOS_TASK_ID=`` or ``mesos_task_id=`` supplies the id.

    :param container: container reference accepted by
        ``client.inspect_container``
    :param client: docker client providing ``inspect_container()``
    :returns: the text after the ``=`` of the first matching entry, or None
        when the container has no environment or no such entry
    """
    # Upper case is what Marathon was observed to set; lower case is,
    # per the original author's note, presumably the Chronos spelling.
    prefixes = ('MESOS_TASK_ID=', 'mesos_task_id=')
    info = client.inspect_container(container)
    # Env can be None when a container has no environment at all.
    for env_var in info['Config']['Env'] or []:
        for prefix in prefixes:
            # Anchor at the start of the entry: a variable that merely
            # contains the text (e.g. "PARENT_MESOS_TASK_ID=...") is a
            # different variable and must not be treated as the task id.
            if env_var.startswith(prefix):
                return env_var[len(prefix):]
    return None
16 changes: 1 addition & 15 deletions paasta_tools/paasta_execute_docker_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

from docker import Client

from paasta_tools.mesos_tools import get_container_id_for_mesos_id
from paasta_tools.utils import get_docker_host


Expand All @@ -44,21 +45,6 @@ def parse_args():
return args


def get_container_id_for_mesos_id(client, mesos_task_id):
    """Return the id of the running container that belongs to ``mesos_task_id``.

    Every container reported by ``client.containers()`` is inspected, and its
    ``Config.Env`` entries are compared with ``MESOS_TASK_ID=<mesos_task_id>``.

    :param client: docker client providing ``containers()`` and
        ``inspect_container()``
    :param mesos_task_id: the mesos task id to look for
    :returns: the ``Id`` of the first matching container, or None when no
        container carries that task id
    """
    wanted_env_var = 'MESOS_TASK_ID=%s' % mesos_task_id
    for container in client.containers():
        info = client.inspect_container(container)
        # Env can be None when a container has no environment at all.
        env_vars = info['Config']['Env'] or []
        # Compare whole entries: a substring test would let task id "foo"
        # also match "MESOS_TASK_ID=foobar" and select the wrong container.
        if wanted_env_var in env_vars:
            # Stop at the first hit so later containers are neither inspected
            # nor able to overwrite the result.
            return info['Id']
    return None


class TimeoutException(Exception):
    """Plain ``Exception`` subclass that adds no behaviour of its own.

    NOTE(review): presumably raised when a command run inside a container
    exceeds its time limit -- confirm against the code that raises it.
    """

Expand Down
39 changes: 39 additions & 0 deletions tests/test_mesos_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import contextlib
import datetime

import docker
import mesos
import mock
import requests
Expand Down Expand Up @@ -383,3 +384,41 @@ def test_get_mesos_state_from_leader_raises_on_non_elected_leader():
mesos.cli.master.CURRENT.state = un_elected_fake_state
with raises(mesos_tools.MasterNotAvailableException):
assert mesos_tools.get_mesos_state_from_leader() == un_elected_fake_state


def test_get_paasta_execute_docker_healthcheck():
    """The Id of the container whose env holds the wanted MESOS_TASK_ID is returned."""
    wanted_mesos_id = 'fake_mesos_id'
    wanted_container_id = 'fake_container_id'
    # One inspect result per listed container, handed out in listing order.
    inspect_results = [
        {'Config': {'Env': None}},
        {'Config': {'Env': ['fake_key1=fake_value1', 'MESOS_TASK_ID=fake_other_mesos_id']}, 'Id': '11111'},
        {'Config': {'Env': ['fake_key2=fake_value2', 'MESOS_TASK_ID=%s' % wanted_mesos_id]}, 'Id': wanted_container_id},
    ]
    listed_containers = ['fake_container_1', 'fake_container_2', 'fake_container_3']

    docker_client = mock.MagicMock(spec_set=docker.Client)
    docker_client.containers = mock.MagicMock(
        spec_set=docker.Client,
        return_value=listed_containers,
    )
    docker_client.inspect_container = mock.MagicMock(
        spec_set=docker.Client,
        side_effect=inspect_results,
    )

    found_id = mesos_tools.get_container_id_for_mesos_id(docker_client, wanted_mesos_id)
    assert found_id == wanted_container_id


def test_get_paasta_execute_docker_healthcheck_when_not_found():
    """None is returned when no container env carries the wanted MESOS_TASK_ID."""
    wanted_mesos_id = 'fake_mesos_id'
    # Neither inspected container mentions the wanted task id.
    inspect_results = [
        {'Config': {'Env': ['fake_key1=fake_value1', 'MESOS_TASK_ID=fake_other_mesos_id']}, 'Id': '11111'},
        {'Config': {'Env': ['fake_key2=fake_value2', 'MESOS_TASK_ID=fake_other_mesos_id2']}, 'Id': '2222'},
    ]
    listed_containers = ['fake_container_1', 'fake_container_2']

    docker_client = mock.MagicMock(spec_set=docker.Client)
    docker_client.containers = mock.MagicMock(
        spec_set=docker.Client,
        return_value=listed_containers,
    )
    docker_client.inspect_container = mock.MagicMock(
        spec_set=docker.Client,
        side_effect=inspect_results,
    )

    found_id = mesos_tools.get_container_id_for_mesos_id(docker_client, wanted_mesos_id)
    assert found_id is None
39 changes: 0 additions & 39 deletions tests/test_paasta_execute_docker_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,49 +18,10 @@
import pytest

from paasta_tools.paasta_execute_docker_command import execute_in_container
from paasta_tools.paasta_execute_docker_command import get_container_id_for_mesos_id
from paasta_tools.paasta_execute_docker_command import main
from paasta_tools.paasta_execute_docker_command import TimeoutException


def test_get_paasta_execute_docker_healthcheck():
    """The Id of the container whose env holds the wanted MESOS_TASK_ID is returned."""
    wanted_mesos_id = 'fake_mesos_id'
    wanted_container_id = 'fake_container_id'
    # One inspect result per listed container, handed out in listing order.
    inspect_results = [
        {'Config': {'Env': None}},
        {'Config': {'Env': ['fake_key1=fake_value1', 'MESOS_TASK_ID=fake_other_mesos_id']}, 'Id': '11111'},
        {'Config': {'Env': ['fake_key2=fake_value2', 'MESOS_TASK_ID=%s' % wanted_mesos_id]}, 'Id': wanted_container_id},
    ]
    listed_containers = ['fake_container_1', 'fake_container_2', 'fake_container_3']

    docker_client = mock.MagicMock(spec_set=docker.Client)
    docker_client.containers = mock.MagicMock(
        spec_set=docker.Client,
        return_value=listed_containers,
    )
    docker_client.inspect_container = mock.MagicMock(
        spec_set=docker.Client,
        side_effect=inspect_results,
    )

    found_id = get_container_id_for_mesos_id(docker_client, wanted_mesos_id)
    assert found_id == wanted_container_id


def test_get_paasta_execute_docker_healthcheck_when_not_found():
    """None is returned when no container env carries the wanted MESOS_TASK_ID."""
    wanted_mesos_id = 'fake_mesos_id'
    # Neither inspected container mentions the wanted task id.
    inspect_results = [
        {'Config': {'Env': ['fake_key1=fake_value1', 'MESOS_TASK_ID=fake_other_mesos_id']}, 'Id': '11111'},
        {'Config': {'Env': ['fake_key2=fake_value2', 'MESOS_TASK_ID=fake_other_mesos_id2']}, 'Id': '2222'},
    ]
    listed_containers = ['fake_container_1', 'fake_container_2']

    docker_client = mock.MagicMock(spec_set=docker.Client)
    docker_client.containers = mock.MagicMock(
        spec_set=docker.Client,
        return_value=listed_containers,
    )
    docker_client.inspect_container = mock.MagicMock(
        spec_set=docker.Client,
        side_effect=inspect_results,
    )

    found_id = get_container_id_for_mesos_id(docker_client, wanted_mesos_id)
    assert found_id is None


def test_execute_in_container():
fake_container_id = 'fake_container_id'
fake_return_code = 0
Expand Down