Skip to content

Commit

Permalink
host select: testing
Browse files Browse the repository at this point in the history
  • Loading branch information
oliver-sanders committed Jan 28, 2020
1 parent c9aa08a commit 11dc8b3
Show file tree
Hide file tree
Showing 6 changed files with 410 additions and 59 deletions.
1 change: 1 addition & 0 deletions cylc/flow/cfgspec/globalcfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@
'run ports': [VDR.V_INTEGER_LIST, list(range(43001, 43101))],
'condemned hosts': [VDR.V_ABSOLUTE_HOST_LIST],
'auto restart delay': [VDR.V_INTERVAL],
'thresholds': [VDR.V_STRING],
'run host select': {
'rank': [VDR.V_STRING, 'random', 'load:1', 'load:5', 'load:15',
'memory', 'disk-space'],
Expand Down
80 changes: 27 additions & 53 deletions cylc/flow/host_select.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,22 +44,20 @@ def select_suite_host(cached=True):
# list of suite hosts
global_config.get(['suite servers', 'run hosts']) or ['localhost'],
# thresholds / ranking to apply
threshold_string=global_config.get(
['suite servers', 'run host select', 'thresholds']
),
threshold_string=global_config.get(['suite servers', 'thresholds']),
# list of condemned hosts
filtered_hosts=global_config.get(
blacklist=global_config.get(
['suite servers', 'condemned hosts']
),
filter_string='condemned host'
blacklist_name='condemned host'
)


def select_host(
hosts,
threshold_string=None,
filtered_hosts=None,
filter_string=None
blacklist=None,
blacklist_name=None
):
"""Select a host from the provided list.
Expand All @@ -84,11 +82,11 @@ def select_host(
getloadavg()
Comments are allowed using `#` but not inline comments.
filtered_hosts (list):
blacklist (list):
List of host names to filter out.
Can be short host names (do not have to be fqdn values)
filter_string (str):
Used with `filtered_hosts` in error messages.
blacklist_name (str):
Used with `blacklist` in error messages.
Returns:
tuple - (hostname, fqdn) the chosen host
Expand All @@ -105,8 +103,8 @@ def select_host(
for host in hosts
}
hosts = list(hostname_map)
if filtered_hosts:
filtered_hosts = list(set(map(get_fqdn_by_host, filtered_hosts)))
if blacklist:
blacklist = list(set(map(get_fqdn_by_host, blacklist)))

# dict of conditions and whether they have been met (for error reporting)
data = {
Expand All @@ -115,11 +113,11 @@ def select_host(
}

# filter out any `blacklist` hosts if provided
if filtered_hosts:
if blacklist:
hosts, data = _filter_by_hostname(
hosts,
filtered_hosts,
filter_string,
blacklist,
blacklist_name,
data=data
)

Expand All @@ -134,11 +132,8 @@ def select_host(

if not thresholds:
# no metrics or ranking required, pick host at random
hosts = [random.choice(list(hosts))]
hosts = [random.choice(list(hosts))] # nosec

if not hosts:
# no hosts provided / left after filtering
raise HostSelectException(data)
if not thresholds and len(hosts) == 1:
return hostname_map[hosts[0]], hosts[0]

Expand Down Expand Up @@ -172,18 +167,18 @@ def select_host(

def _filter_by_hostname(
hosts,
filtered_hosts,
filter_string=None,
blacklist,
blacklist_name=None,
data=None
):
"""Filter out any hosts present in `filtered_hosts`.
"""Filter out any hosts present in `blacklist`.
Args:
hosts (list):
List of host fqdns.
filtered_hosts (list):
blacklist (list):
List of blacklisted host fqdns.
filter_string (str):
blacklist_name (str):
The reason for blacklisting these hosts
(used for exceptions).
data (dict):
Expand All @@ -201,9 +196,9 @@ def _filter_by_hostname(
data = {host: dict() for host in hosts}
for host in list(hosts):
key = 'blacklisted'
if filter_string:
key = f'{key}({filter_string})'
if host in filtered_hosts:
if blacklist_name:
key = f'{key}({blacklist_name})'
if host in blacklist:
hosts.remove(host)
data[host][key] = True
else:
Expand Down Expand Up @@ -402,8 +397,6 @@ def _get_thresholds(string):
# end of positional arguments
in_args = False
break
elif item.string == ',':
pass
elif in_args:
# literal eval each argument
query.append(ast.literal_eval(item.string))
Expand Down Expand Up @@ -464,6 +457,11 @@ def _get_metrics(hosts, metrics, data=None):
data (dict):
Used for logging success/fail outcomes of the form {host: {}}
Examples:
Command failure:
>>> _get_metrics(['localhost'], [['elephant']])
({}, {'localhost': {'get_metrics': 'Command failed (exit: 1)'}})
Returns:
dict - {host: {(function, arg1, arg2, ...): result}}
Expand Down Expand Up @@ -532,27 +530,3 @@ def _reformat_expr(key, expression):
'RESULT',
f'{key[0]}({", ".join(map(repr, key[1:]))})'
)


# def main():
# return select_host(
# [
# 'localhost'
# # 'niwa-1019144l.niwa.local'
# ],
# '''
# # rank items, lower is better
# cpu_times().user

# # threshold items, accept hosts which eval to `True`
# virtual_memory().available > 123456789
# getloadavg()[0] < 5
# cpu_count() > 1
# disk_usage('/').free > 123
# '''
# )


# print(
# main()
# )
5 changes: 1 addition & 4 deletions cylc/flow/scripts/cylc_psutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
"""
from itertools import dropwhile
import json
import pickle
import sys

import psutil
Expand Down Expand Up @@ -56,9 +55,7 @@ def main(parser, options):

# serialise
for ind, item in enumerate(ret):
if hasattr(item, '_todict'):
ret[ind] = item._todict()
elif hasattr(item, '_asdict'):
if hasattr(item, '_asdict'):
ret[ind] = item._asdict()

print(json.dumps(ret))
186 changes: 186 additions & 0 deletions cylc/flow/tests/host_select.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
"""Test the cylc.flow.host_select module.
NOTE: these are functional tests, for unit tests see the docstrings in
the host_select module.
"""
import socket

import pytest

from cylc.flow.exceptions import HostSelectException
from cylc.flow.host_select import (
select_host,
select_suite_host
)
from cylc.flow.hostuserutil import get_fqdn_by_host
from cylc.flow.parsec.exceptions import ListValueError
from cylc.flow.tests.util import mock_glbl_cfg


# Resolve 'localhost' once at import time. socket.gethostbyname_ex returns
# (canonical hostname, alias list, IP address list); the IP list is unused.
localhost, localhost_aliases, _ = socket.gethostbyname_ex('localhost')
# NOTE(review): presumably the fully qualified domain name for that host as
# computed by the project helper - confirm against cylc.flow.hostuserutil.
localhost_fqdn = get_fqdn_by_host(localhost)


def test_hostname_checking():
    """A host name that cannot be resolved must raise socket.gaierror."""
    unknown_hosts = ['beefwellington']
    with pytest.raises(socket.gaierror):
        select_host(unknown_hosts)


def test_localhost():
    """Selecting from a single-host list returns that host."""
    expected = (localhost, localhost_fqdn)
    chosen = select_host([localhost])
    assert chosen == expected


def test_unique():
    """Offering several spellings of localhost still yields localhost."""
    # every alias of localhost, followed by its canonical name
    candidates = localhost_aliases + [localhost]
    chosen = select_host(candidates)
    assert chosen == (localhost, localhost_fqdn)


def test_filter():
    """Test that blacklisted hosts are filtered out.

    With the only candidate host blacklisted there is nothing left to
    select, so a HostSelectException is expected and its text should
    mention the supplied ``blacklist_name``.
    """
    message = 'Localhost not allowed'
    with pytest.raises(HostSelectException) as excinfo:
        select_host(
            [localhost],
            blacklist=[localhost],
            # reuse the asserted message so the two cannot drift apart
            blacklist_name=message
        )
    assert message in str(excinfo.value)


def test_thresholds():
    """Positive test that thresholds are evaluated.

    (doesn't prove anything by itself hence test_unreasonable_thresholds)

    Every threshold is chosen so that any functioning machine passes it;
    in particular the CPU check only requires at least one CPU so the
    test does not fail on single-core machines or containers.
    """
    assert select_host(
        [localhost],
        threshold_string='''
# if this test fails due to race conditions
# then you have bigger issues than a test failure
virtual_memory().available > 1
getloadavg()[0] < 500
cpu_count() > 0
disk_usage('/').free > 1
'''
    ) == (localhost, localhost_fqdn)


def test_unreasonable_thresholds():
    """Negative counterpart of test_thresholds.

    Thresholds no realistic machine can satisfy must cause selection to
    fail, and the failed condition must be reported in the exception text.
    (doesn't prove anything by itself hence test_thresholds)
    """
    impossible_thresholds = '''
# if this test fails due to race conditions
# then you are very lucky
virtual_memory().available > 123456789123456789
getloadavg()[0] < 1
cpu_count() > 512
disk_usage('/').free > 123456789123456789
'''
    with pytest.raises(HostSelectException) as excinfo:
        select_host([localhost], threshold_string=impossible_thresholds)
    expected_report = 'virtual_memory().available > 123456789123456789: False'
    assert expected_report in str(excinfo.value)


def test_metric_command_failure():
    """A failing metrics command must leave no host to select.

    The failure reason is expected in the exception's ``data`` mapping
    under the host's fqdn and the ``get_metrics`` key.
    """
    broken_metric = '''
# elephant is not a psutil attribute
# so will cause the command to fail
elephant
'''
    with pytest.raises(HostSelectException) as excinfo:
        select_host([localhost], threshold_string=broken_metric)
    outcome = excinfo.value.data[localhost_fqdn]['get_metrics']
    assert outcome == 'Command failed (exit: 1)'


def test_suite_host_select(mock_glbl_cfg):
    """Select a suite host from a global config listing only localhost."""
    global_config = f'''
[suite servers]
run hosts = {localhost}
'''
    mock_glbl_cfg('cylc.flow.host_select.glbl_cfg', global_config)
    expected = (localhost, localhost_fqdn)
    assert select_suite_host() == expected


def test_suite_host_select_invalid_host(mock_glbl_cfg):
    """An unresolvable configured run host must raise socket.gaierror."""
    global_config = '''
[suite servers]
run hosts = elephant
'''
    mock_glbl_cfg('cylc.flow.host_select.glbl_cfg', global_config)
    with pytest.raises(socket.gaierror):
        select_suite_host()


def test_suite_host_select_default(mock_glbl_cfg):
    """With no run hosts configured, selection falls back to localhost."""
    global_config = '''
[suite servers]
run hosts =
'''
    mock_glbl_cfg('cylc.flow.host_select.glbl_cfg', global_config)
    selected = select_suite_host()
    hostname, host_fqdn = selected
    # any alias of localhost is an acceptable short name
    acceptable_names = localhost_aliases + [localhost]
    assert hostname in acceptable_names
    assert host_fqdn == localhost_fqdn


def test_suite_host_select_condemned(mock_glbl_cfg):
    """A run host that is also condemned must not be selectable.

    The resulting exception text should name both the blacklisting and
    its reason ("condemned host").
    """
    global_config = f'''
[suite servers]
run hosts = {localhost}
condemned hosts = {localhost_fqdn}
'''
    mock_glbl_cfg('cylc.flow.host_select.glbl_cfg', global_config)
    with pytest.raises(HostSelectException) as excinfo:
        select_suite_host()
    report = str(excinfo.value)
    assert 'blacklisted' in report
    assert 'condemned host' in report


def test_condemned_host_ambiguous(mock_glbl_cfg):
    """Test the [suite servers]condemned hosts coercer.

    Not actually host_select code but related functionality: loading a
    config whose condemned host is given as the plain ``localhost`` name
    is expected to be rejected as an ambiguous host.
    """
    global_config = f'''
[suite servers]
run hosts = {localhost}
condemned hosts = {localhost}
'''
    with pytest.raises(ListValueError) as excinfo:
        mock_glbl_cfg('cylc.flow.host_select.glbl_cfg', global_config)
    error_message = excinfo.value.msg
    assert 'ambiguous host' in error_message
Loading

0 comments on commit 11dc8b3

Please sign in to comment.