Skip to content

Commit

Permalink
fixes per review notes, move poorly-placed class var declarations
Browse files Browse the repository at this point in the history
  • Loading branch information
ankona committed Aug 19, 2024
1 parent da4ecbd commit d423181
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 16 deletions.
21 changes: 13 additions & 8 deletions smartsim/_core/launcher/dragon/dragonBackend.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,17 @@ def __init__(self, pid: int) -> None:
self._infra_ddict: t.Optional[dragon_ddict.DDict] = None
self._prioritizer = NodePrioritizer(self._nodes, self._queue_lock)

Check warning on line 195 in smartsim/_core/launcher/dragon/dragonBackend.py

View check run for this annotation

Codecov / codecov/patch

smartsim/_core/launcher/dragon/dragonBackend.py#L195

Added line #L195 was not covered by tests

self._nodes: t.List["dragon_machine.Node"] = []
"""Node capability information for hosts in the allocation"""
self._hosts: t.List[str] = []
"""List of hosts available in allocation"""
self._cpus: t.List[int] = []
"""List of cpu-count by node"""
self._gpus: t.List[int] = []
"""List of gpu-count by node"""
self._allocated_hosts: t.Dict[str, t.Set[str]] = {}
"""Mapping with hostnames as keys and a set of running step IDs as the value"""

Check warning on line 206 in smartsim/_core/launcher/dragon/dragonBackend.py

View check run for this annotation

Codecov / codecov/patch

smartsim/_core/launcher/dragon/dragonBackend.py#L197-L206

Added lines #L197 - L206 were not covered by tests

@property
def hosts(self) -> list[str]:
with self._queue_lock:
Expand Down Expand Up @@ -229,16 +240,10 @@ def _initialize_hosts(self) -> None:
self._nodes = [
dragon_machine.Node(node) for node in dragon_machine.System().nodes
]
self._hosts: t.List[str] = sorted(node.hostname for node in self._nodes)
"""List of hosts available in allocation"""
self._hosts = sorted(node.hostname for node in self._nodes)

Check warning on line 243 in smartsim/_core/launcher/dragon/dragonBackend.py

View check run for this annotation

Codecov / codecov/patch

smartsim/_core/launcher/dragon/dragonBackend.py#L243

Added line #L243 was not covered by tests
self._cpus = [node.num_cpus for node in self._nodes]
"""List of cpu-count by node"""
self._gpus = [node.num_gpus for node in self._nodes]
"""List of gpu-count by node"""
self._allocated_hosts: t.Dict[str, t.Set[str]] = collections.defaultdict(
set
)
"""Mapping with hostnames as keys and a set of executing step IDs as the value"""
self._allocated_hosts = collections.defaultdict(set)

Check warning on line 246 in smartsim/_core/launcher/dragon/dragonBackend.py

View check run for this annotation

Codecov / codecov/patch

smartsim/_core/launcher/dragon/dragonBackend.py#L246

Added line #L246 was not covered by tests

def __str__(self) -> str:
return self.status_message
Expand Down
19 changes: 11 additions & 8 deletions smartsim/_core/launcher/dragon/pqueue.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ def add(
reference counted.
:param tracking_id: a unique task identifier executing on the node
to add"""
to add
:raises ValueError: if tracking_id is already assigned to this node"""
if tracking_id in self.assigned_tasks:
raise ValueError("Attempted adding task more than once")

Check warning on line 130 in smartsim/_core/launcher/dragon/pqueue.py

View check run for this annotation

Codecov / codecov/patch

smartsim/_core/launcher/dragon/pqueue.py#L129-L130

Added lines #L129 - L130 were not covered by tests

Expand All @@ -140,7 +141,8 @@ def remove(
"""Update the reference counter to indicate the removal of a process.
:param tracking_id: a unique task identifier executing on the node
to remove"""
to remove
:raises ValueError: if tracking_id is already assigned to this node"""
if tracking_id and tracking_id not in self.assigned_tasks:
raise ValueError("Attempted removal of untracked item")

Check warning on line 147 in smartsim/_core/launcher/dragon/pqueue.py

View check run for this annotation

Codecov / codecov/patch

smartsim/_core/launcher/dragon/pqueue.py#L146-L147

Added lines #L146 - L147 were not covered by tests

Expand Down Expand Up @@ -335,12 +337,12 @@ def _check_satisfiable_n(

return True

Check warning on line 338 in smartsim/_core/launcher/dragon/pqueue.py

View check run for this annotation

Codecov / codecov/patch

smartsim/_core/launcher/dragon/pqueue.py#L338

Added line #L338 was not covered by tests

def _get_next_available_node(
def _get_next_unassigned_node(
self,
heap: t.List[_TrackedNode],
tracking_id: t.Optional[str] = None,
) -> t.Optional[Node]:
"""Finds the next node w/the least amount of running processes and
"""Finds the next node with no running processes and
ensures that any elements that were directly updated are updated in
the priority structure before being made available
Expand Down Expand Up @@ -402,7 +404,7 @@ def _get_next_n_available_nodes(
return next_nodes

Check warning on line 404 in smartsim/_core/launcher/dragon/pqueue.py

View check run for this annotation

Codecov / codecov/patch

smartsim/_core/launcher/dragon/pqueue.py#L403-L404

Added lines #L403 - L404 were not covered by tests

while len(next_nodes) < num_items:
if next_node := self._get_next_available_node(heap, tracking_id):
if next_node := self._get_next_unassigned_node(heap, tracking_id):
next_nodes.append(next_node)
continue
break

Check warning on line 410 in smartsim/_core/launcher/dragon/pqueue.py

View check run for this annotation

Codecov / codecov/patch

smartsim/_core/launcher/dragon/pqueue.py#L406-L410

Added lines #L406 - L410 were not covered by tests
Expand Down Expand Up @@ -430,7 +432,7 @@ def next(
tracking_id: t.Optional[str] = None,
hosts: t.Optional[t.List[str]] = None,
) -> t.Optional[Node]:
"""Find the next available node using the supplied filter to target
"""Find the next unsassigned node using the supplied filter to target
a specific node capability
:param filter_on: the subset of nodes to query for available nodes
Expand All @@ -456,9 +458,10 @@ def next_n(
:param filter_on: the subset of nodes to query for available nodes
:param tracking_id: unique task identifier to track
:param hosts: a list of hostnames used to filter the available nodes
:returns: Collection of reserved nodes"""
:returns: Collection of reserved nodes
:raises ValueError: if the hosts parameter is an empty list"""
if hosts is not None and not hosts:
raise ValueError("No host names provided")
raise ValueError("No hostnames provided")

Check warning on line 464 in smartsim/_core/launcher/dragon/pqueue.py

View check run for this annotation

Codecov / codecov/patch

smartsim/_core/launcher/dragon/pqueue.py#L463-L464

Added lines #L463 - L464 were not covered by tests

heap = self._create_sub_heap(hosts, filter_on)
return self._get_next_n_available_nodes(num_items, heap, tracking_id)

Check warning on line 467 in smartsim/_core/launcher/dragon/pqueue.py

View check run for this annotation

Codecov / codecov/patch

smartsim/_core/launcher/dragon/pqueue.py#L466-L467

Added lines #L466 - L467 were not covered by tests
1 change: 1 addition & 0 deletions smartsim/settings/dragonRunSettings.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None:
"""Specify the hostlist for this job
:param host_list: hosts to launch on
:raises ValueError: if an empty host list is supplied
"""
if not host_list:
raise ValueError("empty hostlist provided")
Expand Down

0 comments on commit d423181

Please sign in to comment.