Skip to content

Commit

Permalink
Remove extra break and ensure hosts property on orchestrator is used properly
Browse files Browse the repository at this point in the history
  • Loading branch information
ashao committed Mar 5, 2024
1 parent b44ef3a commit ecd5663
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 7 deletions.
6 changes: 3 additions & 3 deletions smartsim/_core/control/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -719,24 +719,24 @@ def _orchestrator_launch_wait(self, orchestrator: Orchestrator) -> None:
ready = False
while not ready:
try:
time.sleep(CONFIG.jm_interval)
# manually trigger job update if JM not running
if not self._jobs.actively_monitoring:
self._jobs.check_jobs()

# _jobs.get_status acquires JM lock for main thread, no need for locking
statuses = self.get_entity_list_status(orchestrator)
if all(stat == STATUS_RUNNING for stat in statuses):
if all(stat == STATUS_RUNNING for stat in statuses) and orchestrator.is_active():
ready = True
# TODO remove in favor of by node status check
time.sleep(CONFIG.jm_interval)
elif any(stat in TERMINAL_STATUSES for stat in statuses):
self.stop_db(orchestrator)
msg = "Orchestrator failed during startup"
msg += f" See {orchestrator.path} for details"
raise SmartSimError(msg)
else:
logger.debug("Waiting for orchestrator instances to spin up...")
time.sleep(CONFIG.jm_interval)

except KeyboardInterrupt:
logger.info("Orchestrator launch cancelled - requesting to stop")
self.stop_db(orchestrator)
Expand Down
8 changes: 4 additions & 4 deletions smartsim/database/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ def get_address(self) -> t.List[str]:
:raises SmartSimError: If database address cannot be found or is not active
"""
if not self._hosts:
if not self.hosts:
raise SmartSimError("Could not find database address")
if not self.is_active():
raise SmartSimError("Database is not active")
Expand All @@ -338,7 +338,7 @@ def get_address(self) -> t.List[str]:
def _get_address(self) -> t.List[str]:
return [
f"{host}:{port}"
for host, port in itertools.product(self._hosts, self.ports)
for host, port in itertools.product(self.hosts, self.ports)
]

def is_active(self) -> bool:
Expand All @@ -347,10 +347,10 @@ def is_active(self) -> bool:
:return: True if database is active, False otherwise
:rtype: bool
"""
if not self._hosts:
if not self.hosts:
return False

return db_is_active(self._hosts, self.ports, self.num_shards)
return db_is_active(self.hosts, self.ports, self.num_shards)

@property
def _rai_module(self) -> t.Tuple[str, ...]:
Expand Down

0 comments on commit ecd5663

Please sign in to comment.