Skip to content

Commit

Permalink
Add retries to shutdown to handle 409 conflict
Browse files: browse the repository at this point in the history
  • Loading branch information
linkous8 committed Nov 3, 2022
1 parent 11907d2 commit f44af8e
Showing 1 changed file with 29 additions and 8 deletions.
37 changes: 29 additions & 8 deletions servo/connectors/kubernetes.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,14 +620,35 @@ async def shutdown(self, error: Optional[Exception] = None) -> None:
error: An optional error that triggered the destruction.
"""
self.logger.info(f"adjustment failed: shutting down deployment's pods...")
self.workload = await self.workload_helper.read(
self.workload_config.name, self.workload_config.namespace
)
self.workload.spec.replicas = 0
self.workload = await asyncio.wait_for(
self.workload_helper.patch(self.workload),
timeout=self.timeout.total_seconds(),
)

retries = 3
while retries > 0:
# patch the deployment
try:
self.workload = await self.workload_helper.read(
self.workload_config.name, self.workload_config.namespace
)
self.workload.spec.replicas = 0
self.workload = await asyncio.wait_for(
self.workload_helper.patch(self.workload),
timeout=self.timeout.total_seconds(),
)
except kubernetes_asyncio.client.ApiException as ae:
retries -= 1
if retries == 0:
self.logger.error(
"Failed to shutdown SaturationOptimization after 3 retries"
)
raise

if ae.status == 409 and ae.reason == "Conflict":
# If we have a conflict, just load the existing object and try again
pass
else:
raise
else:
# No need to retry if no exception raised
break

def to_components(self) -> List[servo.Component]:
settings = [self.cpu, self.memory, self.replicas]
Expand Down

0 comments on commit f44af8e

Please sign in to comment.