Skip to content

Commit 4667960

Browse files
authored
Reschedule rate limited telegram task instead of retry (#5178)
1 parent 287bfcc commit 4667960

File tree

3 files changed

+53
-9
lines changed

3 files changed

+53
-9
lines changed

engine/apps/alerts/tasks/notify_user.py

+28-7
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@
2626
from apps.user_management.models import User
2727

2828

29+
RETRY_TIMEOUT_HOURS = 1
30+
31+
2932
def schedule_send_bundled_notification_task(
3033
user_notification_bundle: "UserNotificationBundle", alert_group: "AlertGroup"
3134
):
@@ -445,10 +448,29 @@ def perform_notification(log_record_pk, use_default_notification_policy_fallback
445448
try:
446449
TelegramToUserConnector.notify_user(user, alert_group, notification_policy)
447450
except RetryAfter as e:
448-
countdown = getattr(e, "retry_after", 3)
449-
raise perform_notification.retry(
450-
(log_record_pk, use_default_notification_policy_fallback), countdown=countdown, exc=e
451-
)
451+
task_logger.exception(f"Telegram API rate limit exceeded. Retry after {e.retry_after} seconds.")
452+
# check how much time has passed since log record was created
453+
# to prevent eternal loop of restarting perform_notification task
454+
if timezone.now() < log_record.created_at + timezone.timedelta(hours=RETRY_TIMEOUT_HOURS):
455+
countdown = getattr(e, "retry_after", 3)
456+
perform_notification.apply_async(
457+
(log_record_pk, use_default_notification_policy_fallback), countdown=countdown
458+
)
459+
else:
460+
task_logger.debug(
461+
f"telegram notification for alert_group {alert_group.pk} failed because of rate limit"
462+
)
463+
UserNotificationPolicyLogRecord(
464+
author=user,
465+
type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FAILED,
466+
notification_policy=notification_policy,
467+
reason="Telegram rate limit exceeded",
468+
alert_group=alert_group,
469+
notification_step=notification_policy.step,
470+
notification_channel=notification_channel,
471+
notification_error_code=UserNotificationPolicyLogRecord.ERROR_NOTIFICATION_IN_TELEGRAM_RATELIMIT,
472+
).save()
473+
return
452474

453475
elif notification_channel == UserNotificationPolicy.NotificationChannel.SLACK:
454476
# TODO: refactor checking the possibility of sending a notification in slack
@@ -516,13 +538,12 @@ def perform_notification(log_record_pk, use_default_notification_policy_fallback
516538
).save()
517539
return
518540

519-
retry_timeout_hours = 1
520541
if alert_group.slack_message:
521542
alert_group.slack_message.send_slack_notification(user, alert_group, notification_policy)
522543
task_logger.debug(f"Finished send_slack_notification for alert_group {alert_group.pk}.")
523544
# check how much time has passed since log record was created
524545
# to prevent eternal loop of restarting perform_notification task
525-
elif timezone.now() < log_record.created_at + timezone.timedelta(hours=retry_timeout_hours):
546+
elif timezone.now() < log_record.created_at + timezone.timedelta(hours=RETRY_TIMEOUT_HOURS):
526547
task_logger.debug(
527548
f"send_slack_notification for alert_group {alert_group.pk} failed because slack message "
528549
f"does not exist. Restarting perform_notification."
@@ -534,7 +555,7 @@ def perform_notification(log_record_pk, use_default_notification_policy_fallback
534555
else:
535556
task_logger.debug(
536557
f"send_slack_notification for alert_group {alert_group.pk} failed because slack message "
537-
f"after {retry_timeout_hours} hours still does not exist"
558+
f"after {RETRY_TIMEOUT_HOURS} hours still does not exist"
538559
)
539560
UserNotificationPolicyLogRecord(
540561
author=user,

engine/apps/alerts/tests/test_notify_user.py

+19-1
Original file line numberDiff line numberDiff line change
@@ -360,12 +360,30 @@ def test_perform_notification_telegram_retryafter_error(
360360
countdown = 15
361361
exc = RetryAfter(countdown)
362362
with patch.object(TelegramToUserConnector, "notify_user", side_effect=exc) as mock_notify_user:
363-
with pytest.raises(RetryAfter):
363+
with patch.object(perform_notification, "apply_async") as mock_apply_async:
364364
perform_notification(log_record.pk, False)
365365

366366
mock_notify_user.assert_called_once_with(user, alert_group, user_notification_policy)
367+
# task is rescheduled using the countdown value from the exception
368+
mock_apply_async.assert_called_once_with((log_record.pk, False), countdown=countdown)
367369
assert alert_group.personal_log_records.last() == log_record
368370

371+
# but if the log was too old, skip and create a failed log record
372+
log_record.created_at = timezone.now() - timezone.timedelta(minutes=90)
373+
log_record.save()
374+
with patch.object(TelegramToUserConnector, "notify_user", side_effect=exc) as mock_notify_user:
375+
with patch.object(perform_notification, "apply_async") as mock_apply_async:
376+
perform_notification(log_record.pk, False)
377+
mock_notify_user.assert_called_once_with(user, alert_group, user_notification_policy)
378+
assert not mock_apply_async.called
379+
last_log_record = UserNotificationPolicyLogRecord.objects.last()
380+
assert last_log_record.type == UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FAILED
381+
assert last_log_record.reason == "Telegram rate limit exceeded"
382+
assert (
383+
last_log_record.notification_error_code
384+
== UserNotificationPolicyLogRecord.ERROR_NOTIFICATION_IN_TELEGRAM_RATELIMIT
385+
)
386+
369387

370388
@patch("apps.base.models.UserNotificationPolicy.get_default_fallback_policy")
371389
@patch("apps.base.tests.messaging_backend.TestOnlyBackend.notify_user")

engine/apps/base/models/user_notification_policy_log_record.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,8 @@ class UserNotificationPolicyLogRecord(models.Model):
106106
ERROR_NOTIFICATION_TELEGRAM_USER_IS_DEACTIVATED,
107107
ERROR_NOTIFICATION_MOBILE_USER_HAS_NO_ACTIVE_DEVICE,
108108
ERROR_NOTIFICATION_FORMATTING_ERROR,
109-
) = range(29)
109+
ERROR_NOTIFICATION_IN_TELEGRAM_RATELIMIT,
110+
) = range(30)
110111

111112
# for these errors we want to send a message to the general log channel
112113
ERRORS_TO_SEND_IN_SLACK_CHANNEL = [
@@ -304,6 +305,10 @@ def render_log_line_action(self, for_slack=False, substitute_author_with_tag=Fal
304305
result += f"failed to notify {user_verbal} in Slack, because channel is archived"
305306
elif self.notification_error_code == UserNotificationPolicyLogRecord.ERROR_NOTIFICATION_IN_SLACK_RATELIMIT:
306307
result += f"failed to notify {user_verbal} in Slack due to Slack rate limit"
308+
elif (
309+
self.notification_error_code == UserNotificationPolicyLogRecord.ERROR_NOTIFICATION_IN_TELEGRAM_RATELIMIT
310+
):
311+
result += f"failed to notify {user_verbal} in Telegram due to Telegram rate limit"
307312
elif self.notification_error_code == UserNotificationPolicyLogRecord.ERROR_NOTIFICATION_FORBIDDEN:
308313
result += f"failed to notify {user_verbal}, not allowed"
309314
elif (

0 commit comments

Comments
 (0)