Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v1.9.3 #4855

Merged
merged 2 commits into from
Aug 19, 2024
Merged

v1.9.3 #4855

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/e2e-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -144,12 +144,14 @@ jobs:
git clone https://x-access-token:${{ steps.generate-token.outputs.token }}@github.com/grafana/ops-devenv.git
git clone https://x-access-token:${{ steps.generate-token.outputs.token }}@github.com/grafana/gops-labels.git

- name: Tilt CI - standard and expensive E2E tests
- name: Tilt CI - Expensive E2E tests
if: inputs.run-expensive-tests
shell: bash
env:
E2E_TESTS_CMD: "cd ../../grafana-plugin && yarn test:e2e-expensive"
GRAFANA_VERSION: ${{ inputs.grafana_version }}
GF_FEATURE_TOGGLES_ENABLE: "externalServiceAccounts"
ONCALL_API_URL: "http://oncall-dev-engine:8080"
GRAFANA_ADMIN_USERNAME: "irm"
GRAFANA_ADMIN_PASSWORD: "irm"
BROWSERS: ${{ inputs.browsers }}
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/expensive-e2e-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ jobs:
# - 9.3.16
# - 9.4.13
# - 9.5.7
- 10.0.11
- 10.1.7
- 10.3.3
# TODO: fix issues with running e2e tests against Grafana v10.2.x and latest
# - 10.2.4
# - latest
Expand Down Expand Up @@ -55,7 +55,7 @@ jobs:
#
- uses: slackapi/[email protected]
with:
channel-id: gops-oncall-dev
channel-id: gops-irm-dev
# yamllint disable rule:line-length
payload: |
{
Expand Down
13 changes: 10 additions & 3 deletions .github/workflows/linting-and-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -239,9 +239,16 @@ jobs:
end-to-end-tests:
name: Standard e2e tests
uses: ./.github/workflows/e2e-tests.yml
strategy:
matrix:
grafana_version:
- 10.1.7
- 10.3.3
# TODO: fix issues with running e2e tests against Grafana v10.2.x and latest
# - 10.2.4
# - latest
fail-fast: false
with:
# TODO: fix issues with running e2e tests against Grafana v10.2.x and v10.3.x
grafana_version: 10.1.7
# grafana_version: 10.3.3
grafana_version: ${{ matrix.grafana_version }}
run-expensive-tests: false
browsers: "chromium"
11 changes: 9 additions & 2 deletions Tiltfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
load('ext://uibutton', 'cmd_button', 'location', 'text_input', 'bool_input')
load("ext://configmap", "configmap_create")

grafana_url = os.getenv("GRAFANA_URL", "http://grafana:3000")
running_under_parent_tiltfile = os.getenv("TILT_PARENT", "false") == "true"
twilio_values=[
"oncall.twilio.accountSid=" + os.getenv("TWILIO_ACCOUNT_SID", ""),
Expand Down Expand Up @@ -29,6 +30,14 @@ def plugin_json():
return plugin_file
return 'NOT_A_PLUGIN'

def extra_env():
    """Return the extra environment variables as a dict.

    GF_APP_URL and GF_SERVER_ROOT_URL both take the module-level
    `grafana_url` value; the feature toggle and the OnCall API URL are
    fixed strings.
    """
    env = {}
    env["GF_APP_URL"] = grafana_url
    env["GF_SERVER_ROOT_URL"] = grafana_url
    env["GF_FEATURE_TOGGLES_ENABLE"] = "externalServiceAccounts"
    env["ONCALL_API_URL"] = "http://oncall-dev-engine:8080"
    return env


allow_k8s_contexts(["kind-kind"])

Expand Down Expand Up @@ -83,8 +92,6 @@ def load_grafana():
# The user/pass that you will login to Grafana with
grafana_admin_user_pass = os.getenv("GRAFANA_ADMIN_USER_PASS", "oncall")
grafana_version = os.getenv("GRAFANA_VERSION", "latest")
grafana_url = os.getenv("GRAFANA_URL", "http://grafana:3000")


if 'plugin' in profiles:
k8s_resource(
Expand Down
2 changes: 1 addition & 1 deletion engine/apps/alerts/models/alert_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,6 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
resolved_by_user: typing.Optional["User"]
root_alert_group: typing.Optional["AlertGroup"]
silenced_by_user: typing.Optional["User"]
slack_log_message: typing.Optional["SlackMessage"]
slack_messages: "RelatedManager['SlackMessage']"
users: "RelatedManager['User']"
labels: "RelatedManager['AlertGroupAssociatedLabel']"
Expand Down Expand Up @@ -396,6 +395,7 @@ def status(self) -> int:
related_name="wiped_alert_groups",
)

# TODO: drop this column in a future release
slack_log_message = models.OneToOneField(
"slack.SlackMessage",
on_delete=models.SET_NULL,
Expand Down
4 changes: 0 additions & 4 deletions engine/apps/alerts/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,6 @@
AlertGroupSlackRepresentative.on_alert_group_action_triggered,
)

alert_group_update_log_report_signal.connect(
AlertGroupSlackRepresentative.on_alert_group_update_log_report,
)

alert_group_update_resolution_note_signal.connect(
AlertGroupSlackRepresentative.on_alert_group_update_resolution_note,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,19 +90,7 @@ def on_alert_group_action_triggered_async(log_record_id):
autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None
)
def on_alert_group_update_log_report_async(alert_group_id):
from apps.alerts.models import AlertGroup

alert_group = AlertGroup.objects.get(pk=alert_group_id)
logger.debug(f"Start on_alert_group_update_log_report for alert_group {alert_group_id}")
organization = alert_group.channel.organization
if alert_group.slack_message and organization.slack_team_identity:
logger.debug(f"Process on_alert_group_update_log_report for alert_group {alert_group_id}")
UpdateLogReportMessageStep = ScenarioStep.get_step("distribute_alerts", "UpdateLogReportMessageStep")
step = UpdateLogReportMessageStep(organization.slack_team_identity, organization)
step.process_signal(alert_group)
else:
logger.debug(f"Drop on_alert_group_update_log_report for alert_group {alert_group_id}")
logger.debug(f"Finish on_alert_group_update_log_report for alert_group {alert_group_id}")
return "Deprecated, will be removed after queue cleanup"


class AlertGroupSlackRepresentative(AlertGroupAbstractRepresentative):
Expand Down Expand Up @@ -173,32 +161,6 @@ def on_alert_group_action_triggered(cls, **kwargs):
logger.debug(f"SLACK on_alert_group_action_triggered: async {log_record_id} {force_sync}")
on_alert_group_action_triggered_async.apply_async((log_record_id,))

@classmethod
def on_alert_group_update_log_report(cls, **kwargs):
    """Handle the alert_group_update_log_report signal for Slack.

    ``kwargs["alert_group"]`` may be an ``AlertGroup`` instance or a primary
    key. Alert groups whose ``notify_in_slack_enabled`` is ``False`` are
    skipped; otherwise ``on_alert_group_update_log_report_async`` is enqueued
    with the alert group id.

    Raises:
        AlertGroup.DoesNotExist: a primary key was passed and no matching
            alert group exists.
    """
    from apps.alerts.models import AlertGroup

    received = kwargs["alert_group"]

    if not isinstance(received, AlertGroup):
        # A bare primary key was passed: load the instance so its settings can be checked.
        alert_group_id = received
        try:
            alert_group = AlertGroup.objects.get(pk=alert_group_id)
        except AlertGroup.DoesNotExist as exc:
            logger.warning(f"SLACK update log report: alert group {alert_group_id} has been deleted")
            raise exc
    else:
        alert_group = received
        alert_group_id = received.pk

    logger.debug(
        f"Received alert_group_update_log_report signal in SLACK representative for alert_group {alert_group_id}"
    )

    if alert_group.notify_in_slack_enabled is False:
        logger.debug(f"Skipping alert_group {alert_group_id} since notify_in_slack is disabled")
        return

    task_args = (alert_group_id,)
    on_alert_group_update_log_report_async.apply_async(task_args)

@classmethod
def on_alert_group_update_resolution_note(cls, **kwargs):
alert_group = kwargs["alert_group"]
Expand Down
87 changes: 1 addition & 86 deletions engine/apps/slack/scenarios/distribute_alerts.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from datetime import datetime

from django.core.cache import cache
from django.utils import timezone

from apps.alerts.constants import ActionSource
from apps.alerts.incident_appearance.renderers.constants import DEFAULT_BACKUP_TITLE
Expand All @@ -14,25 +13,17 @@
from apps.slack.chatops_proxy_routing import make_private_metadata, make_value
from apps.slack.constants import CACHE_UPDATE_INCIDENT_SLACK_MESSAGE_LIFETIME
from apps.slack.errors import (
SlackAPICantUpdateMessageError,
SlackAPIChannelArchivedError,
SlackAPIChannelInactiveError,
SlackAPIChannelNotFoundError,
SlackAPIError,
SlackAPIInvalidAuthError,
SlackAPIMessageNotFoundError,
SlackAPIRatelimitError,
SlackAPIRestrictedActionError,
SlackAPITokenError,
)
from apps.slack.scenarios import scenario_step
from apps.slack.scenarios.slack_renderer import AlertGroupLogSlackRenderer
from apps.slack.slack_formatter import SlackFormatter
from apps.slack.tasks import (
post_or_update_log_report_message_task,
send_message_to_thread_if_bot_not_in_channel,
update_incident_slack_message,
)
from apps.slack.tasks import send_message_to_thread_if_bot_not_in_channel, update_incident_slack_message
from apps.slack.types import (
Block,
BlockActionType,
Expand Down Expand Up @@ -95,7 +86,6 @@ def process_signal(self, alert: Alert) -> None:
else:
# check if alert group was posted to slack before posting message to thread
if not alert.group.skip_escalation_in_slack:
self._send_log_report_message(alert.group, channel_id)
self._send_message_to_thread_if_bot_not_in_channel(alert.group, channel_id)
else:
# check if alert group was posted to slack before updating its message
Expand Down Expand Up @@ -208,11 +198,6 @@ def _send_debug_mode_notice(self, alert_group: AlertGroup, channel_id: str) -> N
blocks=blocks,
)

def _send_log_report_message(self, alert_group: AlertGroup, channel_id: str) -> None:
    """Enqueue ``post_or_update_log_report_message_task`` for the alert group.

    The task receives the alert group pk and this step's Slack team identity
    pk. ``channel_id`` is accepted but not used here.
    """
    task_args = (alert_group.pk, self.slack_team_identity.pk)
    post_or_update_log_report_message_task.apply_async(task_args)

def _send_message_to_thread_if_bot_not_in_channel(self, alert_group: AlertGroup, channel_id: str) -> None:
send_message_to_thread_if_bot_not_in_channel.apply_async(
(alert_group.pk, self.slack_team_identity.pk, channel_id),
Expand Down Expand Up @@ -895,76 +880,6 @@ def process_signal(self, log_record: AlertGroupLogRecord) -> None:
message.delete()


class UpdateLogReportMessageStep(scenario_step.ScenarioStep):
# Scenario step that updates the "Alert Group log" Slack message of an alert group.
def process_signal(self, alert_group: AlertGroup) -> None:
# Do nothing when Slack escalation is skipped for this alert group or its
# channel is currently rate limited in Slack; otherwise update the log message.
if alert_group.skip_escalation_in_slack or alert_group.channel.is_rate_limited_in_slack:
return

self.update_log_message(alert_group)

def update_log_message(self, alert_group: AlertGroup) -> None:
# Update the existing log message via chat_update. If no log message exists
# yet, re-enqueue post_or_update_log_report_message_task, but only while the
# alert group's Slack message is at most 5 minutes old.
slack_message = alert_group.slack_message
# Without the alert group's Slack message there is no channel to update in.
if slack_message is None:
logger.info(
f"Cannot update log message for alert_group {alert_group.pk} because SlackMessage doesn't exist"
)
return None

slack_log_message = alert_group.slack_log_message

if slack_log_message is not None:
# prevent too frequent updates
# NOTE(review): timezone.timedelta presumably resolves because django.utils.timezone
# imports datetime.timedelta — confirm; it is not a documented export.
if timezone.now() <= slack_log_message.last_updated + timezone.timedelta(seconds=5):
return

attachments = AlertGroupLogSlackRenderer.render_incident_log_report_for_slack(alert_group)
logger.debug(
f"Update log message for alert_group {alert_group.pk}, slack_log_message {slack_log_message.pk}"
)
try:
self._slack_client.chat_update(
channel=slack_message.channel_id,
text="Alert Group log",
ts=slack_log_message.slack_id,
attachments=attachments,
)
except SlackAPIRatelimitError as e:
# Start the rate-limit message task only if the channel is not already marked as rate limited.
if not alert_group.channel.is_rate_limited_in_slack:
alert_group.channel.start_send_rate_limit_message_task(e.retry_after)
except SlackAPIMessageNotFoundError:
# Slack reports the message as not found: clear the reference on the alert group.
alert_group.slack_log_message = None
alert_group.save(update_fields=["slack_log_message"])
except (
SlackAPITokenError,
SlackAPIChannelNotFoundError,
SlackAPIChannelArchivedError,
SlackAPIChannelInactiveError,
SlackAPIInvalidAuthError,
SlackAPICantUpdateMessageError,
):
# These Slack API errors are deliberately ignored.
pass
else:
# chat_update succeeded: record the update time used by the 5-second throttle above.
slack_log_message.last_updated = timezone.now()
slack_log_message.save(update_fields=["last_updated"])
logger.debug(
f"Finished update log message for alert_group {alert_group.pk}, "
f"slack_log_message {slack_log_message.pk}"
)
# check how much time has passed since slack message was created
# to prevent eternal loop of restarting update log message task
elif timezone.now() <= slack_message.created_at + timezone.timedelta(minutes=5):
logger.debug(
f"Update log message failed for alert_group {alert_group.pk}: "
f"log message does not exist yet. Restarting post_or_update_log_report_message_task..."
)
# Re-enqueue with a third positional argument of True and a 3-second countdown.
post_or_update_log_report_message_task.apply_async(
(alert_group.pk, self.slack_team_identity.pk, True),
countdown=3,
)
else:
logger.debug(f"Update log message failed for alert_group {alert_group.pk}: " f"log message does not exist.")


STEPS_ROUTING: ScenarioRoute.RoutingSteps = [
{
"payload_type": PayloadType.INTERACTIVE_MESSAGE,
Expand Down
14 changes: 0 additions & 14 deletions engine/apps/slack/scenarios/slack_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,3 @@ def render_alert_group_future_log_report_text(alert_group: "AlertGroup"):
for plan_line in escalation_policies_plan[time]:
result += f"*{humanize.naturaldelta(time)}:* {plan_line}\n"
return result

@staticmethod
def render_incident_log_report_for_slack(alert_group: "AlertGroup"):
    """Render the alert group's log report as a list of Slack attachments.

    The past and future log report texts are concatenated. A non-empty result
    is wrapped in a single ``{"text": ...}`` attachment; an empty result
    yields an empty list.
    """
    renderer = AlertGroupLogSlackRenderer
    past_text = renderer.render_alert_group_past_log_report_text(alert_group)
    future_text = renderer.render_alert_group_future_log_report_text(alert_group)
    report_text = past_text + future_text
    if len(report_text) == 0:
        return []
    return [{"text": report_text}]
23 changes: 1 addition & 22 deletions engine/apps/slack/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
SlackAPITokenError,
SlackAPIUsergroupNotFoundError,
)
from apps.slack.scenarios.scenario_step import ScenarioStep
from apps.slack.utils import (
get_cache_key_update_incident_slack_message,
get_populate_slack_channel_task_id_key,
Expand Down Expand Up @@ -289,27 +288,7 @@ def populate_slack_user_identities(organization_pk):
autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None
)
def post_or_update_log_report_message_task(alert_group_pk, slack_team_identity_pk, update=False):
logger.debug(f"Start post_or_update_log_report_message_task for alert_group {alert_group_pk}")
from apps.alerts.models import AlertGroup
from apps.slack.models import SlackTeamIdentity

UpdateLogReportMessageStep = ScenarioStep.get_step("distribute_alerts", "UpdateLogReportMessageStep")

slack_team_identity = SlackTeamIdentity.objects.get(pk=slack_team_identity_pk)
alert_group = AlertGroup.objects.get(pk=alert_group_pk)
step = UpdateLogReportMessageStep(slack_team_identity, alert_group.channel.organization)

if alert_group.skip_escalation_in_slack or alert_group.channel.is_rate_limited_in_slack:
return

if update: # flag to prevent multiple posting log message to slack
step.update_log_message(alert_group)
else:
# don't post a new message, as it is available from the button
# this is an intermediate step, so we will only update posted messages but not post new ones
# once majority of messages are updated, we can remove this step (https://github.com/grafana/oncall/pull/4686)
pass
logger.debug(f"Finish post_or_update_log_report_message_task for alert_group {alert_group_pk}")
return "Deprecated, will be removed after queue cleanup"


@shared_dedicated_queue_retry_task(
Expand Down
Loading
Loading