diff --git a/pkg/alertmanager/alertmanager.go b/pkg/alertmanager/alertmanager.go index 52bd99bdb3..dc8ebf27b3 100644 --- a/pkg/alertmanager/alertmanager.go +++ b/pkg/alertmanager/alertmanager.go @@ -104,6 +104,7 @@ type Config struct { PersisterConfig PersisterConfig GrafanaAlertmanagerCompatibility bool + GrafanaAlertmanagerTenantSuffix string } // An Alertmanager manages the alerts for one user. diff --git a/pkg/alertmanager/multitenant.go b/pkg/alertmanager/multitenant.go index 3b7f847bec..3da6d7f9e8 100644 --- a/pkg/alertmanager/multitenant.go +++ b/pkg/alertmanager/multitenant.go @@ -81,8 +81,9 @@ type MultitenantAlertmanagerConfig struct { PeerTimeout time.Duration `yaml:"peer_timeout" category:"advanced"` - EnableAPI bool `yaml:"enable_api" category:"advanced"` - GrafanaAlertmanagerCompatibilityEnabled bool `yaml:"grafana_alertmanager_compatibility_enabled" category:"experimental"` + EnableAPI bool `yaml:"enable_api" category:"advanced"` + GrafanaAlertmanagerCompatibilityEnabled bool `yaml:"grafana_alertmanager_compatibility_enabled" category:"experimental"` + GrafanaAlertmanagerTenantSuffix string `yaml:"grafana_alertmanager_tenant_suffix" category:"experimental"` MaxConcurrentGetRequestsPerTenant int `yaml:"max_concurrent_get_requests_per_tenant" category:"advanced"` @@ -126,6 +127,7 @@ func (cfg *MultitenantAlertmanagerConfig) RegisterFlags(f *flag.FlagSet, logger f.BoolVar(&cfg.EnableAPI, "alertmanager.enable-api", true, "Enable the alertmanager config API.") f.BoolVar(&cfg.GrafanaAlertmanagerCompatibilityEnabled, "alertmanager.grafana-alertmanager-compatibility-enabled", false, "Enable routes to support the migration and operation of the Grafana Alertmanager.") + f.StringVar(&cfg.GrafanaAlertmanagerTenantSuffix, "alertmanager.grafana-tenant-suffix", "", "Avoid starting the Alertmanager for Grafana Alertmanager tenants unless they have a promoted, non-default configuration.") f.IntVar(&cfg.MaxConcurrentGetRequestsPerTenant, "alertmanager.max-concurrent-get-requests-per-tenant", 0, "Maximum number of concurrent GET requests allowed per tenant. The zero value (and negative values) result in a limit of GOMAXPROCS or 8, whichever is larger. Status code 503 is served for GET requests that would exceed the concurrency limit.") f.BoolVar(&cfg.EnableStateCleanup, "alertmanager.enable-state-cleanup", true, "Enables periodic cleanup of alertmanager stateful data (notification logs and silences) from object storage. When enabled, data is removed for any tenant that does not have a configuration.") @@ -666,29 +668,38 @@ func (am *MultitenantAlertmanager) syncConfigs(ctx context.Context, cfgMap map[s level.Debug(am.logger).Log("msg", "adding configurations", "num_configs", len(cfgMap)) userAlertmanagersToStop := map[string]*Alertmanager{} for user, cfgs := range cfgMap { - cfg, startAm, err := am.computeConfig(cfgs) + cfg, err := am.computeConfig(cfgs) if err != nil { am.multitenantMetrics.lastReloadSuccessful.WithLabelValues(user).Set(float64(0)) level.Warn(am.logger).Log("msg", "error computing config", "err", err) continue } - if !startAm { - am.alertmanagersMtx.Lock() - if userAM, ok := am.alertmanagers[user]; ok { - userAlertmanagersToStop[user] = userAM - delete(am.alertmanagers, user) - delete(am.cfgs, user) - am.multitenantMetrics.lastReloadSuccessful.DeleteLabelValues(user) - am.multitenantMetrics.lastReloadSuccessfulTimestamp.DeleteLabelValues(user) - am.alertmanagerMetrics.removeUserRegistry(user) + if am.cfg.GrafanaAlertmanagerTenantSuffix != "" { + if !cfg.usingGrafanaConfig && strings.HasSuffix(user, am.cfg.GrafanaAlertmanagerTenantSuffix) { + // Avoid starting the Alertmanager for Grafana tenants not using a Grafana configuration. + level.Debug(am.logger).Log("msg", "not initializing alertmanager for grafana tenant without promoted, non-default configuration", "user", user) + am.alertmanagersMtx.Lock() + if userAM, ok := am.alertmanagers[user]; ok { + userAlertmanagersToStop[user] = userAM + delete(am.alertmanagers, user) + delete(am.cfgs, user) + am.multitenantMetrics.lastReloadSuccessful.DeleteLabelValues(user) + am.multitenantMetrics.lastReloadSuccessfulTimestamp.DeleteLabelValues(user) + am.alertmanagerMetrics.removeUserRegistry(user) + } + am.alertmanagersMtx.Unlock() + continue } - am.alertmanagersMtx.Unlock() - continue - } - if err := am.syncStates(ctx, cfg); err != nil { - level.Error(am.logger).Log("msg", "error syncing states", "err", err, "user", user) + // Promote state if needed. + if err := am.syncStates(ctx, cfg); err != nil { + level.Error(am.logger).Log("msg", "error syncing states", "err", err, "user", user) + } + } else { + if err := am.syncStates(ctx, cfg); err != nil { + level.Error(am.logger).Log("msg", "error syncing states", "err", err, "user", user) + } } if err := am.setConfig(cfg); err != nil { @@ -723,41 +734,41 @@ func (am *MultitenantAlertmanager) syncConfigs(ctx context.Context, cfgMap map[s } // computeConfig takes an AlertConfigDescs struct containing Mimir and Grafana configurations. -// It returns a bool indicating whether the Alertmanager should be started for a tenant. -func (am *MultitenantAlertmanager) computeConfig(cfgs alertspb.AlertConfigDescs) (amConfig, bool, error) { +// It returns the final configuration and external URL the Alertmanager will use. +func (am *MultitenantAlertmanager) computeConfig(cfgs alertspb.AlertConfigDescs) (amConfig, error) { cfg := amConfig{ AlertConfigDesc: cfgs.Mimir, tmplExternalURL: am.cfg.ExternalURL.URL, } - shouldStartAlertmanager := !(am.fallbackConfig == cfgs.Mimir.RawConfig || cfgs.Mimir.RawConfig == "") + switch { // Mimir configuration. - case cfgs.Grafana.RawConfig == "": - level.Debug(am.logger).Log("msg", "grafana configuration is empty, using mimir config", "user", cfgs.Mimir.User) - return cfg, shouldStartAlertmanager && cfgs.Grafana.User != "", nil case !cfgs.Grafana.Promoted: level.Debug(am.logger).Log("msg", "grafana configuration not promoted, using mimir config", "user", cfgs.Mimir.User) - return cfg, shouldStartAlertmanager, nil + return cfg, nil + case cfgs.Grafana.Default: level.Debug(am.logger).Log("msg", "grafana configuration is default, using mimir config", "user", cfgs.Mimir.User) - return cfg, shouldStartAlertmanager, nil + return cfg, nil + + case cfgs.Grafana.RawConfig == "": + level.Debug(am.logger).Log("msg", "grafana configuration is empty, using mimir config", "user", cfgs.Mimir.User) + return cfg, nil // Grafana configuration. case cfgs.Mimir.RawConfig == am.fallbackConfig: level.Debug(am.logger).Log("msg", "mimir configuration is default, using grafana config with the default globals", "user", cfgs.Mimir.User) - cfg, err := createUsableGrafanaConfig(cfgs.Grafana, cfgs.Mimir.RawConfig) - return cfg, true, err + return createUsableGrafanaConfig(cfgs.Grafana, cfgs.Mimir.RawConfig) case cfgs.Mimir.RawConfig == "": level.Debug(am.logger).Log("msg", "mimir configuration is empty, using grafana config with the default globals", "user", cfgs.Grafana.User) - cfg, err := createUsableGrafanaConfig(cfgs.Grafana, am.fallbackConfig) - return cfg, true, err + return createUsableGrafanaConfig(cfgs.Grafana, am.fallbackConfig) // Both configurations. // TODO: merge configurations. default: level.Warn(am.logger).Log("msg", "merging configurations not implemented, using mimir config", "user", cfgs.Mimir.User) - return cfg, true, nil + return cfg, nil } } @@ -948,6 +959,7 @@ func (am *MultitenantAlertmanager) newAlertmanager(userID string, amConfig *defi Limits: am.limits, Features: am.features, GrafanaAlertmanagerCompatibility: am.cfg.GrafanaAlertmanagerCompatibilityEnabled, + GrafanaAlertmanagerTenantSuffix: am.cfg.GrafanaAlertmanagerTenantSuffix, }, reg) if err != nil { return nil, fmt.Errorf("unable to start Alertmanager for user %v: %v", userID, err) diff --git a/pkg/alertmanager/multitenant_test.go b/pkg/alertmanager/multitenant_test.go index d16cfe4048..92b83ee3f7 100644 --- a/pkg/alertmanager/multitenant_test.go +++ b/pkg/alertmanager/multitenant_test.go @@ -2521,16 +2521,15 @@ func TestComputeConfig(t *testing.T) { mimirExternalURL := am.cfg.ExternalURL.String() tests := []struct { - name string - cfg alertspb.AlertConfigDescs - expErr string - expCfg alertspb.AlertConfigDesc - shouldStartAm bool - expURL string - expHeaders map[string]string + name string + cfg alertspb.AlertConfigDescs + expErr string + expCfg alertspb.AlertConfigDesc + expURL string + expHeaders map[string]string }{ { - name: "no Grafana configuration", + name: "no grafana configuration", cfg: alertspb.AlertConfigDescs{ Mimir: alertspb.AlertConfigDesc{ User: "user", @@ -2541,28 +2540,10 @@ func TestComputeConfig(t *testing.T) { User: "user", RawConfig: simpleConfigOne, }, - shouldStartAm: true, - expURL: mimirExternalURL, - }, - { - name: "no Grafana config, default Mimir config", - cfg: alertspb.AlertConfigDescs{ - Mimir: alertspb.AlertConfigDesc{ - User: "user", - RawConfig: am.fallbackConfig, - }, - }, - shouldStartAm: false, - expURL: mimirExternalURL, - }, - { - name: "no Grafana config, no Mimir config", - cfg: alertspb.AlertConfigDescs{}, - shouldStartAm: true, - expURL: mimirExternalURL, + expURL: mimirExternalURL, }, { - name: "empty Grafana configuration", + name: "empty grafana configuration", cfg: alertspb.AlertConfigDescs{ Mimir: alertspb.AlertConfigDesc{ User: "user", @@ -2577,7 +2558,6 @@ func TestComputeConfig(t *testing.T) { StaticHeaders: map[string]string{"Test-Header": "test-value"}, }, }, - shouldStartAm: true, expCfg: alertspb.AlertConfigDesc{ User: "user", RawConfig: simpleConfigOne, @@ -2585,41 +2565,7 @@ func TestComputeConfig(t *testing.T) { expURL: mimirExternalURL, }, { - name: "empty Grafana config, default Mimir config", - cfg: alertspb.AlertConfigDescs{ - Mimir: alertspb.AlertConfigDesc{ - User: "user", - RawConfig: am.fallbackConfig, - }, - Grafana: alertspb.GrafanaAlertConfigDesc{ - User: "user", - RawConfig: "", - Default: false, - Promoted: true, - ExternalUrl: grafanaExternalURL, - StaticHeaders: map[string]string{"Test-Header": "test-value"}, - }, - }, - shouldStartAm: false, - expURL: mimirExternalURL, - }, - { - name: "empty Grafana config, no Mimir config", - cfg: alertspb.AlertConfigDescs{ - Grafana: alertspb.GrafanaAlertConfigDesc{ - User: "user", - RawConfig: "", - Default: false, - Promoted: true, - ExternalUrl: grafanaExternalURL, - StaticHeaders: map[string]string{"Test-Header": "test-value"}, - }, - }, - shouldStartAm: true, - expURL: mimirExternalURL, - }, - { - name: "unpromoted Grafana configuration", + name: "grafana configuration is not promoted", cfg: alertspb.AlertConfigDescs{ Mimir: alertspb.AlertConfigDesc{ User: "user", @@ -2637,43 +2583,10 @@ func TestComputeConfig(t *testing.T) { User: "user", RawConfig: simpleConfigOne, }, - shouldStartAm: true, - expURL: mimirExternalURL, - }, - { - name: "unpromoted Grafana config, default Mimir config", - cfg: alertspb.AlertConfigDescs{ - Mimir: alertspb.AlertConfigDesc{ - User: "user", - RawConfig: am.fallbackConfig, - }, - Grafana: alertspb.GrafanaAlertConfigDesc{ - User: "user", - RawConfig: grafanaConfig, - Promoted: false, - ExternalUrl: grafanaExternalURL, - StaticHeaders: map[string]string{"Test-Header": "test-value"}, - }, - }, - shouldStartAm: false, - expURL: mimirExternalURL, - }, - { - name: "unpromoted Grafana config, no Mimir config", - cfg: alertspb.AlertConfigDescs{ - Grafana: alertspb.GrafanaAlertConfigDesc{ - User: "user", - RawConfig: grafanaConfig, - Promoted: false, - ExternalUrl: grafanaExternalURL, - StaticHeaders: map[string]string{"Test-Header": "test-value"}, - }, - }, - shouldStartAm: false, - expURL: mimirExternalURL, + expURL: mimirExternalURL, }, { - name: "default Grafana configuration", + name: "grafana configuration is default", cfg: alertspb.AlertConfigDescs{ Mimir: alertspb.AlertConfigDesc{ User: "user", @@ -2692,45 +2605,10 @@ func TestComputeConfig(t *testing.T) { User: "user", RawConfig: simpleConfigOne, }, - shouldStartAm: true, - expURL: mimirExternalURL, - }, - { - name: "default Grafana config, default Mimir config", - cfg: alertspb.AlertConfigDescs{ - Mimir: alertspb.AlertConfigDesc{ - User: "user", - RawConfig: am.fallbackConfig, - }, - Grafana: alertspb.GrafanaAlertConfigDesc{ - User: "user", - RawConfig: grafanaConfig, - Default: true, - Promoted: true, - ExternalUrl: grafanaExternalURL, - StaticHeaders: map[string]string{"Test-Header": "test-value"}, - }, - }, - shouldStartAm: false, - expURL: mimirExternalURL, - }, - { - name: "default Grafana config, no Mimir config", - cfg: alertspb.AlertConfigDescs{ - Grafana: alertspb.GrafanaAlertConfigDesc{ - User: "user", - RawConfig: grafanaConfig, - Default: true, - Promoted: true, - ExternalUrl: grafanaExternalURL, - StaticHeaders: map[string]string{"Test-Header": "test-value"}, - }, - }, - shouldStartAm: false, - expURL: mimirExternalURL, + expURL: mimirExternalURL, }, { - name: "no Mimir configuration", + name: "no mimir configuration", cfg: alertspb.AlertConfigDescs{ Grafana: alertspb.GrafanaAlertConfigDesc{ User: "user", @@ -2746,12 +2624,11 @@ func TestComputeConfig(t *testing.T) { RawConfig: string(combinedCfg), Templates: []*alertspb.TemplateDesc{}, }, - shouldStartAm: true, - expURL: grafanaExternalURL, - expHeaders: map[string]string{"Test-Header": "test-value"}, + expURL: grafanaExternalURL, + expHeaders: map[string]string{"Test-Header": "test-value"}, }, { - name: "empty Mimir configuration", + name: "empty mimir configuration", cfg: alertspb.AlertConfigDescs{ Mimir: alertspb.AlertConfigDesc{ User: "user", @@ -2771,12 +2648,11 @@ func TestComputeConfig(t *testing.T) { RawConfig: string(combinedCfg), Templates: []*alertspb.TemplateDesc{}, }, - shouldStartAm: true, - expURL: grafanaExternalURL, - expHeaders: map[string]string{"Test-Header-1": "test-value-1", "Test-Header-2": "test-value-2"}, + expURL: grafanaExternalURL, + expHeaders: map[string]string{"Test-Header-1": "test-value-1", "Test-Header-2": "test-value-2"}, }, { - name: "default Mimir configuration", + name: "default mimir configuration", cfg: alertspb.AlertConfigDescs{ Mimir: alertspb.AlertConfigDesc{ User: "user", @@ -2796,13 +2672,12 @@ func TestComputeConfig(t *testing.T) { RawConfig: string(combinedCfg), Templates: []*alertspb.TemplateDesc{}, }, - shouldStartAm: true, - expURL: grafanaExternalURL, - expHeaders: map[string]string{"Test-Header-1": "test-value-1", "Test-Header-2": "test-value-2"}, + expURL: grafanaExternalURL, + expHeaders: map[string]string{"Test-Header-1": "test-value-1", "Test-Header-2": "test-value-2"}, }, { // TODO: change once merging configs is implemented. - name: "both Mimir and Grafana configurations (merging not implemented)", + name: "both mimir and grafana configurations (merging not implemented)", cfg: alertspb.AlertConfigDescs{ Mimir: alertspb.AlertConfigDesc{ User: "user", @@ -2821,25 +2696,18 @@ func TestComputeConfig(t *testing.T) { User: "user", RawConfig: simpleConfigOne, }, - shouldStartAm: true, - expURL: am.cfg.ExternalURL.String(), + expURL: am.cfg.ExternalURL.String(), }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - // TODO: fix - cfg, shouldStartAm, err := am.computeConfig(test.cfg) + cfg, err := am.computeConfig(test.cfg) if test.expErr != "" { require.EqualError(t, err, test.expErr) return } - if !test.shouldStartAm { - require.False(t, shouldStartAm) - return - } - require.NoError(t, err) require.Equal(t, test.expCfg, cfg.AlertConfigDesc) require.Equal(t, test.expURL, cfg.tmplExternalURL.String())