Skip to content

Commit

Permalink
scope everything to grafana tenants
Browse files Browse the repository at this point in the history
  • Loading branch information
santihernandezc committed Jan 22, 2025
1 parent c8a5fd6 commit 5f2c772
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 187 deletions.
1 change: 1 addition & 0 deletions pkg/alertmanager/alertmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ type Config struct {
PersisterConfig PersisterConfig

GrafanaAlertmanagerCompatibility bool
GrafanaAlertmanagerTenantSuffix string
}

// An Alertmanager manages the alerts for one user.
Expand Down
72 changes: 42 additions & 30 deletions pkg/alertmanager/multitenant.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,9 @@ type MultitenantAlertmanagerConfig struct {

PeerTimeout time.Duration `yaml:"peer_timeout" category:"advanced"`

EnableAPI bool `yaml:"enable_api" category:"advanced"`
GrafanaAlertmanagerCompatibilityEnabled bool `yaml:"grafana_alertmanager_compatibility_enabled" category:"experimental"`
EnableAPI bool `yaml:"enable_api" category:"advanced"`
GrafanaAlertmanagerCompatibilityEnabled bool `yaml:"grafana_alertmanager_compatibility_enabled" category:"experimental"`
GrafanaAlertmanagerTenantSuffix string `yaml:"grafana_alertmanager_tenant_suffix" category:"experimental"`

MaxConcurrentGetRequestsPerTenant int `yaml:"max_concurrent_get_requests_per_tenant" category:"advanced"`

Expand Down Expand Up @@ -126,6 +127,7 @@ func (cfg *MultitenantAlertmanagerConfig) RegisterFlags(f *flag.FlagSet, logger

f.BoolVar(&cfg.EnableAPI, "alertmanager.enable-api", true, "Enable the alertmanager config API.")
f.BoolVar(&cfg.GrafanaAlertmanagerCompatibilityEnabled, "alertmanager.grafana-alertmanager-compatibility-enabled", false, "Enable routes to support the migration and operation of the Grafana Alertmanager.")
f.StringVar(&cfg.GrafanaAlertmanagerTenantSuffix, "alertmanager.grafana-tenant-suffix", "", "Avoid starting the Alertmanager for Grafana Alertmanager tenants unless they have a promoted, non-default configuration.")
f.IntVar(&cfg.MaxConcurrentGetRequestsPerTenant, "alertmanager.max-concurrent-get-requests-per-tenant", 0, "Maximum number of concurrent GET requests allowed per tenant. The zero value (and negative values) result in a limit of GOMAXPROCS or 8, whichever is larger. Status code 503 is served for GET requests that would exceed the concurrency limit.")

f.BoolVar(&cfg.EnableStateCleanup, "alertmanager.enable-state-cleanup", true, "Enables periodic cleanup of alertmanager stateful data (notification logs and silences) from object storage. When enabled, data is removed for any tenant that does not have a configuration.")
Expand Down Expand Up @@ -666,29 +668,38 @@ func (am *MultitenantAlertmanager) syncConfigs(ctx context.Context, cfgMap map[s
level.Debug(am.logger).Log("msg", "adding configurations", "num_configs", len(cfgMap))
userAlertmanagersToStop := map[string]*Alertmanager{}
for user, cfgs := range cfgMap {
cfg, startAm, err := am.computeConfig(cfgs)
cfg, err := am.computeConfig(cfgs)
if err != nil {
am.multitenantMetrics.lastReloadSuccessful.WithLabelValues(user).Set(float64(0))
level.Warn(am.logger).Log("msg", "error computing config", "err", err)
continue
}

if !startAm {
am.alertmanagersMtx.Lock()
if userAM, ok := am.alertmanagers[user]; ok {
userAlertmanagersToStop[user] = userAM
delete(am.alertmanagers, user)
delete(am.cfgs, user)
am.multitenantMetrics.lastReloadSuccessful.DeleteLabelValues(user)
am.multitenantMetrics.lastReloadSuccessfulTimestamp.DeleteLabelValues(user)
am.alertmanagerMetrics.removeUserRegistry(user)
if am.cfg.GrafanaAlertmanagerTenantSuffix != "" {
if !cfg.usingGrafanaConfig && strings.HasSuffix(user, am.cfg.GrafanaAlertmanagerTenantSuffix) {
// Avoid starting the Alertmanager for Grafana tenants not using a Grafana configuration.
level.Debug(am.logger).Log("msg", "not initializing alertmanager for grafana tenant without promoted, non-default configuration", "user", user)
am.alertmanagersMtx.Lock()
if userAM, ok := am.alertmanagers[user]; ok {
userAlertmanagersToStop[user] = userAM
delete(am.alertmanagers, user)
delete(am.cfgs, user)
am.multitenantMetrics.lastReloadSuccessful.DeleteLabelValues(user)
am.multitenantMetrics.lastReloadSuccessfulTimestamp.DeleteLabelValues(user)
am.alertmanagerMetrics.removeUserRegistry(user)
}
am.alertmanagersMtx.Unlock()
continue
}
am.alertmanagersMtx.Unlock()
continue
}

if err := am.syncStates(ctx, cfg); err != nil {
level.Error(am.logger).Log("msg", "error syncing states", "err", err, "user", user)
// Promote state if needed.
if err := am.syncStates(ctx, cfg); err != nil {
level.Error(am.logger).Log("msg", "error syncing states", "err", err, "user", user)
}
} else {
if err := am.syncStates(ctx, cfg); err != nil {
level.Error(am.logger).Log("msg", "error syncing states", "err", err, "user", user)
}
}

if err := am.setConfig(cfg); err != nil {
Expand Down Expand Up @@ -723,41 +734,41 @@ func (am *MultitenantAlertmanager) syncConfigs(ctx context.Context, cfgMap map[s
}

// computeConfig takes an AlertConfigDescs struct containing Mimir and Grafana configurations.
// It returns a bool indicating whether the Alertmanager should be started for a tenant.
func (am *MultitenantAlertmanager) computeConfig(cfgs alertspb.AlertConfigDescs) (amConfig, bool, error) {
// It returns the final configuration and external URL the Alertmanager will use.
func (am *MultitenantAlertmanager) computeConfig(cfgs alertspb.AlertConfigDescs) (amConfig, error) {
cfg := amConfig{
AlertConfigDesc: cfgs.Mimir,
tmplExternalURL: am.cfg.ExternalURL.URL,
}
shouldStartAlertmanager := !(am.fallbackConfig == cfgs.Mimir.RawConfig || cfgs.Mimir.RawConfig == "")

switch {
// Mimir configuration.
case cfgs.Grafana.RawConfig == "":
level.Debug(am.logger).Log("msg", "grafana configuration is empty, using mimir config", "user", cfgs.Mimir.User)
return cfg, shouldStartAlertmanager && cfgs.Grafana.User != "", nil
case !cfgs.Grafana.Promoted:
level.Debug(am.logger).Log("msg", "grafana configuration not promoted, using mimir config", "user", cfgs.Mimir.User)
return cfg, shouldStartAlertmanager, nil
return cfg, nil

case cfgs.Grafana.Default:
level.Debug(am.logger).Log("msg", "grafana configuration is default, using mimir config", "user", cfgs.Mimir.User)
return cfg, shouldStartAlertmanager, nil
return cfg, nil

case cfgs.Grafana.RawConfig == "":
level.Debug(am.logger).Log("msg", "grafana configuration is empty, using mimir config", "user", cfgs.Mimir.User)
return cfg, nil

// Grafana configuration.
case cfgs.Mimir.RawConfig == am.fallbackConfig:
level.Debug(am.logger).Log("msg", "mimir configuration is default, using grafana config with the default globals", "user", cfgs.Mimir.User)
cfg, err := createUsableGrafanaConfig(cfgs.Grafana, cfgs.Mimir.RawConfig)
return cfg, true, err
return createUsableGrafanaConfig(cfgs.Grafana, cfgs.Mimir.RawConfig)

case cfgs.Mimir.RawConfig == "":
level.Debug(am.logger).Log("msg", "mimir configuration is empty, using grafana config with the default globals", "user", cfgs.Grafana.User)
cfg, err := createUsableGrafanaConfig(cfgs.Grafana, am.fallbackConfig)
return cfg, true, err
return createUsableGrafanaConfig(cfgs.Grafana, am.fallbackConfig)

// Both configurations.
// TODO: merge configurations.
default:
level.Warn(am.logger).Log("msg", "merging configurations not implemented, using mimir config", "user", cfgs.Mimir.User)
return cfg, true, nil
return cfg, nil
}
}

Expand Down Expand Up @@ -948,6 +959,7 @@ func (am *MultitenantAlertmanager) newAlertmanager(userID string, amConfig *defi
Limits: am.limits,
Features: am.features,
GrafanaAlertmanagerCompatibility: am.cfg.GrafanaAlertmanagerCompatibilityEnabled,
GrafanaAlertmanagerTenantSuffix: am.cfg.GrafanaAlertmanagerTenantSuffix,
}, reg)
if err != nil {
return nil, fmt.Errorf("unable to start Alertmanager for user %v: %v", userID, err)
Expand Down
Loading

0 comments on commit 5f2c772

Please sign in to comment.