From 21f1edd85b824b16ff2ba8a751d998d55cd00c14 Mon Sep 17 00:00:00 2001 From: Steve Simpson Date: Thu, 8 Jul 2021 12:26:11 +0200 Subject: [PATCH 1/3] Expose configuration of memberlist packet compression. Allows manually specifying whether memberlist should compress packets via a new configuration flag: `-memberlist.enable-compression`. This typically has little benefit for Cortex, as the ring state messages are already compressed with Snappy, the second layer of compression does not achieve any additional saving. It's not clear cut whether there might still be some benefit for internal memberlist messages; this needs to be evaluated in a environment of some reasonable scale. Signed-off-by: Steve Simpson --- CHANGELOG.md | 1 + docs/configuration/config-file-reference.md | 5 ++++ ...tegration_memberlist_single_binary_test.go | 29 ++++++++++++------- pkg/ring/kv/memberlist/memberlist_client.go | 3 ++ 4 files changed, 27 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3d14d59657..86bfd28b7b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ * `-compactor.ring.heartbeat-timeout` * `-store-gateway.sharding-ring.heartbeat-timeout` * [ENHANCEMENT] Memberlist: optimized receive path for processing ring state updates, to help reduce CPU utilization in large clusters. #4345 +* [ENHANCEMENT] Memberlist: expose configuration of memberlist packet compression via `-memberlist.enable-compression`. #4346 * [BUGFIX] HA Tracker: when cleaning up obsolete elected replicas from KV store, tracker didn't update number of cluster per user correctly. #4336 ## 1.10.0-rc.0 / 2021-06-28 diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index c034a7cdb5..67906e7383 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -3796,6 +3796,11 @@ The `memberlist_config` configures the Gossip memberlist. # CLI flag: -memberlist.dead-node-reclaim-time [dead_node_reclaim_time: | default = 0s] +# Enable message compression. This can be used to reduce bandwidth usage at the +# cost of slightly more CPU utilization. +# CLI flag: -memberlist.enable-compression +[enable_compression: | default = true] + # Other cluster members to join. Can be specified multiple times. It can be an # IP, hostname or an entry specified in the DNS Service Discovery format (see # https://cortexmetrics.io/docs/configuration/arguments/#dns-service-discovery diff --git a/integration/integration_memberlist_single_binary_test.go b/integration/integration_memberlist_single_binary_test.go index 53c1a945fd..0cfe7da925 100644 --- a/integration/integration_memberlist_single_binary_test.go +++ b/integration/integration_memberlist_single_binary_test.go @@ -23,15 +23,21 @@ import ( func TestSingleBinaryWithMemberlist(t *testing.T) { t.Run("default", func(t *testing.T) { - testSingleBinaryEnv(t, false) + testSingleBinaryEnv(t, false, nil) }) t.Run("tls", func(t *testing.T) { - testSingleBinaryEnv(t, true) + testSingleBinaryEnv(t, true, nil) + }) + + t.Run("compression-disabled", func(t *testing.T) { + testSingleBinaryEnv(t, false, map[string]string{ + "-memberlist.enable-compression": "false", + }) }) } -func testSingleBinaryEnv(t *testing.T, tlsEnabled bool) { +func testSingleBinaryEnv(t *testing.T, tlsEnabled bool, flags map[string]string) { s, err := e2e.NewScenario(networkName) require.NoError(t, err) defer s.Close() @@ -65,13 +71,13 @@ func testSingleBinaryEnv(t *testing.T, tlsEnabled bool) { filepath.Join(s.SharedDir(), clientKeyFile), )) - cortex1 = newSingleBinary("cortex-1", memberlistDNS, "") - cortex2 = newSingleBinary("cortex-2", memberlistDNS, networkName+"-cortex-1:8000") - cortex3 = newSingleBinary("cortex-3", memberlistDNS, networkName+"-cortex-1:8000") + cortex1 = newSingleBinary("cortex-1", memberlistDNS, "", flags) + cortex2 = newSingleBinary("cortex-2", memberlistDNS, networkName+"-cortex-1:8000", flags) + cortex3 = newSingleBinary("cortex-3", memberlistDNS, networkName+"-cortex-1:8000", flags) } else { - cortex1 = newSingleBinary("cortex-1", "", "") - cortex2 = newSingleBinary("cortex-2", "", networkName+"-cortex-1:8000") - cortex3 = newSingleBinary("cortex-3", "", networkName+"-cortex-1:8000") + cortex1 = newSingleBinary("cortex-1", "", "", flags) + cortex2 = newSingleBinary("cortex-2", "", networkName+"-cortex-1:8000", flags) + cortex3 = newSingleBinary("cortex-3", "", networkName+"-cortex-1:8000", flags) } // start cortex-1 first, as cortex-2 and cortex-3 both connect to cortex-1 @@ -109,7 +115,7 @@ func testSingleBinaryEnv(t *testing.T, tlsEnabled bool) { require.NoError(t, s.Stop(cortex3)) } -func newSingleBinary(name string, servername string, join string) *e2ecortex.CortexService { +func newSingleBinary(name string, servername string, join string, testFlags map[string]string) *e2ecortex.CortexService { flags := map[string]string{ "-ingester.final-sleep": "0s", "-ingester.join-after": "0s", // join quickly @@ -132,6 +138,7 @@ func newSingleBinary(name string, servername string, join string) *e2ecortex.Cor mergeFlags( ChunksStorageFlags(), flags, + testFlags, getTLSFlagsWithPrefix("memberlist", servername, servername == ""), ), "", @@ -170,7 +177,7 @@ func TestSingleBinaryWithMemberlistScaling(t *testing.T) { if i > 0 { join = e2e.NetworkContainerHostPort(networkName, "cortex-1", 8000) } - c := newSingleBinary(name, "", join) + c := newSingleBinary(name, "", join, nil) require.NoError(t, s.StartAndWaitReady(c)) instances = append(instances, c) } diff --git a/pkg/ring/kv/memberlist/memberlist_client.go b/pkg/ring/kv/memberlist/memberlist_client.go index 7e45868bd9..778058a902 100644 --- a/pkg/ring/kv/memberlist/memberlist_client.go +++ b/pkg/ring/kv/memberlist/memberlist_client.go @@ -136,6 +136,7 @@ type KVConfig struct { GossipNodes int `yaml:"gossip_nodes"` GossipToTheDeadTime time.Duration `yaml:"gossip_to_dead_nodes_time"` DeadNodeReclaimTime time.Duration `yaml:"dead_node_reclaim_time"` + EnableCompression bool `yaml:"enable_compression"` // List of members to join JoinMembers flagext.StringSlice `yaml:"join_members"` @@ -187,6 +188,7 @@ func (cfg *KVConfig) RegisterFlagsWithPrefix(f *flag.FlagSet, prefix string) { f.DurationVar(&cfg.GossipToTheDeadTime, prefix+"memberlist.gossip-to-dead-nodes-time", mlDefaults.GossipToTheDeadTime, "How long to keep gossiping to dead nodes, to give them chance to refute their death.") f.DurationVar(&cfg.DeadNodeReclaimTime, prefix+"memberlist.dead-node-reclaim-time", mlDefaults.DeadNodeReclaimTime, "How soon can dead node's name be reclaimed with new address. 0 to disable.") f.IntVar(&cfg.MessageHistoryBufferBytes, prefix+"memberlist.message-history-buffer-bytes", 0, "How much space to use for keeping received and sent messages in memory for troubleshooting (two buffers). 0 to disable.") + f.BoolVar(&cfg.EnableCompression, prefix+"memberlist.enable-compression", mlDefaults.EnableCompression, "Enable message compression. This can be used to reduce bandwidth usage at the cost of slightly more CPU utilization.") cfg.TCPTransport.RegisterFlags(f, prefix) } @@ -380,6 +382,7 @@ func (m *KV) buildMemberlistConfig() (*memberlist.Config, error) { mlCfg.GossipNodes = m.cfg.GossipNodes mlCfg.GossipToTheDeadTime = m.cfg.GossipToTheDeadTime mlCfg.DeadNodeReclaimTime = m.cfg.DeadNodeReclaimTime + mlCfg.EnableCompression = m.cfg.EnableCompression if m.cfg.NodeName != "" { mlCfg.Name = m.cfg.NodeName From e49b9a71a117f9754ba4d098c31a6ba2429c79e9 Mon Sep 17 00:00:00 2001 From: Steve Simpson Date: Thu, 8 Jul 2021 13:35:43 +0200 Subject: [PATCH 2/3] Review comments. Signed-off-by: Steve Simpson --- docs/configuration/config-file-reference.md | 4 ++-- integration/integration_memberlist_single_binary_test.go | 2 +- pkg/ring/kv/memberlist/memberlist_client.go | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index 67906e7383..ed7db7fdbe 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -3798,8 +3798,8 @@ The `memberlist_config` configures the Gossip memberlist. # Enable message compression. This can be used to reduce bandwidth usage at the # cost of slightly more CPU utilization. -# CLI flag: -memberlist.enable-compression -[enable_compression: | default = true] +# CLI flag: -memberlist.compression-enabled +[compression_enabled: | default = true] # Other cluster members to join. Can be specified multiple times. It can be an # IP, hostname or an entry specified in the DNS Service Discovery format (see diff --git a/integration/integration_memberlist_single_binary_test.go b/integration/integration_memberlist_single_binary_test.go index 0cfe7da925..f4cd7b7914 100644 --- a/integration/integration_memberlist_single_binary_test.go +++ b/integration/integration_memberlist_single_binary_test.go @@ -32,7 +32,7 @@ func TestSingleBinaryWithMemberlist(t *testing.T) { t.Run("compression-disabled", func(t *testing.T) { testSingleBinaryEnv(t, false, map[string]string{ - "-memberlist.enable-compression": "false", + "-memberlist.compression-enabled": "false", }) }) } diff --git a/pkg/ring/kv/memberlist/memberlist_client.go b/pkg/ring/kv/memberlist/memberlist_client.go index 778058a902..79432d4668 100644 --- a/pkg/ring/kv/memberlist/memberlist_client.go +++ b/pkg/ring/kv/memberlist/memberlist_client.go @@ -136,7 +136,7 @@ type KVConfig struct { GossipNodes int `yaml:"gossip_nodes"` GossipToTheDeadTime time.Duration `yaml:"gossip_to_dead_nodes_time"` DeadNodeReclaimTime time.Duration `yaml:"dead_node_reclaim_time"` - EnableCompression bool `yaml:"enable_compression"` + EnableCompression bool `yaml:"compression_enabled"` // List of members to join JoinMembers flagext.StringSlice `yaml:"join_members"` @@ -188,7 +188,7 @@ func (cfg *KVConfig) RegisterFlagsWithPrefix(f *flag.FlagSet, prefix string) { f.DurationVar(&cfg.GossipToTheDeadTime, prefix+"memberlist.gossip-to-dead-nodes-time", mlDefaults.GossipToTheDeadTime, "How long to keep gossiping to dead nodes, to give them chance to refute their death.") f.DurationVar(&cfg.DeadNodeReclaimTime, prefix+"memberlist.dead-node-reclaim-time", mlDefaults.DeadNodeReclaimTime, "How soon can dead node's name be reclaimed with new address. 0 to disable.") f.IntVar(&cfg.MessageHistoryBufferBytes, prefix+"memberlist.message-history-buffer-bytes", 0, "How much space to use for keeping received and sent messages in memory for troubleshooting (two buffers). 0 to disable.") - f.BoolVar(&cfg.EnableCompression, prefix+"memberlist.enable-compression", mlDefaults.EnableCompression, "Enable message compression. This can be used to reduce bandwidth usage at the cost of slightly more CPU utilization.") + f.BoolVar(&cfg.EnableCompression, prefix+"memberlist.compression-enabled", mlDefaults.EnableCompression, "Enable message compression. This can be used to reduce bandwidth usage at the cost of slightly more CPU utilization.") cfg.TCPTransport.RegisterFlags(f, prefix) } From 2d3f57e2160e486309862c66f6bc6a215dafe2aa Mon Sep 17 00:00:00 2001 From: Steve Simpson Date: Thu, 8 Jul 2021 14:29:33 +0200 Subject: [PATCH 3/3] Review comments. Signed-off-by: Steve Simpson --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 86bfd28b7b..43cad88cab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ * `-compactor.ring.heartbeat-timeout` * `-store-gateway.sharding-ring.heartbeat-timeout` * [ENHANCEMENT] Memberlist: optimized receive path for processing ring state updates, to help reduce CPU utilization in large clusters. #4345 -* [ENHANCEMENT] Memberlist: expose configuration of memberlist packet compression via `-memberlist.enable-compression`. #4346 +* [ENHANCEMENT] Memberlist: expose configuration of memberlist packet compression via `-memberlist.compression=enabled`. #4346 * [BUGFIX] HA Tracker: when cleaning up obsolete elected replicas from KV store, tracker didn't update number of cluster per user correctly. #4336 ## 1.10.0-rc.0 / 2021-06-28