From 355a5d15abf61881180ec0f246a299cc2fcc9071 Mon Sep 17 00:00:00 2001
From: Alanmas <acostas.alan@gmail.com>
Date: Wed, 21 Apr 2021 18:17:28 -0500
Subject: [PATCH] roachprod: removing cassandra.go

Second part of #47567, where we need to ensure that no dependency is affected when we remove ssh.go.

So we are removing cassandra.go and all its dependencies.

Release note: None
---
 pkg/cmd/roachprod/install/BUILD.bazel       |    2 -
 pkg/cmd/roachprod/install/cassandra.go      |  164 ---
 pkg/cmd/roachprod/install/cassandra_yaml.go | 1277 -------------------
 pkg/cmd/roachprod/main.go                   |   24 +-
 pkg/cmd/roachprod/ssh/BUILD.bazel           |    9 -
 pkg/cmd/roachprod/ssh/ssh.go                |  168 +--
 vendor                                      |    2 +-
 7 files changed, 12 insertions(+), 1634 deletions(-)
 delete mode 100644 pkg/cmd/roachprod/install/cassandra.go
 delete mode 100644 pkg/cmd/roachprod/install/cassandra_yaml.go

diff --git a/pkg/cmd/roachprod/install/BUILD.bazel b/pkg/cmd/roachprod/install/BUILD.bazel
index d2f1713f80fb..7a3beff39f4a 100644
--- a/pkg/cmd/roachprod/install/BUILD.bazel
+++ b/pkg/cmd/roachprod/install/BUILD.bazel
@@ -3,8 +3,6 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
 go_library(
     name = "install",
     srcs = [
-        "cassandra.go",
-        "cassandra_yaml.go",
         "cluster_synced.go",
         "cockroach.go",
         "expander.go",
diff --git a/pkg/cmd/roachprod/install/cassandra.go b/pkg/cmd/roachprod/install/cassandra.go
deleted file mode 100644
index 770a7e1f4ea3..000000000000
--- a/pkg/cmd/roachprod/install/cassandra.go
+++ /dev/null
@@ -1,164 +0,0 @@
-// Copyright 2018 The Cockroach Authors.
-//
-// Use of this software is governed by the Business Source License
-// included in the file licenses/BSL.txt.
-//
-// As of the Change Date specified in that file, in accordance with
-// the Business Source License, use of this software will be governed
-// by the Apache License, Version 2.0, included in the file
-// licenses/APL.txt.
-
-package install
-
-import (
-	"bufio"
-	"fmt"
-	"html/template"
-	"io/ioutil"
-	"log"
-	"os"
-	"time"
-
-	"github.com/cockroachdb/cockroach/pkg/cmd/roachprod/ssh"
-)
-
-// Cassandra TODO(peter): document
-type Cassandra struct{}
-
-// Start implements the ClusterImpl.NodeDir interface.
-func (Cassandra) Start(c *SyncedCluster, extraArgs []string) {
-	yamlPath, err := makeCassandraYAML(c)
-	if err != nil {
-		log.Fatal(err)
-	}
-	c.Put(yamlPath, "./cassandra.yaml")
-	_ = os.Remove(yamlPath)
-
-	display := fmt.Sprintf("%s: starting cassandra (be patient)", c.Name)
-	nodes := c.ServerNodes()
-	c.Parallel(display, len(nodes), 1, func(i int) ([]byte, error) {
-		host := c.host(nodes[i])
-		user := c.user(nodes[i])
-
-		if err := func() error {
-			session, err := ssh.NewSSHSession(user, host)
-			if err != nil {
-				return err
-			}
-			defer func() {
-				_ = session.Close()
-			}()
-
-			cmd := `env ` + c.Env + ` ROACHPROD=true cassandra` +
-				` -Dcassandra.config=file://${PWD}/cassandra.yaml` +
-				` -Dcassandra.ring_delay_ms=3000` +
-				` > cassandra.stdout 2> cassandra.stderr`
-			_, err = session.CombinedOutput(cmd)
-			return err
-		}(); err != nil {
-			return nil, err
-		}
-
-		for {
-			up, err := func() (bool, error) {
-				session, err := ssh.NewSSHSession(user, host)
-				if err != nil {
-					return false, err
-				}
-				defer func() {
-					_ = session.Close()
-				}()
-
-				cmd := `nc -z $(hostname) 9042`
-				if _, err := session.CombinedOutput(cmd); err != nil {
-					// The common case here is going to be "exit status 1" until the
-					// cassandra process starts listening on the port. Logging would
-					// just generate noise.
-					return false, nil //nolint:returnerrcheck
-				}
-				return true, nil
-			}()
-			if err != nil {
-				return nil, err
-			}
-			if up {
-				break
-			}
-			time.Sleep(time.Second)
-		}
-		return nil, nil
-	})
-}
-
-// NodeDir implements the ClusterImpl.NodeDir interface.
-func (Cassandra) NodeDir(c *SyncedCluster, index, storeIndex int) string {
-	if c.IsLocal() {
-		// TODO(peter): This will require a bit of work to adjust paths in
-		// cassandra.yaml.
-		panic("Cassandra.NodeDir unimplemented")
-	}
-	if storeIndex != 1 {
-		panic("Cassandra.NodeDir only supports one store")
-	}
-	return "/mnt/data1/cassandra"
-}
-
-// LogDir implements the ClusterImpl.NodeDir interface.
-func (Cassandra) LogDir(c *SyncedCluster, index int) string {
-	panic("Cassandra.LogDir unimplemented")
-}
-
-// CertsDir implements the ClusterImpl.NodeDir interface.
-func (Cassandra) CertsDir(c *SyncedCluster, index int) string {
-	panic("Cassandra.CertsDir unimplemented")
-}
-
-// NodeURL implements the ClusterImpl.NodeDir interface.
-func (Cassandra) NodeURL(_ *SyncedCluster, host string, port int) string {
-	return fmt.Sprintf("'cassandra://%s:%d'", host, port)
-}
-
-// NodePort implements the ClusterImpl.NodeDir interface.
-func (Cassandra) NodePort(c *SyncedCluster, index int) int {
-	// TODO(peter): This will require a bit of work to adjust ports in
-	// cassandra.yaml.
-	// if c.IsLocal() {
-	// }
-	return 9042
-}
-
-// NodeUIPort implements the ClusterImpl.NodeDir interface.
-func (Cassandra) NodeUIPort(c *SyncedCluster, index int) int {
-	return 0 // unimplemented
-}
-
-func makeCassandraYAML(c *SyncedCluster) (string, error) {
-	ip, err := c.GetInternalIP(c.ServerNodes()[0])
-	if err != nil {
-		return "", err
-	}
-
-	f, err := ioutil.TempFile("", "cassandra.yaml")
-	if err != nil {
-		return "", err
-	}
-	defer f.Close()
-
-	w := bufio.NewWriter(f)
-	if _, err := w.WriteString(cassandraDefaultYAML); err != nil {
-		return "", err
-	}
-	defer w.Flush()
-
-	t, err := template.New("cassandra.yaml").Parse(cassandraDiffYAML)
-	if err != nil {
-		log.Fatal(err)
-	}
-	m := map[string]interface{}{
-		"Seeds": ip,
-	}
-	if err := t.Execute(w, m); err != nil {
-		log.Fatal(err)
-	}
-	return f.Name(), nil
-}
diff --git a/pkg/cmd/roachprod/install/cassandra_yaml.go b/pkg/cmd/roachprod/install/cassandra_yaml.go
deleted file mode 100644
index 1676bd8c87b6..000000000000
--- a/pkg/cmd/roachprod/install/cassandra_yaml.go
+++ /dev/null
@@ -1,1277 +0,0 @@
-// Copyright 2018 The Cockroach Authors.
-//
-// Use of this software is governed by the Business Source License
-// included in the file licenses/BSL.txt.
-//
-// As of the Change Date specified in that file, in accordance with
-// the Business Source License, use of this software will be governed
-// by the Apache License, Version 2.0, included in the file
-// licenses/APL.txt.
-
-package install
-
-const cassandraDiffYAML = `
-commitlog_sync: batch
-commitlog_sync_batch_window_in_ms: 2
-commitlog_sync_period_in_ms: 0
-
-read_request_timeout_in_ms: 10000
-write_request_timeout_in_ms: 10000
-
-commitlog_directory: /mnt/data1/cassandra/commitlog
-data_file_directories:
-    - /mnt/data1/cassandra/data
-hints_directory: /mnt/data1/cassandra/hints
-saved_caches_directory: /mnt/data1/cassandra/saved_caches
-
-seed_provider:
-    # Addresses of hosts that are deemed contact points.
-    # Cassandra nodes use this list of hosts to find each other and learn
-    # the topology of the ring.  You must change this if you are running
-    # multiple nodes!
-    - class_name: org.apache.cassandra.locator.SimpleSeedProvider
-      parameters:
-          # seeds is actually a comma-delimited list of addresses.
-          # Ex: "<ip1>,<ip2>,<ip3>"
-          - seeds: "{{.Seeds}}"
-`
-
-// Note: this is the default cassandra.yaml that ships with Cassandra
-// 3.11.1. Add overrides to cassandraDiffYAML instead of editing directly.
-//
-// The two edits below are to comment out listen_address and
-// rpc_address. Apparently those configs cannot be cleared once set.
-const cassandraDefaultYAML = `# Cassandra storage config YAML
-
-# NOTE:
-#   See http://wiki.apache.org/cassandra/StorageConfiguration for
-#   full explanations of configuration directives
-# /NOTE
-
-# The name of the cluster. This is mainly used to prevent machines in
-# one logical cluster from joining another.
-cluster_name: 'Test Cluster'
-
-# This defines the number of tokens randomly assigned to this node on the ring
-# The more tokens, relative to other nodes, the larger the proportion of data
-# that this node will store. You probably want all nodes to have the same number
-# of tokens assuming they have equal hardware capability.
-#
-# If you leave this unspecified, Cassandra will use the default of 1 token for legacy compatibility,
-# and will use the initial_token as described below.
-#
-# Specifying initial_token will override this setting on the node's initial start,
-# on subsequent starts, this setting will apply even if initial token is set.
-#
-# If you already have a cluster with 1 token per node, and wish to migrate to 
-# multiple tokens per node, see http://wiki.apache.org/cassandra/Operations
-num_tokens: 256
-
-# Triggers automatic allocation of num_tokens tokens for this node. The allocation
-# algorithm attempts to choose tokens in a way that optimizes replicated load over
-# the nodes in the datacenter for the replication strategy used by the specified
-# keyspace.
-#
-# The load assigned to each node will be close to proportional to its number of
-# vnodes.
-#
-# Only supported with the Murmur3Partitioner.
-# allocate_tokens_for_keyspace: KEYSPACE
-
-# initial_token allows you to specify tokens manually.  While you can use it with
-# vnodes (num_tokens > 1, above) -- in which case you should provide a 
-# comma-separated list -- it's primarily used when adding nodes to legacy clusters 
-# that do not have vnodes enabled.
-# initial_token:
-
-# See http://wiki.apache.org/cassandra/HintedHandoff
-# May either be "true" or "false" to enable globally
-hinted_handoff_enabled: true
-
-# When hinted_handoff_enabled is true, a blocklist of data centers that will not
-# perform hinted handoff
-# hinted_handoff_disabled_datacenters:
-#    - DC1
-#    - DC2
-
-# this defines the maximum amount of time a dead host will have hints
-# generated.  After it has been dead this long, new hints for it will not be
-# created until it has been seen alive and gone down again.
-max_hint_window_in_ms: 10800000 # 3 hours
-
-# Maximum throttle in KBs per second, per delivery thread.  This will be
-# reduced proportionally to the number of nodes in the cluster.  (If there
-# are two nodes in the cluster, each delivery thread will use the maximum
-# rate; if there are three, each will throttle to half of the maximum,
-# since we expect two nodes to be delivering hints simultaneously.)
-hinted_handoff_throttle_in_kb: 1024
-
-# Number of threads with which to deliver hints;
-# Consider increasing this number when you have multi-dc deployments, since
-# cross-dc handoff tends to be slower
-max_hints_delivery_threads: 2
-
-# Directory where Cassandra should store hints.
-# If not set, the default directory is $CASSANDRA_HOME/data/hints.
-# hints_directory: /var/lib/cassandra/hints
-
-# How often hints should be flushed from the internal buffers to disk.
-# Will *not* trigger fsync.
-hints_flush_period_in_ms: 10000
-
-# Maximum size for a single hints file, in megabytes.
-max_hints_file_size_in_mb: 128
-
-# Compression to apply to the hint files. If omitted, hints files
-# will be written uncompressed. LZ4, Snappy, and Deflate compressors
-# are supported.
-#hints_compression:
-#   - class_name: LZ4Compressor
-#     parameters:
-#         -
-
-# Maximum throttle in KBs per second, total. This will be
-# reduced proportionally to the number of nodes in the cluster.
-batchlog_replay_throttle_in_kb: 1024
-
-# Authentication backend, implementing IAuthenticator; used to identify users
-# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthenticator,
-# PasswordAuthenticator}.
-#
-# - AllowAllAuthenticator performs no checks - set it to disable authentication.
-# - PasswordAuthenticator relies on username/password pairs to authenticate
-#   users. It keeps usernames and hashed passwords in system_auth.roles table.
-#   Please increase system_auth keyspace replication factor if you use this authenticator.
-#   If using PasswordAuthenticator, CassandraRoleManager must also be used (see below)
-authenticator: AllowAllAuthenticator
-
-# Authorization backend, implementing IAuthorizer; used to limit access/provide permissions
-# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthorizer,
-# CassandraAuthorizer}.
-#
-# - AllowAllAuthorizer allows any action to any user - set it to disable authorization.
-# - CassandraAuthorizer stores permissions in system_auth.role_permissions table. Please
-#   increase system_auth keyspace replication factor if you use this authorizer.
-authorizer: AllowAllAuthorizer
-
-# Part of the Authentication & Authorization backend, implementing IRoleManager; used
-# to maintain grants and memberships between roles.
-# Out of the box, Cassandra provides org.apache.cassandra.auth.CassandraRoleManager,
-# which stores role information in the system_auth keyspace. Most functions of the
-# IRoleManager require an authenticated login, so unless the configured IAuthenticator
-# actually implements authentication, most of this functionality will be unavailable.
-#
-# - CassandraRoleManager stores role data in the system_auth keyspace. Please
-#   increase system_auth keyspace replication factor if you use this role manager.
-role_manager: CassandraRoleManager
-
-# Validity period for roles cache (fetching granted roles can be an expensive
-# operation depending on the role manager, CassandraRoleManager is one example)
-# Granted roles are cached for authenticated sessions in AuthenticatedUser and
-# after the period specified here, become eligible for (async) reload.
-# Defaults to 2000, set to 0 to disable caching entirely.
-# Will be disabled automatically for AllowAllAuthenticator.
-roles_validity_in_ms: 2000
-
-# Refresh interval for roles cache (if enabled).
-# After this interval, cache entries become eligible for refresh. Upon next
-# access, an async reload is scheduled and the old value returned until it
-# completes. If roles_validity_in_ms is non-zero, then this must be
-# also.
-# Defaults to the same value as roles_validity_in_ms.
-# roles_update_interval_in_ms: 2000
-
-# Validity period for permissions cache (fetching permissions can be an
-# expensive operation depending on the authorizer, CassandraAuthorizer is
-# one example). Defaults to 2000, set to 0 to disable.
-# Will be disabled automatically for AllowAllAuthorizer.
-permissions_validity_in_ms: 2000
-
-# Refresh interval for permissions cache (if enabled).
-# After this interval, cache entries become eligible for refresh. Upon next
-# access, an async reload is scheduled and the old value returned until it
-# completes. If permissions_validity_in_ms is non-zero, then this must be
-# also.
-# Defaults to the same value as permissions_validity_in_ms.
-# permissions_update_interval_in_ms: 2000
-
-# Validity period for credentials cache. This cache is tightly coupled to
-# the provided PasswordAuthenticator implementation of IAuthenticator. If
-# another IAuthenticator implementation is configured, this cache will not
-# be automatically used and so the following settings will have no effect.
-# Please note, credentials are cached in their encrypted form, so while
-# activating this cache may reduce the number of queries made to the
-# underlying table, it may not  bring a significant reduction in the
-# latency of individual authentication attempts.
-# Defaults to 2000, set to 0 to disable credentials caching.
-credentials_validity_in_ms: 2000
-
-# Refresh interval for credentials cache (if enabled).
-# After this interval, cache entries become eligible for refresh. Upon next
-# access, an async reload is scheduled and the old value returned until it
-# completes. If credentials_validity_in_ms is non-zero, then this must be
-# also.
-# Defaults to the same value as credentials_validity_in_ms.
-# credentials_update_interval_in_ms: 2000
-
-# The partitioner is responsible for distributing groups of rows (by
-# partition key) across nodes in the cluster.  You should leave this
-# alone for new clusters.  The partitioner can NOT be changed without
-# reloading all data, so when upgrading you should set this to the
-# same partitioner you were already using.
-#
-# Besides Murmur3Partitioner, partitioners included for backwards
-# compatibility include RandomPartitioner, ByteOrderedPartitioner, and
-# OrderPreservingPartitioner.
-#
-partitioner: org.apache.cassandra.dht.Murmur3Partitioner
-
-# Directories where Cassandra should store data on disk.  Cassandra
-# will spread data evenly across them, subject to the granularity of
-# the configured compaction strategy.
-# If not set, the default directory is $CASSANDRA_HOME/data/data.
-# data_file_directories:
-#     - /var/lib/cassandra/data
-
-# commit log.  when running on magnetic HDD, this should be a
-# separate spindle than the data directories.
-# If not set, the default directory is $CASSANDRA_HOME/data/commitlog.
-# commitlog_directory: /var/lib/cassandra/commitlog
-
-# Enable / disable CDC functionality on a per-node basis. This modifies the logic used
-# for write path allocation rejection (standard: never reject. cdc: reject Mutation
-# containing a CDC-enabled table if at space limit in cdc_raw_directory).
-cdc_enabled: false
-
-# CommitLogSegments are moved to this directory on flush if cdc_enabled: true and the
-# segment contains mutations for a CDC-enabled table. This should be placed on a
-# separate spindle than the data directories. If not set, the default directory is
-# $CASSANDRA_HOME/data/cdc_raw.
-# cdc_raw_directory: /var/lib/cassandra/cdc_raw
-
-# Policy for data disk failures:
-#
-# die
-#   shut down gossip and client transports and kill the JVM for any fs errors or
-#   single-sstable errors, so the node can be replaced.
-#
-# stop_paranoid
-#   shut down gossip and client transports even for single-sstable errors,
-#   kill the JVM for errors during startup.
-#
-# stop
-#   shut down gossip and client transports, leaving the node effectively dead, but
-#   can still be inspected via JMX, kill the JVM for errors during startup.
-#
-# best_effort
-#    stop using the failed disk and respond to requests based on
-#    remaining available sstables.  This means you WILL see obsolete
-#    data at CL.ONE!
-#
-# ignore
-#    ignore fatal errors and let requests fail, as in pre-1.2 Cassandra
-disk_failure_policy: stop
-
-# Policy for commit disk failures:
-#
-# die
-#   shut down gossip and Thrift and kill the JVM, so the node can be replaced.
-#
-# stop
-#   shut down gossip and Thrift, leaving the node effectively dead, but
-#   can still be inspected via JMX.
-#
-# stop_commit
-#   shutdown the commit log, letting writes collect but
-#   continuing to service reads, as in pre-2.0.5 Cassandra
-#
-# ignore
-#   ignore fatal errors and let the batches fail
-commit_failure_policy: stop
-
-# Maximum size of the native protocol prepared statement cache
-#
-# Valid values are either "auto" (omitting the value) or a value greater 0.
-#
-# Note that specifying a too large value will result in long running GCs and possibly
-# out-of-memory errors. Keep the value at a small fraction of the heap.
-#
-# If you constantly see "prepared statements discarded in the last minute because
-# cache limit reached" messages, the first step is to investigate the root cause
-# of these messages and check whether prepared statements are used correctly -
-# i.e. use bind markers for variable parts.
-#
-# Do only change the default value, if you really have more prepared statements than
-# fit in the cache. In most cases it is not necssary to change this value.
-# Constantly re-preparing statements is a performance penalty.
-#
-# Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater
-prepared_statements_cache_size_mb:
-
-# Maximum size of the Thrift prepared statement cache
-#
-# If you do not use Thrift at all, it is safe to leave this value at "auto".
-#
-# See description of 'prepared_statements_cache_size_mb' above for more information.
-#
-# Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater
-thrift_prepared_statements_cache_size_mb:
-
-# Maximum size of the key cache in memory.
-#
-# Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the
-# minimum, sometimes more. The key cache is fairly tiny for the amount of
-# time it saves, so it's worthwhile to use it at large numbers.
-# The row cache saves even more time, but must contain the entire row,
-# so it is extremely space-intensive. It's best to only use the
-# row cache if you have hot rows or static rows.
-#
-# NOTE: if you reduce the size, you may not get you hottest keys loaded on startup.
-#
-# Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache.
-key_cache_size_in_mb:
-
-# Duration in seconds after which Cassandra should
-# save the key cache. Caches are saved to saved_caches_directory as
-# specified in this configuration file.
-#
-# Saved caches greatly improve cold-start speeds, and is relatively cheap in
-# terms of I/O for the key cache. Row cache saving is much more expensive and
-# has limited use.
-#
-# Default is 14400 or 4 hours.
-key_cache_save_period: 14400
-
-# Number of keys from the key cache to save
-# Disabled by default, meaning all keys are going to be saved
-# key_cache_keys_to_save: 100
-
-# Row cache implementation class name. Available implementations:
-#
-# org.apache.cassandra.cache.OHCProvider
-#   Fully off-heap row cache implementation (default).
-#
-# org.apache.cassandra.cache.SerializingCacheProvider
-#   This is the row cache implementation availabile
-#   in previous releases of Cassandra.
-# row_cache_class_name: org.apache.cassandra.cache.OHCProvider
-
-# Maximum size of the row cache in memory.
-# Please note that OHC cache implementation requires some additional off-heap memory to manage
-# the map structures and some in-flight memory during operations before/after cache entries can be
-# accounted against the cache capacity. This overhead is usually small compared to the whole capacity.
-# Do not specify more memory that the system can afford in the worst usual situation and leave some
-# headroom for OS block level cache. Do never allow your system to swap.
-#
-# Default value is 0, to disable row caching.
-row_cache_size_in_mb: 0
-
-# Duration in seconds after which Cassandra should save the row cache.
-# Caches are saved to saved_caches_directory as specified in this configuration file.
-#
-# Saved caches greatly improve cold-start speeds, and is relatively cheap in
-# terms of I/O for the key cache. Row cache saving is much more expensive and
-# has limited use.
-#
-# Default is 0 to disable saving the row cache.
-row_cache_save_period: 0
-
-# Number of keys from the row cache to save.
-# Specify 0 (which is the default), meaning all keys are going to be saved
-# row_cache_keys_to_save: 100
-
-# Maximum size of the counter cache in memory.
-#
-# Counter cache helps to reduce counter locks' contention for hot counter cells.
-# In case of RF = 1 a counter cache hit will cause Cassandra to skip the read before
-# write entirely. With RF > 1 a counter cache hit will still help to reduce the duration
-# of the lock hold, helping with hot counter cell updates, but will not allow skipping
-# the read entirely. Only the local (clock, count) tuple of a counter cell is kept
-# in memory, not the whole counter, so it's relatively cheap.
-#
-# NOTE: if you reduce the size, you may not get you hottest keys loaded on startup.
-#
-# Default value is empty to make it "auto" (min(2.5% of Heap (in MB), 50MB)). Set to 0 to disable counter cache.
-# NOTE: if you perform counter deletes and rely on low gcgs, you should disable the counter cache.
-counter_cache_size_in_mb:
-
-# Duration in seconds after which Cassandra should
-# save the counter cache (keys only). Caches are saved to saved_caches_directory as
-# specified in this configuration file.
-#
-# Default is 7200 or 2 hours.
-counter_cache_save_period: 7200
-
-# Number of keys from the counter cache to save
-# Disabled by default, meaning all keys are going to be saved
-# counter_cache_keys_to_save: 100
-
-# saved caches
-# If not set, the default directory is $CASSANDRA_HOME/data/saved_caches.
-# saved_caches_directory: /var/lib/cassandra/saved_caches
-
-# commitlog_sync may be either "periodic" or "batch." 
-# 
-# When in batch mode, Cassandra won't ack writes until the commit log
-# has been fsynced to disk.  It will wait
-# commitlog_sync_batch_window_in_ms milliseconds between fsyncs.
-# This window should be kept short because the writer threads will
-# be unable to do extra work while waiting.  (You may need to increase
-# concurrent_writes for the same reason.)
-#
-# commitlog_sync: batch
-# commitlog_sync_batch_window_in_ms: 2
-#
-# the other option is "periodic" where writes may be acked immediately
-# and the CommitLog is simply synced every commitlog_sync_period_in_ms
-# milliseconds. 
-commitlog_sync: periodic
-commitlog_sync_period_in_ms: 10000
-
-# The size of the individual commitlog file segments.  A commitlog
-# segment may be archived, deleted, or recycled once all the data
-# in it (potentially from each columnfamily in the system) has been
-# flushed to sstables.
-#
-# The default size is 32, which is almost always fine, but if you are
-# archiving commitlog segments (see commitlog_archiving.properties),
-# then you probably want a finer granularity of archiving; 8 or 16 MB
-# is reasonable.
-# Max mutation size is also configurable via max_mutation_size_in_kb setting in
-# cassandra.yaml. The default is half the size commitlog_segment_size_in_mb * 1024.
-# This should be positive and less than 2048.
-#
-# NOTE: If max_mutation_size_in_kb is set explicitly then commitlog_segment_size_in_mb must
-# be set to at least twice the size of max_mutation_size_in_kb / 1024
-#
-commitlog_segment_size_in_mb: 32
-
-# Compression to apply to the commit log. If omitted, the commit log
-# will be written uncompressed.  LZ4, Snappy, and Deflate compressors
-# are supported.
-# commitlog_compression:
-#   - class_name: LZ4Compressor
-#     parameters:
-#         -
-
-# any class that implements the SeedProvider interface and has a
-# constructor that takes a Map<String, String> of parameters will do.
-seed_provider:
-    # Addresses of hosts that are deemed contact points. 
-    # Cassandra nodes use this list of hosts to find each other and learn
-    # the topology of the ring.  You must change this if you are running
-    # multiple nodes!
-    - class_name: org.apache.cassandra.locator.SimpleSeedProvider
-      parameters:
-          # seeds is actually a comma-delimited list of addresses.
-          # Ex: "<ip1>,<ip2>,<ip3>"
-          - seeds: "127.0.0.1"
-
-# For workloads with more data than can fit in memory, Cassandra's
-# bottleneck will be reads that need to fetch data from
-# disk. "concurrent_reads" should be set to (16 * number_of_drives) in
-# order to allow the operations to enqueue low enough in the stack
-# that the OS and drives can reorder them. Same applies to
-# "concurrent_counter_writes", since counter writes read the current
-# values before incrementing and writing them back.
-#
-# On the other hand, since writes are almost never IO bound, the ideal
-# number of "concurrent_writes" is dependent on the number of cores in
-# your system; (8 * number_of_cores) is a good rule of thumb.
-concurrent_reads: 32
-concurrent_writes: 32
-concurrent_counter_writes: 32
-
-# For materialized view writes, as there is a read involved, so this should
-# be limited by the less of concurrent reads or concurrent writes.
-concurrent_materialized_view_writes: 32
-
-# Maximum memory to use for sstable chunk cache and buffer pooling.
-# 32MB of this are reserved for pooling buffers, the rest is used as an
-# cache that holds uncompressed sstable chunks.
-# Defaults to the smaller of 1/4 of heap or 512MB. This pool is allocated off-heap,
-# so is in addition to the memory allocated for heap. The cache also has on-heap
-# overhead which is roughly 128 bytes per chunk (i.e. 0.2% of the reserved size
-# if the default 64k chunk size is used).
-# Memory is only allocated when needed.
-# file_cache_size_in_mb: 512
-
-# Flag indicating whether to allocate on or off heap when the sstable buffer
-# pool is exhausted, that is when it has exceeded the maximum memory
-# file_cache_size_in_mb, beyond which it will not cache buffers but allocate on request.
-
-# buffer_pool_use_heap_if_exhausted: true
-
-# The strategy for optimizing disk read
-# Possible values are:
-# ssd (for solid state disks, the default)
-# spinning (for spinning disks)
-# disk_optimization_strategy: ssd
-
-# Total permitted memory to use for memtables. Cassandra will stop
-# accepting writes when the limit is exceeded until a flush completes,
-# and will trigger a flush based on memtable_cleanup_threshold
-# If omitted, Cassandra will set both to 1/4 the size of the heap.
-# memtable_heap_space_in_mb: 2048
-# memtable_offheap_space_in_mb: 2048
-
-# memtable_cleanup_threshold is deprecated. The default calculation
-# is the only reasonable choice. See the comments on  memtable_flush_writers
-# for more information.
-#
-# Ratio of occupied non-flushing memtable size to total permitted size
-# that will trigger a flush of the largest memtable. Larger mct will
-# mean larger flushes and hence less compaction, but also less concurrent
-# flush activity which can make it difficult to keep your disks fed
-# under heavy write load.
-#
-# memtable_cleanup_threshold defaults to 1 / (memtable_flush_writers + 1)
-# memtable_cleanup_threshold: 0.11
-
-# Specify the way Cassandra allocates and manages memtable memory.
-# Options are:
-#
-# heap_buffers
-#   on heap nio buffers
-#
-# offheap_buffers
-#   off heap (direct) nio buffers
-#
-# offheap_objects
-#    off heap objects
-memtable_allocation_type: heap_buffers
-
-# Total space to use for commit logs on disk.
-#
-# If space gets above this value, Cassandra will flush every dirty CF
-# in the oldest segment and remove it.  So a small total commitlog space
-# will tend to cause more flush activity on less-active columnfamilies.
-#
-# The default value is the smaller of 8192, and 1/4 of the total space
-# of the commitlog volume.
-#
-# commitlog_total_space_in_mb: 8192
-
-# This sets the number of memtable flush writer threads per disk
-# as well as the total number of memtables that can be flushed concurrently.
-# These are generally a combination of compute and IO bound.
-#
-# Memtable flushing is more CPU efficient than memtable ingest and a single thread
-# can keep up with the ingest rate of a whole server on a single fast disk
-# until it temporarily becomes IO bound under contention typically with compaction.
-# At that point you need multiple flush threads. At some point in the future
-# it may become CPU bound all the time.
-#
-# You can tell if flushing is falling behind using the MemtablePool.BlockedOnAllocation
-# metric which should be 0, but will be non-zero if threads are blocked waiting on flushing
-# to free memory.
-#
-# memtable_flush_writers defaults to two for a single data directory.
-# This means that two  memtables can be flushed concurrently to the single data directory.
-# If you have multiple data directories the default is one memtable flushing at a time
-# but the flush will use a thread per data directory so you will get two or more writers.
-#
-# Two is generally enough to flush on a fast disk [array] mounted as a single data directory.
-# Adding more flush writers will result in smaller more frequent flushes that introduce more
-# compaction overhead.
-#
-# There is a direct tradeoff between number of memtables that can be flushed concurrently
-# and flush size and frequency. More is not better; you just need enough flush writers
-# to never stall waiting for flushing to free memory.
-#
-#memtable_flush_writers: 2
-
-# Total space to use for change-data-capture logs on disk.
-#
-# If space gets above this value, Cassandra will throw WriteTimeoutException
-# on Mutations including tables with CDC enabled. A CDCCompactor is responsible
-# for parsing the raw CDC logs and deleting them when parsing is completed.
-#
-# The default value is the min of 4096 mb and 1/8th of the total space
-# of the drive where cdc_raw_directory resides.
-# cdc_total_space_in_mb: 4096
-
-# When we hit our cdc_raw limit and the CDCCompactor is either running behind
-# or experiencing backpressure, we check at the following interval to see if any
-# new space for cdc-tracked tables has been made available. Default to 250ms
-# cdc_free_space_check_interval_ms: 250
-
-# A fixed memory pool size in MB for SSTable index summaries. If left
-# empty, this will default to 5% of the heap size. If the memory usage of
-# all index summaries exceeds this limit, SSTables with low read rates will
-# shrink their index summaries in order to meet this limit.  However, this
-# is a best-effort process. In extreme conditions Cassandra may need to use
-# more than this amount of memory.
-index_summary_capacity_in_mb:
-
-# How frequently index summaries should be resampled.  This is done
-# periodically to redistribute memory from the fixed-size pool to sstables
-# proportional to their recent read rates.  Setting to -1 will disable this
-# process, leaving existing index summaries at their current sampling level.
-index_summary_resize_interval_in_minutes: 60
-
-# Whether to, when doing sequential writing, fsync() at intervals in
-# order to force the operating system to flush the dirty
-# buffers. Enable this to avoid sudden dirty buffer flushing from
-# impacting read latencies. Almost always a good idea on SSDs; not
-# necessarily on platters.
-trickle_fsync: false
-trickle_fsync_interval_in_kb: 10240
-
-# TCP port, for commands and data
-# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
-storage_port: 7000
-
-# SSL port, for encrypted communication.  Unused unless enabled in
-# encryption_options
-# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
-ssl_storage_port: 7001
-
-# Address or interface to bind to and tell other Cassandra nodes to connect to.
-# You _must_ change this if you want multiple nodes to be able to communicate!
-#
-# Set listen_address OR listen_interface, not both.
-#
-# Leaving it blank leaves it up to InetAddress.getLocalHost(). This
-# will always do the Right Thing _if_ the node is properly configured
-# (hostname, name resolution, etc), and the Right Thing is to use the
-# address associated with the hostname (it might not be).
-#
-# Setting listen_address to 0.0.0.0 is always wrong.
-#
-# listen_address: localhost
-
-# Set listen_address OR listen_interface, not both. Interfaces must correspond
-# to a single address, IP aliasing is not supported.
-# listen_interface: eth0
-
-# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address
-# you can specify which should be chosen using listen_interface_prefer_ipv6. If false the first ipv4
-# address will be used. If true the first ipv6 address will be used. Defaults to false preferring
-# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6.
-# listen_interface_prefer_ipv6: false
-
-# Address to broadcast to other Cassandra nodes
-# Leaving this blank will set it to the same value as listen_address
-# broadcast_address: 1.2.3.4
-
-# When using multiple physical network interfaces, set this
-# to true to listen on broadcast_address in addition to
-# the listen_address, allowing nodes to communicate in both
-# interfaces.
-# Ignore this property if the network configuration automatically
-# routes  between the public and private networks such as EC2.
-# listen_on_broadcast_address: false
-
-# Internode authentication backend, implementing IInternodeAuthenticator;
-# used to allow/disallow connections from peer nodes.
-# internode_authenticator: org.apache.cassandra.auth.AllowAllInternodeAuthenticator
-
-# Whether to start the native transport server.
-# Please note that the address on which the native transport is bound is the
-# same as the rpc_address. The port however is different and specified below.
-start_native_transport: true
-# port for the CQL native transport to listen for clients on
-# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
-native_transport_port: 9042
-# Enabling native transport encryption in client_encryption_options allows you to either use
-# encryption for the standard port or to use a dedicated, additional port along with the unencrypted
-# standard native_transport_port.
-# Enabling client encryption and keeping native_transport_port_ssl disabled will use encryption
-# for native_transport_port. Setting native_transport_port_ssl to a different value
-# from native_transport_port will use encryption for native_transport_port_ssl while
-# keeping native_transport_port unencrypted.
-# native_transport_port_ssl: 9142
-# The maximum threads for handling requests when the native transport is used.
-# This is similar to rpc_max_threads though the default differs slightly (and
-# there is no native_transport_min_threads, idle threads will always be stopped
-# after 30 seconds).
-# native_transport_max_threads: 128
-#
-# The maximum size of allowed frame. Frame (requests) larger than this will
-# be rejected as invalid. The default is 256MB. If you're changing this parameter,
-# you may want to adjust max_value_size_in_mb accordingly. This should be positive and less than 2048.
-# native_transport_max_frame_size_in_mb: 256
-
-# The maximum number of concurrent client connections.
-# The default is -1, which means unlimited.
-# native_transport_max_concurrent_connections: -1
-
-# The maximum number of concurrent client connections per source ip.
-# The default is -1, which means unlimited.
-# native_transport_max_concurrent_connections_per_ip: -1
-
-# Whether to start the thrift rpc server.
-start_rpc: false
-
-# The address or interface to bind the Thrift RPC service and native transport
-# server to.
-#
-# Set rpc_address OR rpc_interface, not both.
-#
-# Leaving rpc_address blank has the same effect as on listen_address
-# (i.e. it will be based on the configured hostname of the node).
-#
-# Note that unlike listen_address, you can specify 0.0.0.0, but you must also
-# set broadcast_rpc_address to a value other than 0.0.0.0.
-#
-# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
-# rpc_address: localhost
-
-# Set rpc_address OR rpc_interface, not both. Interfaces must correspond
-# to a single address, IP aliasing is not supported.
-# rpc_interface: eth1
-
-# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address
-# you can specify which should be chosen using rpc_interface_prefer_ipv6. If false the first ipv4
-# address will be used. If true the first ipv6 address will be used. Defaults to false preferring
-# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6.
-# rpc_interface_prefer_ipv6: false
-
-# port for Thrift to listen for clients on
-rpc_port: 9160
-
-# RPC address to broadcast to drivers and other Cassandra nodes. This cannot
-# be set to 0.0.0.0. If left blank, this will be set to the value of
-# rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must
-# be set.
-# broadcast_rpc_address: 1.2.3.4
-
-# enable or disable keepalive on rpc/native connections
-rpc_keepalive: true
-
-# Cassandra provides two out-of-the-box options for the RPC Server:
-#
-# sync
-#   One thread per thrift connection. For a very large number of clients, memory
-#   will be your limiting factor. On a 64 bit JVM, 180KB is the minimum stack size
-#   per thread, and that will correspond to your use of virtual memory (but physical memory
-#   may be limited depending on use of stack space).
-#
-# hsha
-#   Stands for "half synchronous, half asynchronous." All thrift clients are handled
-#   asynchronously using a small number of threads that does not vary with the amount
-#   of thrift clients (and thus scales well to many clients). The rpc requests are still
-#   synchronous (one thread per active request). If hsha is selected then it is essential
-#   that rpc_max_threads is changed from the default value of unlimited.
-#
-# The default is sync because on Windows hsha is about 30% slower.  On Linux,
-# sync/hsha performance is about the same, with hsha of course using less memory.
-#
-# Alternatively, you can provide your own RPC server by providing the fully-qualified class name
-# of an o.a.c.t.TServerFactory that can create an instance of it.
-rpc_server_type: sync
-
-# Uncomment rpc_min|max_thread to set request pool size limits.
-#
-# Regardless of your choice of RPC server (see above), the number of maximum requests in the
-# RPC thread pool dictates how many concurrent requests are possible (but if you are using the sync
-# RPC server, it also dictates the number of clients that can be connected at all).
-#
-# The default is unlimited and thus provides no protection against clients overwhelming the server. You are
-# encouraged to set a maximum that makes sense for you in production, but do keep in mind that
-# rpc_max_threads represents the maximum number of client requests this server may execute concurrently.
-#
-# rpc_min_threads: 16
-# rpc_max_threads: 2048
-
-# uncomment to set socket buffer sizes on rpc connections
-# rpc_send_buff_size_in_bytes:
-# rpc_recv_buff_size_in_bytes:
-
-# Uncomment to set socket buffer size for internode communication
-# Note that when setting this, the buffer size is limited by net.core.wmem_max
-# and when not setting it it is defined by net.ipv4.tcp_wmem
-# See also:
-# /proc/sys/net/core/wmem_max
-# /proc/sys/net/core/rmem_max
-# /proc/sys/net/ipv4/tcp_wmem
-# /proc/sys/net/ipv4/tcp_rmem
-# and 'man tcp'
-# internode_send_buff_size_in_bytes:
-
-# Uncomment to set socket buffer size for internode communication
-# Note that when setting this, the buffer size is limited by net.core.rmem_max
-# and when not setting it, it is defined by net.ipv4.tcp_rmem
-# internode_recv_buff_size_in_bytes:
-
-# Frame size for thrift (maximum message length).
-thrift_framed_transport_size_in_mb: 15
-
-# Set to true to have Cassandra create a hard link to each sstable
-# flushed or streamed locally in a backups/ subdirectory of the
-# keyspace data.  Removing these links is the operator's
-# responsibility.
-incremental_backups: false
-
-# Whether or not to take a snapshot before each compaction.  Be
-# careful using this option, since Cassandra won't clean up the
-# snapshots for you.  Mostly useful if you're paranoid when there
-# is a data format change.
-snapshot_before_compaction: false
-
-# Whether or not a snapshot is taken of the data before keyspace truncation
-# or dropping of column families. The STRONGLY advised default of true 
-# should be used to provide data safety. If you set this flag to false, you will
-# lose data on truncation or drop.
-auto_snapshot: true
-
-# Granularity of the collation index of rows within a partition.
-# Increase if your rows are large, or if you have a very large
-# number of rows per partition.  The competing goals are these:
-#
-# - a smaller granularity means more index entries are generated
-#   and looking up rows within the partition by collation column
-#   is faster
-# - but, Cassandra will keep the collation index in memory for hot
-#   rows (as part of the key cache), so a larger granularity means
-#   you can cache more hot rows
-column_index_size_in_kb: 64
-
-# Per sstable indexed key cache entries (the collation index in memory
-# mentioned above) exceeding this size will not be held on heap.
-# This means that only partition information is held on heap and the
-# index entries are read from disk.
-#
-# Note that this size refers to the size of the
-# serialized index information and not the size of the partition.
-column_index_cache_size_in_kb: 2
-
-# Number of simultaneous compactions to allow, NOT including
-# validation "compactions" for anti-entropy repair.  Simultaneous
-# compactions can help preserve read performance in a mixed read/write
-# workload, by mitigating the tendency of small sstables to accumulate
-# during a single long-running compaction. The default is usually
-# fine and if you experience problems with compaction running too
-# slowly or too fast, you should look at
-# compaction_throughput_mb_per_sec first.
-#
-# concurrent_compactors defaults to the smaller of (number of disks,
-# number of cores), with a minimum of 2 and a maximum of 8.
-# 
-# If your data directories are backed by SSD, you should increase this
-# to the number of cores.
-#concurrent_compactors: 1
-
-# Throttles compaction to the given total throughput across the entire
-# system. The faster you insert data, the faster you need to compact in
-# order to keep the sstable count down, but in general, setting this to
-# 16 to 32 times the rate you are inserting data is more than sufficient.
-# Setting this to 0 disables throttling. Note that this accounts for all types
-# of compaction, including validation compaction.
-compaction_throughput_mb_per_sec: 16
-
-# When compacting, the replacement sstable(s) can be opened before they
-# are completely written, and used in place of the prior sstables for
-# any range that has been written. This helps to smoothly transfer reads 
-# between the sstables, reducing page cache churn and keeping hot rows hot
-sstable_preemptive_open_interval_in_mb: 50
-
-# Throttles all outbound streaming file transfers on this node to the
-# given total throughput in Mbps. This is necessary because Cassandra does
-# mostly sequential IO when streaming data during bootstrap or repair, which
-# can lead to saturating the network connection and degrading rpc performance.
-# When unset, the default is 200 Mbps or 25 MB/s.
-# stream_throughput_outbound_megabits_per_sec: 200
-
-# Throttles all streaming file transfer between the datacenters,
-# this setting allows users to throttle inter dc stream throughput in addition
-# to throttling all network stream traffic as configured with
-# stream_throughput_outbound_megabits_per_sec
-# When unset, the default is 200 Mbps or 25 MB/s
-# inter_dc_stream_throughput_outbound_megabits_per_sec: 200
-
-# How long the coordinator should wait for read operations to complete
-read_request_timeout_in_ms: 5000
-# How long the coordinator should wait for seq or index scans to complete
-range_request_timeout_in_ms: 10000
-# How long the coordinator should wait for writes to complete
-write_request_timeout_in_ms: 2000
-# How long the coordinator should wait for counter writes to complete
-counter_write_request_timeout_in_ms: 5000
-# How long a coordinator should continue to retry a CAS operation
-# that contends with other proposals for the same row
-cas_contention_timeout_in_ms: 1000
-# How long the coordinator should wait for truncates to complete
-# (This can be much longer, because unless auto_snapshot is disabled
-# we need to flush first so we can snapshot before removing the data.)
-truncate_request_timeout_in_ms: 60000
-# The default timeout for other, miscellaneous operations
-request_timeout_in_ms: 10000
-
-# How long before a node logs slow queries. Select queries that take longer than
-# this timeout to execute, will generate an aggregated log message, so that slow queries
-# can be identified. Set this value to zero to disable slow query logging.
-slow_query_log_timeout_in_ms: 500
-
-# Enable operation timeout information exchange between nodes to accurately
-# measure request timeouts.  If disabled, replicas will assume that requests
-# were forwarded to them instantly by the coordinator, which means that
-# under overload conditions we will waste that much extra time processing 
-# already-timed-out requests.
-#
-# Warning: before enabling this property make sure ntp is installed
-# and the times are synchronized between the nodes.
-cross_node_timeout: false
-
-# Set keep-alive period for streaming
-# This node will send a keep-alive message periodically with this period.
-# If the node does not receive a keep-alive message from the peer for
-# 2 keep-alive cycles, the stream session times out and fails.
-# Default value is 300s (5 minutes), which means stalled stream
-# times out in 10 minutes by default
-# streaming_keep_alive_period_in_secs: 300
-
-# phi value that must be reached for a host to be marked down.
-# most users should never need to adjust this.
-# phi_convict_threshold: 8
-
-# endpoint_snitch -- Set this to a class that implements
-# IEndpointSnitch.  The snitch has two functions:
-#
-# - it teaches Cassandra enough about your network topology to route
-#   requests efficiently
-# - it allows Cassandra to spread replicas around your cluster to avoid
-#   correlated failures. It does this by grouping machines into
-#   "datacenters" and "racks."  Cassandra will do its best not to have
-#   more than one replica on the same "rack" (which may not actually
-#   be a physical location)
-#
-# CASSANDRA WILL NOT ALLOW YOU TO SWITCH TO AN INCOMPATIBLE SNITCH
-# ONCE DATA IS INSERTED INTO THE CLUSTER.  This would cause data loss.
-# This means that if you start with the default SimpleSnitch, which
-# locates every node on "rack1" in "datacenter1", your only options
-# if you need to add another datacenter are GossipingPropertyFileSnitch
-# (and the older PFS).  From there, if you want to migrate to an
-# incompatible snitch like Ec2Snitch you can do it by adding new nodes
-# under Ec2Snitch (which will locate them in a new "datacenter") and
-# decommissioning the old ones.
-#
-# Out of the box, Cassandra provides:
-#
-# SimpleSnitch:
-#    Treats Strategy order as proximity. This can improve cache
-#    locality when disabling read repair.  Only appropriate for
-#    single-datacenter deployments.
-#
-# GossipingPropertyFileSnitch
-#    This should be your go-to snitch for production use.  The rack
-#    and datacenter for the local node are defined in
-#    cassandra-rackdc.properties and propagated to other nodes via
-#    gossip.  If cassandra-topology.properties exists, it is used as a
-#    fallback, allowing migration from the PropertyFileSnitch.
-#
-# PropertyFileSnitch:
-#    Proximity is determined by rack and data center, which are
-#    explicitly configured in cassandra-topology.properties.
-#
-# Ec2Snitch:
-#    Appropriate for EC2 deployments in a single Region. Loads Region
-#    and Availability Zone information from the EC2 API. The Region is
-#    treated as the datacenter, and the Availability Zone as the rack.
-#    Only private IPs are used, so this will not work across multiple
-#    Regions.
-#
-# Ec2MultiRegionSnitch:
-#    Uses public IPs as broadcast_address to allow cross-region
-#    connectivity.  (Thus, you should set seed addresses to the public
-#    IP as well.) You will need to open the storage_port or
-#    ssl_storage_port on the public IP firewall.  (For intra-Region
-#    traffic, Cassandra will switch to the private IP after
-#    establishing a connection.)
-#
-# RackInferringSnitch:
-#    Proximity is determined by rack and data center, which are
-#    assumed to correspond to the 3rd and 2nd octet of each node's IP
-#    address, respectively.  Unless this happens to match your
-#    deployment conventions, this is best used as an example of
-#    writing a custom Snitch class and is provided in that spirit.
-#
-# You can use a custom Snitch by setting this to the full class name
-# of the snitch, which will be assumed to be on your classpath.
-endpoint_snitch: SimpleSnitch
-
-# controls how often to perform the more expensive part of host score
-# calculation
-dynamic_snitch_update_interval_in_ms: 100 
-# controls how often to reset all host scores, allowing a bad host to
-# possibly recover
-dynamic_snitch_reset_interval_in_ms: 600000
-# if set greater than zero and read_repair_chance is < 1.0, this will allow
-# 'pinning' of replicas to hosts in order to increase cache capacity.
-# The badness threshold will control how much worse the pinned host has to be
-# before the dynamic snitch will prefer other replicas over it.  This is
-# expressed as a double which represents a percentage.  Thus, a value of
-# 0.2 means Cassandra would continue to prefer the static snitch values
-# until the pinned host was 20% worse than the fastest.
-dynamic_snitch_badness_threshold: 0.1
-
-# request_scheduler -- Set this to a class that implements
-# RequestScheduler, which will schedule incoming client requests
-# according to the specific policy. This is useful for multi-tenancy
-# with a single Cassandra cluster.
-# NOTE: This is specifically for requests from the client and does
-# not affect inter node communication.
-# org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place
-# org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of
-# client requests to a node with a separate queue for each
-# request_scheduler_id. The scheduler is further customized by
-# request_scheduler_options as described below.
-request_scheduler: org.apache.cassandra.scheduler.NoScheduler
-
-# Scheduler Options vary based on the type of scheduler
-#
-# NoScheduler
-#   Has no options
-#
-# RoundRobin
-#   throttle_limit
-#     The throttle_limit is the number of in-flight
-#     requests per client.  Requests beyond 
-#     that limit are queued up until
-#     running requests can complete.
-#     The value of 80 here is twice the number of
-#     concurrent_reads + concurrent_writes.
-#   default_weight
-#     default_weight is optional and allows for
-#     overriding the default which is 1.
-#   weights
-#     Weights are optional and will default to 1 or the
-#     overridden default_weight. The weight translates into how
-#     many requests are handled during each turn of the
-#     RoundRobin, based on the scheduler id.
-#
-# request_scheduler_options:
-#    throttle_limit: 80
-#    default_weight: 5
-#    weights:
-#      Keyspace1: 1
-#      Keyspace2: 5
-
-# request_scheduler_id -- An identifier based on which to perform
-# the request scheduling. Currently the only valid option is keyspace.
-# request_scheduler_id: keyspace
-
-# Enable or disable inter-node encryption
-# JVM defaults for supported SSL socket protocols and cipher suites can
-# be replaced using custom encryption options. This is not recommended
-# unless you have policies in place that dictate certain settings, or
-# need to disable vulnerable ciphers or protocols in case the JVM cannot
-# be updated.
-# FIPS compliant settings can be configured at JVM level and should not
-# involve changing encryption settings here:
-# https://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/FIPS.html
-# *NOTE* No custom encryption options are enabled at the moment
-# The available internode options are : all, none, dc, rack
-#
-# If set to dc cassandra will encrypt the traffic between the DCs
-# If set to rack cassandra will encrypt the traffic between the racks
-#
-# The passwords used in these options must match the passwords used when generating
-# the keystore and truststore.  For instructions on generating these files, see:
-# http://download.oracle.com/javase/6/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore
-#
-server_encryption_options:
-    internode_encryption: none
-    keystore: conf/.keystore
-    keystore_password: cassandra
-    truststore: conf/.truststore
-    truststore_password: cassandra
-    # More advanced defaults below:
-    # protocol: TLS
-    # algorithm: SunX509
-    # store_type: JKS
-    # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA]
-    # require_client_auth: false
-    # require_endpoint_verification: false
-
-# enable or disable client/server encryption.
-client_encryption_options:
-    enabled: false
-    # If enabled and optional is set to true encrypted and unencrypted connections are handled.
-    optional: false
-    keystore: conf/.keystore
-    keystore_password: cassandra
-    # require_client_auth: false
-    # Set truststore and truststore_password if require_client_auth is true
-    # truststore: conf/.truststore
-    # truststore_password: cassandra
-    # More advanced defaults below:
-    # protocol: TLS
-    # algorithm: SunX509
-    # store_type: JKS
-    # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA]
-
-# internode_compression controls whether traffic between nodes is
-# compressed.
-# Can be:
-#
-# all
-#   all traffic is compressed
-#
-# dc
-#   traffic between different datacenters is compressed
-#
-# none
-#   nothing is compressed.
-internode_compression: dc
-
-# Enable or disable tcp_nodelay for inter-dc communication.
-# Disabling it will result in larger (but fewer) network packets being sent,
-# reducing overhead from the TCP protocol itself, at the cost of increasing
-# latency if you block for cross-datacenter responses.
-inter_dc_tcp_nodelay: false
-
-# TTL for different trace types used during logging of the repair process.
-tracetype_query_ttl: 86400
-tracetype_repair_ttl: 604800
-
-# By default, Cassandra logs GC Pauses greater than 200 ms at INFO level
-# This threshold can be adjusted to minimize logging if necessary
-# gc_log_threshold_in_ms: 200
-
-# If gc_warn_threshold_in_ms is unset, all GC Pauses greater than
-# gc_log_threshold_in_ms will log at INFO level
-# UDFs (user defined functions) are disabled by default.
-# As of Cassandra 3.0 there is a sandbox in place that should prevent execution of evil code.
-enable_user_defined_functions: false
-
-# Enables scripted UDFs (JavaScript UDFs).
-# Java UDFs are always enabled, if enable_user_defined_functions is true.
-# Enable this option to be able to use UDFs with "language javascript" or any custom JSR-223 provider.
-# This option has no effect, if enable_user_defined_functions is false.
-enable_scripted_user_defined_functions: false
-
-# The default Windows kernel timer and scheduling resolution is 15.6ms for power conservation.
-# Lowering this value on Windows can provide much tighter latency and better throughput, however
-# some virtualized environments may see a negative performance impact from changing this setting
-# below their system default. The sysinternals 'clockres' tool can confirm your system's default
-# setting.
-windows_timer_interval: 1
-
-
-# Enables encrypting data at-rest (on disk). Different key providers can be plugged in, but the default reads from
-# a JCE-style keystore. A single keystore can hold multiple keys, but the one referenced by
-# the "key_alias" is the only key that will be used for encrypt operations; previously used keys
-# can still (and should!) be in the keystore and will be used on decrypt operations
-# (to handle the case of key rotation).
-#
-# It is strongly recommended to download and install Java Cryptography Extension (JCE)
-# Unlimited Strength Jurisdiction Policy Files for your version of the JDK.
-# (current link: http://www.oracle.com/technetwork/java/javase/downloads/jce8-download-2133166.html)
-#
-# Currently, only the following file types are supported for transparent data encryption, although
-# more are coming in future cassandra releases: commitlog, hints
-transparent_data_encryption_options:
-    enabled: false
-    chunk_length_kb: 64
-    cipher: AES/CBC/PKCS5Padding
-    key_alias: testing:1
-    # CBC IV length for AES needs to be 16 bytes (which is also the default size)
-    # iv_length: 16
-    key_provider: 
-      - class_name: org.apache.cassandra.security.JKSKeyProvider
-        parameters: 
-          - keystore: conf/.keystore
-            keystore_password: cassandra
-            store_type: JCEKS
-            key_password: cassandra
-
-
-#####################
-# SAFETY THRESHOLDS #
-#####################
-
-# When executing a scan, within or across a partition, we need to keep the
-# tombstones seen in memory so we can return them to the coordinator, which
-# will use them to make sure other replicas also know about the deleted rows.
-# With workloads that generate a lot of tombstones, this can cause performance
-# problems and even exhaust the server heap.
-# (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets)
-# Adjust the thresholds here if you understand the dangers and want to
-# scan more tombstones anyway.  These thresholds may also be adjusted at runtime
-# using the StorageService mbean.
-tombstone_warn_threshold: 1000
-tombstone_failure_threshold: 100000
-
-# Log WARN on any multiple-partition batch size exceeding this value. 5kb per batch by default.
-# Caution should be taken on increasing the size of this threshold as it can lead to node instability.
-batch_size_warn_threshold_in_kb: 5
-
-# Fail any multiple-partition batch exceeding this value. 50kb (10x warn threshold) by default.
-batch_size_fail_threshold_in_kb: 50
-
-# Log WARN on any batches not of type LOGGED that span across more partitions than this limit
-unlogged_batch_across_partitions_warn_threshold: 10
-
-# Log a warning when compacting partitions larger than this value
-compaction_large_partition_warning_threshold_mb: 100
-
-# GC Pauses greater than gc_warn_threshold_in_ms will be logged at WARN level
-# Adjust the threshold based on your application throughput requirement
-# By default, Cassandra logs GC Pauses greater than 200 ms at INFO level
-gc_warn_threshold_in_ms: 1000
-
-# Maximum size of any value in SSTables. Safety measure to detect SSTable corruption
-# early. Any value size larger than this threshold will result in marking an SSTable
-# as corrupted. This should be positive and less than 2048.
-# max_value_size_in_mb: 256
-
-# Back-pressure settings #
-# If enabled, the coordinator will apply the back-pressure strategy specified below to each mutation
-# sent to replicas, with the aim of reducing pressure on overloaded replicas.
-back_pressure_enabled: false
-# The back-pressure strategy applied.
-# The default implementation, RateBasedBackPressure, takes three arguments:
-# high ratio, factor, and flow type, and uses the ratio between incoming mutation responses and outgoing mutation requests.
-# If below high ratio, outgoing mutations are rate limited according to the incoming rate decreased by the given factor;
-# if above high ratio, the rate limiting is increased by the given factor;
-# such factor is usually best configured between 1 and 10, use larger values for a faster recovery
-# at the expense of potentially more dropped mutations;
-# the rate limiting is applied according to the flow type: if FAST, it's rate limited at the speed of the fastest replica,
-# if SLOW at the speed of the slowest one.
-# New strategies can be added. Implementors need to implement org.apache.cassandra.net.BackpressureStrategy and
-# provide a public constructor accepting a Map<String, Object>.
-back_pressure_strategy:
-    - class_name: org.apache.cassandra.net.RateBasedBackPressure
-      parameters:
-        - high_ratio: 0.90
-          factor: 5
-          flow: FAST
-
-# Coalescing Strategies #
-# Coalescing multiple messages turns out to significantly boost message processing throughput (think doubling or more).
-# On bare metal, the floor for packet processing throughput is high enough that many applications won't notice, but in
-# virtualized environments, the point at which an application can be bound by network packet processing can be
-# surprisingly low compared to the throughput of task processing that is possible inside a VM. It's not that bare metal
-# doesn't benefit from coalescing messages, it's that the number of packets a bare metal network interface can process
-# is sufficient for many applications such that no load starvation is experienced even without coalescing.
-# There are other benefits to coalescing network messages that are harder to isolate with a simple metric like messages
-# per second. By coalescing multiple tasks together, a network thread can process multiple messages for the cost of one
-# trip to read from a socket, and all the task submission work can be done at the same time reducing context switching
-# and increasing cache friendliness of network message processing.
-# See CASSANDRA-8692 for details.
-
-# Strategy to use for coalescing messages in OutboundTcpConnection.
-# Can be fixed, movingaverage, timehorizon, disabled (default).
-# You can also specify a subclass of CoalescingStrategies.CoalescingStrategy by name.
-# otc_coalescing_strategy: DISABLED
-
-# How many microseconds to wait for coalescing. For fixed strategy this is the amount of time after the first
-# message is received before it will be sent with any accompanying messages. For moving average this is the
-# maximum amount of time that will be waited as well as the interval at which messages must arrive on average
-# for coalescing to be enabled.
-# otc_coalescing_window_us: 200
-
-# Do not try to coalesce messages if we already got that many messages. This should be more than 2 and less than 128.
-# otc_coalescing_enough_coalesced_messages: 8
-
-# How many milliseconds to wait between two expiration runs on the backlog (queue) of the OutboundTcpConnection.
-# Expiration is done if messages are piling up in the backlog. Droppable messages are expired to free the memory
-# taken by expired messages. The interval should be between 0 and 1000, and in most installations the default value
-# will be appropriate. A smaller value could potentially expire messages slightly sooner at the expense of more CPU
-# time and queue contention while iterating the backlog of messages.
-# An interval of 0 disables any wait time, which is the behavior of former Cassandra versions.
-#
-# otc_backlog_expiration_interval_ms: 200
-`
diff --git a/pkg/cmd/roachprod/main.go b/pkg/cmd/roachprod/main.go
index 5a48649b4edd..8b30d422545f 100644
--- a/pkg/cmd/roachprod/main.go
+++ b/pkg/cmd/roachprod/main.go
@@ -87,7 +87,6 @@ var (
 	listDetails       bool
 	listJSON          bool
 	listMine          bool
-	clusterType       = "cockroach"
 	secure            = false
 	nodeEnv           = []string{
 		"COCKROACH_ENABLE_RPC_COMPRESSION=false",
@@ -158,22 +157,15 @@ Available clusters:
 		return nil, err
 	}
 
-	switch clusterType {
-	case "cockroach":
-		c.Impl = install.Cockroach{}
-		if numRacks > 0 {
-			for i := range c.Localities {
-				rack := fmt.Sprintf("rack=%d", i%numRacks)
-				if c.Localities[i] != "" {
-					rack = "," + rack
-				}
-				c.Localities[i] += rack
+	c.Impl = install.Cockroach{}
+	if numRacks > 0 {
+		for i := range c.Localities {
+			rack := fmt.Sprintf("rack=%d", i%numRacks)
+			if c.Localities[i] != "" {
+				rack = "," + rack
 			}
+			c.Localities[i] += rack
 		}
-	case "cassandra":
-		c.Impl = install.Cassandra{}
-	default:
-		return nil, fmt.Errorf("unknown cluster type: %s", clusterType)
 	}
 
 	nodes, err := install.ListNodes(nodeNames, len(c.VMs))
@@ -1995,8 +1987,6 @@ func main() {
 				&nodeArgs, "args", "a", nil, "node arguments")
 			cmd.Flags().StringArrayVarP(
 				&nodeEnv, "env", "e", nodeEnv, "node environment variables")
-			cmd.Flags().StringVarP(
-				&clusterType, "type", "t", clusterType, `cluster type ("cockroach" or "cassandra")`)
 			cmd.Flags().BoolVar(
 				&install.StartOpts.Encrypt, "encrypt", encrypt, "start nodes with encryption at rest turned on")
 			cmd.Flags().BoolVar(
diff --git a/pkg/cmd/roachprod/ssh/BUILD.bazel b/pkg/cmd/roachprod/ssh/BUILD.bazel
index e411e183868e..72504464df19 100644
--- a/pkg/cmd/roachprod/ssh/BUILD.bazel
+++ b/pkg/cmd/roachprod/ssh/BUILD.bazel
@@ -8,13 +8,4 @@ go_library(
     ],
     importpath = "github.com/cockroachdb/cockroach/pkg/cmd/roachprod/ssh",
     visibility = ["//visibility:public"],
-    deps = [
-        "//pkg/cmd/roachprod/config",
-        "//pkg/util/syncutil",
-        "@com_github_cockroachdb_errors//:errors",
-        "@com_github_cockroachdb_errors//oserror",
-        "@org_golang_x_crypto//ssh",
-        "@org_golang_x_crypto//ssh/agent",
-        "@org_golang_x_crypto//ssh/knownhosts",
-    ],
 )
diff --git a/pkg/cmd/roachprod/ssh/ssh.go b/pkg/cmd/roachprod/ssh/ssh.go
index 2da9aba7e8fe..93cfb8947e02 100644
--- a/pkg/cmd/roachprod/ssh/ssh.go
+++ b/pkg/cmd/roachprod/ssh/ssh.go
@@ -10,170 +10,7 @@
 
 package ssh
 
-import (
-	"fmt"
-	"io"
-	"io/ioutil"
-	"log"
-	"net"
-	"os"
-	"path/filepath"
-	"strings"
-	"sync"
-	"time"
-
-	"github.com/cockroachdb/cockroach/pkg/cmd/roachprod/config"
-	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
-	"github.com/cockroachdb/errors"
-	"github.com/cockroachdb/errors/oserror"
-	"golang.org/x/crypto/ssh"
-	"golang.org/x/crypto/ssh/agent"
-	"golang.org/x/crypto/ssh/knownhosts"
-)
-
-var knownHosts ssh.HostKeyCallback
-var knownHostsOnce sync.Once
-
-// InsecureIgnoreHostKey TODO(peter): document
-var InsecureIgnoreHostKey bool
-
-func getKnownHosts() ssh.HostKeyCallback {
-	knownHostsOnce.Do(func() {
-		var err error
-		if InsecureIgnoreHostKey {
-			knownHosts = ssh.InsecureIgnoreHostKey()
-		} else {
-			knownHosts, err = knownhosts.New(filepath.Join(os.Getenv("HOME"), ".ssh", "known_hosts"))
-			if err != nil {
-				log.Fatal(err)
-			}
-		}
-	})
-	return knownHosts
-}
-
-func getSSHAgentSigners() []ssh.Signer {
-	const authSockEnv = "SSH_AUTH_SOCK"
-	agentSocket := os.Getenv(authSockEnv)
-	if agentSocket == "" {
-		return nil
-	}
-	sock, err := net.Dial("unix", agentSocket)
-	if err != nil {
-		log.Printf("SSH_AUTH_SOCK set but unable to connect to agent: %s", err)
-		return nil
-	}
-	agent := agent.NewClient(sock)
-	signers, err := agent.Signers()
-	if err != nil {
-		log.Printf("unable to retrieve keys from agent: %s", err)
-		return nil
-	}
-	return signers
-}
-
-func getSSHKeySigner(path string, haveAgent bool) ssh.Signer {
-	key, err := ioutil.ReadFile(path)
-	if err != nil {
-		if !oserror.IsNotExist(err) {
-			log.Printf("unable to read SSH key %q: %s", path, err)
-		}
-		return nil
-	}
-
-	signer, err := ssh.ParsePrivateKey(key)
-	if err != nil {
-		if strings.Contains(err.Error(), "cannot decode encrypted private key") {
-			if !haveAgent {
-				log.Printf(
-					"skipping encrypted SSH key %q; if necessary, add the key to your SSH agent", path)
-			}
-		} else {
-			log.Printf("unable to parse SSH key %q: %s", path, err)
-		}
-		return nil
-	}
-	return signer
-}
-
-func getDefaultSSHKeySigners(haveAgent bool) []ssh.Signer {
-	var signers []ssh.Signer
-	for _, name := range []string{"id_rsa", "google_compute_engine"} {
-		s := getSSHKeySigner(filepath.Join(config.OSUser.HomeDir, ".ssh", name), haveAgent)
-		if s != nil {
-			signers = append(signers, s)
-		}
-	}
-	return signers
-}
-
-func newSSHClient(user, host string) (*ssh.Client, net.Conn, error) {
-	config := &ssh.ClientConfig{
-		User:            user,
-		Auth:            []ssh.AuthMethod{ssh.PublicKeys(sshState.signers...)},
-		HostKeyCallback: getKnownHosts(),
-	}
-	config.SetDefaults()
-
-	addr := fmt.Sprintf("%s:22", host)
-	conn, err := net.DialTimeout("tcp", addr, 30*time.Second)
-	if err != nil {
-		return nil, nil, err
-	}
-	c, chans, reqs, err := ssh.NewClientConn(conn, addr, config)
-	if err != nil {
-		return nil, nil, err
-	}
-	return ssh.NewClient(c, chans, reqs), conn, nil
-}
-
-type sshClient struct {
-	syncutil.Mutex
-	*ssh.Client
-}
-
-var sshState = struct {
-	signers     []ssh.Signer
-	signersInit sync.Once
-
-	clients  map[string]*sshClient
-	clientMu syncutil.Mutex
-}{
-	clients: map[string]*sshClient{},
-}
-
-// NewSSHSession TODO(peter): document
-func NewSSHSession(user, host string) (*ssh.Session, error) {
-	if host == "127.0.0.1" || host == "localhost" {
-		return nil, errors.New("unable to ssh to localhost; file a bug")
-	}
-
-	sshState.clientMu.Lock()
-	target := fmt.Sprintf("%s@%s", user, host)
-	client := sshState.clients[target]
-	if client == nil {
-		client = &sshClient{}
-		sshState.clients[target] = client
-	}
-	sshState.clientMu.Unlock()
-
-	sshState.signersInit.Do(func() {
-		sshState.signers = append(sshState.signers, getSSHAgentSigners()...)
-		haveAgentSigner := len(sshState.signers) > 0
-		sshState.signers = append(sshState.signers, getDefaultSSHKeySigners(haveAgentSigner)...)
-	})
-
-	client.Lock()
-	defer client.Unlock()
-	if client.Client == nil {
-		var err error
-		client.Client, _, err = newSSHClient(user, host)
-		if err != nil {
-			return nil, err
-		}
-	}
-	return client.NewSession()
-}
+import "io"
 
 // ProgressWriter TODO(peter): document
 type ProgressWriter struct {
@@ -191,3 +28,6 @@ func (p *ProgressWriter) Write(b []byte) (int, error) {
 	}
 	return n, err
 }
+
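+// InsecureIgnoreHostKey, when true, requests that SSH host key verification be skipped. NOTE(review): its former reader in this file was removed; confirm callers still use it.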
+var InsecureIgnoreHostKey bool
diff --git a/vendor b/vendor
index 5f02580983f4..17d1c1fc0764 160000
--- a/vendor
+++ b/vendor
@@ -1 +1 @@
-Subproject commit 5f02580983f42ee57ed1158ae27b96f1b6d87c8c
+Subproject commit 17d1c1fc076405f1135f223e02de9bb0ff81eadd