cli: default --cache and --max-sql-memory to 128MiB
Previously, these settings defaulted to 1/4 of physical memory. That is
a reasonable default for production, but not for development or for an
environment where cockroach shares the machine with other processes. For
example, it was easy to run a 5-node cluster locally on a single machine
with each cockroach node configured to use 1/4 of physical memory for
cache; together the nodes want 125% of physical memory, so leaving the
cluster running long enough would crash it.

Using a relatively small cache size by default also avoids the perceived
memory leak where an idle cockroach node slowly grows to fill the cache
because it is continually writing timeseries data.

The downside of this change is that we need to educate users to increase
--cache and --max-sql-memory when deploying to production. To help, this
change adds a warning to the logs when running with the default settings:

  Using the default setting for --cache (128 MiB).
  A significantly larger value is usually needed for good performance.
  If you have a dedicated server a reasonable setting is --cache=25% (4.0 GiB).

A future enhancement can add a warning to the admin UI.

Also allow --cache and --max-sql-memory to be specified as a percentage
of physical memory. This provides an easy fallback to the previous
behavior: --cache=25% and --max-sql-memory=25%.
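
For illustration, here is a minimal, self-contained sketch of the
percentage handling just described. It is not the code in this commit
(that is the bytesOrPercentageValue type added to pkg/cli/start.go
below); the fixed 16 GiB figure merely stands in for what
server.GetTotalMemory reports on a real machine.

package main

import (
	"fmt"
	"strconv"
	"strings"
)

// resolveSize turns a flag value like "25%" into a byte count against the
// given total; plain sizes (e.g. "128MiB") are passed through unchanged.
func resolveSize(s string, totalMemory int64) (string, error) {
	if !strings.HasSuffix(s, "%") {
		return s, nil
	}
	percent, err := strconv.Atoi(strings.TrimSuffix(s, "%"))
	if err != nil {
		return "", err
	}
	if percent < 0 || percent > 99 {
		return "", fmt.Errorf("percentage out of range: %d%%", percent)
	}
	return fmt.Sprint(totalMemory * int64(percent) / 100), nil
}

func main() {
	// On a hypothetical 16 GiB machine, --cache=25% resolves to 4 GiB.
	resolved, err := resolveSize("25%", 16<<30)
	if err != nil {
		panic(err)
	}
	fmt.Println(resolved) // 4294967296 bytes, i.e. 4 GiB
}
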
petermattis committed Aug 31, 2017
1 parent 6dedb02 commit e12ba3c
Showing 7 changed files with 90 additions and 33 deletions.
8 changes: 4 additions & 4 deletions pkg/cli/cliflags/flags.go
@@ -95,17 +95,17 @@ accept requests.`,
Total size in bytes available for use to store temporary data for SQL
clients, including prepared queries and intermediate data rows during
query execution. Size suffixes are supported (e.g. 1GB and 1GiB). If
-left unspecified, defaults to 25% of the physical memory, or 512MB if
-the memory size cannot be determined.`,
+left unspecified, defaults to 128MiB. A percentage of physical memory
+can also be specified (e.g. 25%).`,
}

Cache = FlagInfo{
Name: "cache",
Description: `
Total size in bytes for caches, shared evenly if there are multiple
storage devices. Size suffixes are supported (e.g. 1GB and 1GiB).
-If left unspecified, defaults to 25% of the physical memory, or
-512MB if the memory size cannot be determined.`,
+If left unspecified, defaults to 128MiB. A percentage of physical memory
+can also be specified (e.g. 25%).`,
}

ClientHost = FlagInfo{
2 changes: 1 addition & 1 deletion pkg/cli/debug.go
@@ -70,7 +70,7 @@ func parseRangeID(arg string) (roachpb.RangeID, error) {
}

func openStore(cmd *cobra.Command, dir string, stopper *stop.Stopper) (*engine.RocksDB, error) {
-cache := engine.NewRocksDBCache(512 << 20)
+cache := engine.NewRocksDBCache(server.DefaultCacheSize)
defer cache.Release()
maxOpenFiles, err := server.SetOpenFileLimitForOneStore()
if err != nil {
9 changes: 2 additions & 7 deletions pkg/cli/flags.go
@@ -30,7 +30,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/cli/cliflags"
"github.com/cockroachdb/cockroach/pkg/security"
"github.com/cockroachdb/cockroach/pkg/util/envutil"
"github.com/cockroachdb/cockroach/pkg/util/humanizeutil"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/log/logflags"
)
@@ -247,12 +246,8 @@ func init() {
varFlag(f, &serverCfg.JoinList, cliflags.Join)

// Engine flags.
-setDefaultSizeParameters(&serverCfg)
-cacheSize := humanizeutil.NewBytesValue(&serverCfg.CacheSize)
-varFlag(f, cacheSize, cliflags.Cache)
-
-sqlSize := humanizeutil.NewBytesValue(&serverCfg.SQLMemoryPoolSize)
-varFlag(f, sqlSize, cliflags.SQLMem)
+varFlag(f, cacheSizeValue, cliflags.Cache)
+varFlag(f, sqlSizeValue, cliflags.SQLMem)
}

for _, cmd := range certCmds {
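
As an aside, the varFlag calls above register values that satisfy
pflag.Value (note the var _ pflag.Value = &BytesValue{} assertion in
humanize.go below). A rough, self-contained sketch of that pattern,
using spf13/pflag directly and a toy value type rather than the real
cacheSizeValue added in pkg/cli/start.go, looks like this:

package main

import (
	"fmt"

	"github.com/spf13/pflag"
)

// toyValue shows the minimal surface a type needs to be registered as a
// flag: pflag.Value is just Set, Type and String, which is exactly what
// bytesOrPercentageValue implements.
type toyValue struct{ s string }

func (v *toyValue) Set(s string) error { v.s = s; return nil }
func (v *toyValue) Type() string       { return "bytes" }
func (v *toyValue) String() string     { return v.s }

func main() {
	fs := pflag.NewFlagSet("start", pflag.ContinueOnError)
	v := &toyValue{}
	fs.Var(v, "cache", "total size in bytes for caches")
	if err := fs.Parse([]string{"--cache=25%"}); err != nil {
		panic(err)
	}
	fmt.Println(v.String()) // 25%
}
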
2 changes: 1 addition & 1 deletion pkg/cli/interactive_tests/test_sql_mem_monitor.tcl
@@ -44,7 +44,7 @@ send "ulimit -v [ expr {2*$vmem+400} ]\r"
eexpect ":/# "

# Start a server with this limit set. The server will now run in the foreground.
send "$argv start --insecure --no-redirect-stderr -s=path=logs/db \r"
send "$argv start --insecure --max-sql-memory=25% --no-redirect-stderr -s=path=logs/db \r"
eexpect "restarted pre-existing node"
sleep 1

81 changes: 66 additions & 15 deletions pkg/cli/start.go
@@ -27,6 +27,7 @@ import (
"path/filepath"
"runtime"
"runtime/pprof"
"strconv"
"strings"
"syscall"
"text/tabwriter"
@@ -47,6 +48,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/envutil"
"github.com/cockroachdb/cockroach/pkg/util/grpcutil"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/humanizeutil"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/log/logflags"
"github.com/cockroachdb/cockroach/pkg/util/stop"
@@ -77,21 +79,6 @@ uninitialized, specify the --join flag to point to any healthy node
RunE: MaybeShoutError(MaybeDecorateGRPCError(runStart)),
}

-func setDefaultSizeParameters(ctx *server.Config) {
-if size, err := server.GetTotalMemory(context.Background()); err == nil {
-// Default the cache size to 1/4 of total memory. A larger cache size
-// doesn't necessarily improve performance as this is memory that is
-// dedicated to uncompressed blocks in RocksDB. A larger value here will
-// compete with the OS buffer cache which holds compressed blocks.
-ctx.CacheSize = size / 4
-
-// Default the SQL memory pool size to 1/4 of total memory. Again
-// we do not want to allow too much lest this will pressure
-// against OS buffers and decrease overall client throughput.
-ctx.SQLMemoryPoolSize = size / 4
-}
-}

// maxSizePerProfile is the maximum total size in bytes for profiles per
// profile type.
var maxSizePerProfile = envutil.EnvOrDefaultInt64(
@@ -276,6 +263,51 @@ func initBlockProfile() {
// can change this.
var ErrorCode = 1

type bytesOrPercentageValue struct {
val *int64
bval *humanizeutil.BytesValue
}

func newBytesOrPercentageValue(v *int64) *bytesOrPercentageValue {
return &bytesOrPercentageValue{
val: v,
bval: humanizeutil.NewBytesValue(v),
}
}

func (b *bytesOrPercentageValue) Set(s string) error {
if strings.HasSuffix(s, "%") {
percent, err := strconv.Atoi(s[:len(s)-1])
if err != nil {
return err
}
if percent < 0 || percent > 99 {
return fmt.Errorf("percentage out of range")
}
size, err := server.GetTotalMemory(context.Background())
if err != nil {
return err
}
s = fmt.Sprint((size * int64(percent)) / 100)
}
return b.bval.Set(s)
}

func (b *bytesOrPercentageValue) Type() string {
return b.bval.Type()
}

func (b *bytesOrPercentageValue) String() string {
return b.bval.String()
}

func (b *bytesOrPercentageValue) IsSet() bool {
return b.bval.IsSet()
}

var cacheSizeValue = newBytesOrPercentageValue(&serverCfg.CacheSize)
var sqlSizeValue = newBytesOrPercentageValue(&serverCfg.SQLMemoryPoolSize)

// runStart starts the cockroach node using --store as the list of
// storage devices ("stores") on this machine and --join as the list
// of other active nodes used to join this node to the cockroach
@@ -521,6 +553,23 @@ func runStart(cmd *cobra.Command, args []string) error {
return returnErr
}

func maybeWarnCacheSize() {
if cacheSizeValue.IsSet() {
return
}

var buf bytes.Buffer
fmt.Fprintf(&buf, "Using the default setting for --cache (%s).\n", cacheSizeValue)
fmt.Fprintf(&buf, " A significantly larger value is usually needed for good performance.\n")
if size, err := server.GetTotalMemory(context.Background()); err == nil {
fmt.Fprintf(&buf, " If you have a dedicated server a reasonable setting is --cache=25%% (%s).",
humanizeutil.IBytes(size/4))
} else {
fmt.Fprintf(&buf, " If you have a dedicated server a reasonable setting is 25%% of physical memory.")
}
log.Warning(context.Background(), buf.String())
}

// setupAndInitializeLoggingAndProfiling does what it says on the label.
// Prior to this however it determines suitable defaults for the
// logging output directory and the verbosity level of stderr logging.
@@ -601,6 +650,8 @@ func setupAndInitializeLoggingAndProfiling(ctx context.Context) (*stop.Stopper,
"Check out how to secure your cluster: https://www.cockroachlabs.com/docs/stable/secure-a-cluster.html")
}

maybeWarnCacheSize()

// We log build information to stdout (for the short summary), but also
// to stderr to coincide with the full logs.
info := build.GetInfo()
14 changes: 10 additions & 4 deletions pkg/server/config.go
@@ -50,9 +50,15 @@ import (

// Context defaults.
const (
-defaultCGroupMemPath = "/sys/fs/cgroup/memory/memory.limit_in_bytes"
-defaultCacheSize = 512 << 20 // 512 MB
-defaultSQLMemoryPoolSize = 512 << 20 // 512 MB
+defaultCGroupMemPath = "/sys/fs/cgroup/memory/memory.limit_in_bytes"
+// DefaultCacheSize is the default size of the RocksDB cache. We default the
+// cache size and SQL memory pool size to 128 MiB. Larger values might
+// provide significantly better performance, but we're not sure what type of
+// system we're running on (development or production or some shared
+// environment). Production users should almost certainly override these
+// settings and we'll warn in the logs about doing so.
+DefaultCacheSize = 128 << 20 // 128 MB
+defaultSQLMemoryPoolSize = 128 << 20 // 128 MB
defaultScanInterval = 10 * time.Minute
defaultScanMaxIdleTime = 200 * time.Millisecond
defaultMetricsSampleInterval = 10 * time.Second
@@ -359,7 +365,7 @@ func MakeConfig(st *cluster.Settings) Config {
Config: new(base.Config),
MaxOffset: MaxOffsetType(base.DefaultMaxClockOffset),
Settings: st,
-CacheSize: defaultCacheSize,
+CacheSize: DefaultCacheSize,
SQLMemoryPoolSize: defaultSQLMemoryPoolSize,
ScanInterval: defaultScanInterval,
ScanMaxIdleTime: defaultScanMaxIdleTime,
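
A quick aside on the numbers in this hunk and in the warning quoted in
the commit message (the 16 GiB machine is only an example):

package main

import "fmt"

func main() {
	// The new default: 128 << 20 is 134,217,728 bytes, i.e. 128 MiB
	// (the inline "// 128 MB" comments above use MB loosely for MiB).
	fmt.Println(128 << 20) // 134217728

	// The "4.0 GiB" suggested by the warning is 25% of an example
	// 16 GiB machine.
	fmt.Println(int64(16<<30) / 4) // 4294967296
}
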
7 changes: 6 additions & 1 deletion pkg/util/humanizeutil/humanize.go
@@ -74,7 +74,7 @@ var _ pflag.Value = &BytesValue{}

// NewBytesValue creates a new pflag.Value bound to the specified
// int64 variable. It also happens to be a flag.Value.
-func NewBytesValue(val *int64) pflag.Value {
+func NewBytesValue(val *int64) *BytesValue {
return &BytesValue{val: val}
}

@@ -108,3 +108,8 @@ func (b *BytesValue) String() string {
// vs 1024.
return IBytes(atomic.LoadInt64(b.val))
}

// IsSet returns true iff Set has successfully been called.
func (b *BytesValue) IsSet() bool {
return b.isSet
}
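
Finally, a small sketch (with a stand-in type, not the real
humanizeutil.BytesValue) of why NewBytesValue now returns *BytesValue
rather than the pflag.Value interface: the caller in pkg/cli/start.go
needs the new IsSet method, which pflag.Value does not expose, so it can
warn only when --cache was left at its default.

package main

import "fmt"

// fakeBytesValue mimics the relevant part of humanizeutil.BytesValue:
// Set records that the flag was provided explicitly, so the server can
// tell a user-supplied --cache apart from the compiled-in 128 MiB default.
type fakeBytesValue struct {
	val   int64
	isSet bool
}

func (b *fakeBytesValue) Set(s string) error {
	if _, err := fmt.Sscan(s, &b.val); err != nil {
		return err
	}
	b.isSet = true
	return nil
}

func (b *fakeBytesValue) IsSet() bool { return b.isSet }

func main() {
	v := &fakeBytesValue{val: 128 << 20}
	fmt.Println(v.IsSet()) // false: still the default, so log the cache warning
	_ = v.Set("4294967296")
	fmt.Println(v.IsSet()) // true: --cache was set explicitly, stay quiet
}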
