Skip to content

Commit

Permalink
Merge #52622
Browse files Browse the repository at this point in the history
52622: cli: add debug doctor command r=spaskob a=spaskob

This PR introduces two new debug commands:
`/cockroach debug doctor cluster --url=<cluster_conn_string>`
and
`/cockroach debug doctor zipdir <debug-dir>`

which will validate the descriptors of a live cluster or of an unzipped debug
directory, respectively. The commands print out all descriptor IDs that were inspected
and, if any are found, the problems with specific IDs.

Fixes #52077.
Informs #51153.

Release note: none.

Co-authored-by: Spas Bojanov <[email protected]>
  • Loading branch information
craig[bot] and Spas Bojanov committed Aug 20, 2020
2 parents 19aae21 + c2e07e3 commit 24c69e8
Show file tree
Hide file tree
Showing 14 changed files with 486 additions and 15 deletions.
7 changes: 6 additions & 1 deletion pkg/cli/cli_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ func isSQLCommand(args []string) bool {
case "sql", "dump", "workload", "nodelocal", "userfile", "statement-diag":
return true
case "node":
if len(args) == 0 {
if len(args) == 1 {
return false
}
switch args[1] {
Expand All @@ -316,6 +316,11 @@ func isSQLCommand(args []string) bool {
default:
return false
}
case "debug":
if len(args) < 3 {
return false
}
return args[1] == "doctor" && args[2] == "cluster"
default:
return false
}
Expand Down
3 changes: 3 additions & 0 deletions pkg/cli/debug.go
Original file line number Diff line number Diff line change
Expand Up @@ -1250,6 +1250,9 @@ func init() {
debugPebbleCmd.AddCommand(pebbleTool.Commands...)
DebugCmd.AddCommand(debugPebbleCmd)

debugDoctorCmd.AddCommand(debugDoctorCmds...)
DebugCmd.AddCommand(debugDoctorCmd)

f := debugSyncBenchCmd.Flags()
f.IntVarP(&syncBenchOpts.Concurrency, "concurrency", "c", syncBenchOpts.Concurrency,
"number of concurrent writers")
Expand Down
172 changes: 172 additions & 0 deletions pkg/cli/doctor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package cli

import (
"bufio"
"database/sql/driver"
hx "encoding/hex"
"fmt"
"io"
"os"
"path"
"strconv"
"strings"

"github.com/cockroachdb/apd/v2"
"github.com/cockroachdb/cockroach/pkg/sql/doctor"
"github.com/cockroachdb/cockroach/pkg/sql/sem/builtins"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/errors"
"github.com/spf13/cobra"
)

// debugDoctorCmd is the `doctor` command. It defines no run function of its
// own; it acts as the parent for the commands listed in debugDoctorCmds.
var debugDoctorCmd = &cobra.Command{
	Long: `
Runs various consistency checks over cockroach internal system tables read from
a live cluster or a unzipped debug zip.
`,
	Short: "run a cockroach doctor tool command",
	Use:   "doctor [command]",
}

// debugDoctorCmds lists the subcommands that are attached to debugDoctorCmd
// via AddCommand.
var debugDoctorCmds = []*cobra.Command{
doctorZipDirCmd,
doctorClusterCmd,
}

// doctorZipDirCmd is the `zipdir` subcommand. It takes exactly one positional
// argument, the directory produced by unzipping a debug.zip, and dispatches
// to runZipDirDoctor.
var doctorZipDirCmd = &cobra.Command{
	Args: cobra.ExactArgs(1),
	RunE: runZipDirDoctor,
	Long: `
Run doctor tool on system data from directory created by unzipping debug.zip.
`,
	Short: "run doctor tool on data from a directory unzipped from debug.zip",
	Use:   "zipdir <debug_zip_dir>",
}

// doctorClusterCmd is the `cluster` subcommand. It accepts no positional
// arguments and runs runClusterDoctor wrapped in MaybeDecorateGRPCError.
var doctorClusterCmd = &cobra.Command{
	Args: cobra.NoArgs,
	RunE: MaybeDecorateGRPCError(runClusterDoctor),
	Long: `
Run doctor tool reading system data from a live cluster specified by --url.
`,
	Short: "run doctor tool on live cockroach cluster",
	Use:   "cluster --url=<cluster connection string>",
}

// wrapExamine passes descTable to doctor.Examine, with the report written to
// stdout, and translates the outcome into a CLI result:
//
//   - Examine returned an error: a cliError with exit code 2.
//   - Examine reported the descriptors as not valid: a cliError with exit
//     code 1.
//   - otherwise: "No problems found!" is printed and nil is returned.
func wrapExamine(descTable []doctor.DescriptorTableRow) error {
	// TODO(spaskob): add --verbose flag.
	// NOTE(review): the literal false is presumably the verbosity switch the
	// TODO refers to; confirm against doctor.Examine.
	ok, examineErr := doctor.Examine(descTable, false, os.Stdout)
	switch {
	case examineErr != nil:
		return &cliError{exitCode: 2, cause: errors.Wrap(examineErr, "examine failed")}
	case !ok:
		return &cliError{exitCode: 1, cause: errors.New("validation failed")}
	}
	fmt.Println("No problems found!")
	return nil
}

// runClusterDoctor runs the doctor tool reading data from a live cluster.
//
// It opens a SQL connection with makeSQLClient, selects id, descriptor and
// crdb_internal_mvcc_timestamp for every row of system.descriptor (ordered by
// id), converts each row into a doctor.DescriptorTableRow and passes the
// resulting table to wrapExamine.
//
// An error is returned if the connection or query fails, if reading a row
// fails, if a column has an unexpected Go type, or if the MVCC timestamp
// cannot be decoded; otherwise the result of wrapExamine is returned.
func runClusterDoctor(cmd *cobra.Command, args []string) (retErr error) {
	sqlConn, err := makeSQLClient("cockroach doctor", useSystemDb)
	if err != nil {
		return errors.Wrap(err, "could not establish connection to cluster")
	}
	defer sqlConn.Close()

	// NOTE(review): rows is never explicitly closed in this function; confirm
	// whether the value returned by Query requires a Close call.
	rows, err := sqlConn.Query(`
SELECT id, descriptor, crdb_internal_mvcc_timestamp AS mod_time_logical
FROM system.descriptor
ORDER BY id`,
		nil,
	)
	if err != nil {
		return errors.Wrap(err, "could not read system.descriptor")
	}

	descTable := make([]doctor.DescriptorTableRow, 0)
	vals := make([]driver.Value, 3)
	for {
		// io.EOF marks the normal end of the result set. Any other error must
		// abort the loop: otherwise vals would hold stale or empty data and
		// the loop might never terminate.
		if err := rows.Next(vals); err == io.EOF {
			break
		} else if err != nil {
			return errors.Wrap(err, "could not read row from system.descriptor")
		}

		var row doctor.DescriptorTableRow

		id, ok := vals[0].(int64)
		if !ok {
			return errors.Errorf("unexpected value: %T of %v", vals[0], vals[0])
		}
		row.ID = id

		descBytes, ok := vals[1].([]byte)
		if !ok {
			return errors.Errorf("unexpected value: %T of %v", vals[1], vals[1])
		}
		row.DescBytes = descBytes

		switch mt := vals[2].(type) {
		case nil:
			// No MVCC timestamp available for this row: fall back to the
			// current wall time.
			row.ModTime = hlc.Timestamp{WallTime: timeutil.Now().UnixNano()}
		case []byte:
			// The timestamp arrives as the textual form of a decimal.
			decimal, _, err := apd.NewFromString(string(mt))
			if err != nil {
				return errors.Wrapf(err, "could not parse mvcc timestamp of descriptor %d", row.ID)
			}
			ts, err := tree.DecimalToHLC(decimal)
			if err != nil {
				return errors.Wrapf(err, "could not convert mvcc timestamp of descriptor %d", row.ID)
			}
			row.ModTime = ts
		default:
			return errors.Errorf("unexpected value: %T of %v", vals[2], vals[2])
		}

		descTable = append(descTable, row)
	}

	return wrapExamine(descTable)
}

// runZipDirDoctor runs the doctor tool reading data from a debug zip dir.
//
// args[0] is the directory; the file system.descriptor.txt inside it is read
// line by line. The first line is skipped. For every other non-blank line the
// first whitespace-separated field is parsed as the decimal descriptor id and
// the last field is hex-decoded into the descriptor bytes. The file's
// modification-time information is not used: each row is stamped with the
// current wall time.
//
// An error is returned if the file cannot be opened or read, if a row has
// fewer than two fields, or if the id or descriptor cannot be parsed;
// otherwise the result of wrapExamine is returned.
func runZipDirDoctor(cmd *cobra.Command, args []string) (retErr error) {
	// To make parsing user functions code happy.
	_ = builtins.AllBuiltinNames

	file, err := os.Open(path.Join(args[0], "system.descriptor.txt"))
	if err != nil {
		return err
	}
	defer file.Close()

	descTable := make([]doctor.DescriptorTableRow, 0)
	sc := bufio.NewScanner(file)
	lineNo := 0
	for sc.Scan() {
		lineNo++
		if lineNo == 1 {
			// Skip the first line; it is not a descriptor row.
			continue
		}
		fields := strings.Fields(sc.Text())
		if len(fields) == 0 {
			// Blank line (e.g. trailing newline): nothing to parse, and
			// indexing fields[0] would panic.
			continue
		}
		if len(fields) < 2 {
			return errors.Errorf(
				"malformed row on line %d: expected a descriptor id and a hex descriptor", lineNo)
		}

		i, err := strconv.Atoi(fields[0])
		if err != nil {
			return errors.Wrapf(err, "failed to parse descriptor id %s", fields[0])
		}

		descBytes, err := hx.DecodeString(fields[len(fields)-1])
		if err != nil {
			return errors.Wrapf(err, "failed to decode hex descriptor %d", i)
		}
		ts := hlc.Timestamp{WallTime: timeutil.Now().UnixNano()}
		descTable = append(descTable, doctor.DescriptorTableRow{ID: int64(i), DescBytes: descBytes, ModTime: ts})
	}
	// Scan returns false on both EOF and failure (including a line that
	// exceeds the scanner's buffer); without this check a failure would
	// silently yield a truncated descriptor table.
	if err := sc.Err(); err != nil {
		return errors.Wrap(err, "could not read system.descriptor.txt")
	}

	return wrapExamine(descTable)
}
61 changes: 61 additions & 0 deletions pkg/cli/doctor_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package cli

import (
"testing"

"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/datadriven"
)

// TestDoctorCluster runs `debug doctor cluster` against a test cluster whose
// system.descriptor table has been deliberately corrupted, and compares the
// captured output with the expectation in testdata/doctor/testcluster.
func TestDoctorCluster(t *testing.T) {
	defer leaktest.AfterTest(t)()
	c := newCLITest(cliTestParams{t: t})
	defer c.cleanup()

	// Corrupt the descriptor table: create a table, then delete descriptor 50
	// (intended to be that table's parent).
	corruptArgs := []string{"sql", "-e", `
CREATE TABLE t (id INT);
INSERT INTO system.users VALUES ('node', NULL, true);
GRANT node TO root;
DELETE FROM system.descriptor WHERE id = 50;`,
	}
	c.RunWithArgs(corruptArgs)

	out, err := c.RunWithCapture("debug doctor cluster")
	if err != nil {
		t.Fatal(err)
	}

	// Going through datadriven lets the expectation be regenerated with
	// TESTFLAGS=-rewrite.
	emitOutput := func(*testing.T, *datadriven.TestData) string { return out }
	datadriven.RunTest(t, "testdata/doctor/testcluster", emitOutput)
}

// TestDoctorZipDir runs `debug doctor zipdir` over the checked-in
// testdata/doctor/debugzip directory and compares the captured output with
// the expectation in testdata/doctor/testzipdir. The CLI test harness is
// created with noServer set, since the command only reads files.
func TestDoctorZipDir(t *testing.T) {
	defer leaktest.AfterTest(t)()
	c := newCLITest(cliTestParams{t: t, noServer: true})
	defer c.cleanup()

	const cmdLine = "debug doctor zipdir testdata/doctor/debugzip"
	out, err := c.RunWithCapture(cmdLine)
	if err != nil {
		t.Fatal(err)
	}

	// Going through datadriven lets the expectation be regenerated with
	// TESTFLAGS=-rewrite.
	emitOutput := func(*testing.T, *datadriven.TestData) string { return out }
	datadriven.RunTest(t, "testdata/doctor/testzipdir", emitOutput)
}
4 changes: 3 additions & 1 deletion pkg/cli/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,7 @@ func init() {
debugGossipValuesCmd,
debugTimeSeriesDumpCmd,
debugZipCmd,
doctorClusterCmd,
dumpCmd,
genHAProxyCmd,
initCmd,
Expand Down Expand Up @@ -576,6 +577,7 @@ func init() {
statusNodeCmd,
lsNodesCmd,
debugZipCmd,
doctorClusterCmd,
// If you add something here, make sure the actual implementation
// of the command uses `cmdTimeoutContext(.)` or it will ignore
// the timeout.
Expand Down Expand Up @@ -659,7 +661,7 @@ func init() {
boolFlag(dumpCmd.Flags(), &dumpCtx.dumpAll, cliflags.DumpAll)

// Commands that establish a SQL connection.
sqlCmds := []*cobra.Command{sqlShellCmd, dumpCmd, demoCmd}
sqlCmds := []*cobra.Command{sqlShellCmd, dumpCmd, demoCmd, doctorClusterCmd}
sqlCmds = append(sqlCmds, authCmds...)
sqlCmds = append(sqlCmds, demoCmd.Commands()...)
sqlCmds = append(sqlCmds, stmtDiagCmds...)
Expand Down
Loading

0 comments on commit 24c69e8

Please sign in to comment.