From 3ea73f47f81813072120b7d39571cc21dd22a6b2 Mon Sep 17 00:00:00 2001 From: justinsb Date: Sun, 9 Feb 2025 16:18:18 -0500 Subject: [PATCH] Better dumping via private IP when bastion is not set Previously this would always fail in a confusing way, regardless of whether we had connectivity, because we tried to connect to an empty-string host. Now we are more explicit about the error, and will at least try to connect directly. --- cmd/kops/toolbox_dump.go | 3 +++ pkg/dump/dumper.go | 30 +++++++++++++++++++++++++----- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/cmd/kops/toolbox_dump.go b/cmd/kops/toolbox_dump.go index ab205c9696fed..2887903b34056 100644 --- a/cmd/kops/toolbox_dump.go +++ b/cmd/kops/toolbox_dump.go @@ -209,6 +209,9 @@ func RunToolboxDump(ctx context.Context, f commandutils.Factory, out io.Writer, HostKeyCallback: ssh.InsecureIgnoreHostKey(), } + klog.Infof("will SSH using username %q", sshConfig.User) + klog.Infof("ssh auth methods %v", sshConfig.Auth) + keyRing := agent.NewKeyring() defer func(keyRing agent.Agent) { _ = keyRing.RemoveAll() diff --git a/pkg/dump/dumper.go b/pkg/dump/dumper.go index 3c3aee7d6e2aa..424bd81eb491a 100644 --- a/pkg/dump/dumper.go +++ b/pkg/dump/dumper.go @@ -199,7 +199,12 @@ func (d *logDumper) dumpRegistered(ctx context.Context, node *corev1.Node) error if publicIP != "" { return d.dumpNode(ctx, node.Name, publicIP, false) } else { - return d.dumpNode(ctx, node.Name, privateIP, true) + useBastion := true + if !d.sshClientFactory.HasBastion() { + klog.Warningf("no bastion address set, will attempt to connect to node %s directly via private IP %v", node.Name, privateIP) + useBastion = false + } + return d.dumpNode(ctx, node.Name, privateIP, useBastion) } } @@ -274,7 +279,12 @@ type sshClient interface { // sshClientFactory is an interface abstracting to a node over SSH type sshClientFactory interface { + // Dial creates a new sshClient Dial(ctx context.Context, host string, useBastion bool) (sshClient, error) + + // HasBastion returns true if the sshClientFactory has a bastion configured. + // Calling Dial with useBastion=true will return an error if there is no bastion. + HasBastion() bool } // logDumperNode holds state for a particular node we are dumping @@ -540,13 +550,23 @@ type sshClientFactoryImplementation struct { var _ sshClientFactory = &sshClientFactoryImplementation{} +// HasBastion implements sshClientFactory::HasBastion +func (f *sshClientFactoryImplementation) HasBastion() bool { + return f.bastion != "" +} + // Dial implements sshClientFactory::Dial func (f *sshClientFactoryImplementation) Dial(ctx context.Context, host string, useBastion bool) (sshClient, error) { - var addr string + addr := host if useBastion { + if f.bastion == "" { + return nil, fmt.Errorf("bastion is not set, but useBastion is true") + } addr = f.bastion - } else { - addr = host + } + + if addr == "" { + return nil, fmt.Errorf("host is empty") } addr = net.JoinHostPort(addr, "22") d := net.Dialer{ @@ -554,7 +574,7 @@ func (f *sshClientFactoryImplementation) Dial(ctx context.Context, host string, } conn, err := d.DialContext(ctx, "tcp", addr) if err != nil { - return nil, err + return nil, fmt.Errorf("error dialing tcp %s: %w", addr, err) } // We have a TCP connection; we will force-close it to support context cancellation