Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Let telepresence connect despite DNS failure. #3154

Merged
merged 6 commits into from
May 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,15 @@

### 2.13.2 (TBD)

- Feature: Configurable strategy (`auto`, `powershell`. or `registry`) to set the global DNS search path on Windows. Default
is `auto` which means try `powershell` first, and if it fails, fall back to `registry`.

- Feature: The timeout for the traffic manager to wait for traffic agent to arrive can
now be configured in the `values.yaml` file using `timeouts.agentArrival`. The default
timeout is still 30 seconds.

- Bugfix: Ensure that `telepresence connect` succeeds even though DNS isn't configured correctly.

- Bugfix: The traffic-manager would sometimes panic and exit after some time due to a type cast panic.

### 2.13.1 (April 20, 2023)
Expand Down
17 changes: 13 additions & 4 deletions pkg/client/cli/cmd/leave.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package cmd

import (
"context"
"fmt"
"strings"

"github.com/spf13/cobra"
Expand All @@ -16,6 +17,7 @@ import (
"github.com/telepresenceio/telepresence/v2/pkg/client/cli/daemon"
"github.com/telepresenceio/telepresence/v2/pkg/client/cli/intercept"
"github.com/telepresenceio/telepresence/v2/pkg/client/docker"
"github.com/telepresenceio/telepresence/v2/pkg/errcat"
)

func leave() *cobra.Command {
Expand Down Expand Up @@ -70,17 +72,24 @@ func removeIntercept(ctx context.Context, name string) error {
ic, err := userD.GetIntercept(ctx, &manager.GetInterceptRequest{Name: name})
if err != nil {
if st, ok := status.FromError(err); ok && st.Code() == codes.NotFound {
// Obviously not there. That's ok.
return nil
// User probably misspelled the name of the intercept
return errcat.User.Newf("Intercept named %q not found", name)
}
return err
}
if handlerContainer, ok := ic.Environment["TELEPRESENCE_HANDLER_CONTAINER_NAME"]; ok {
handlerContainer, stopContainer := ic.Environment["TELEPRESENCE_HANDLER_CONTAINER_NAME"]
if stopContainer {
// Stop the intercept handler's container. The daemon is most likely running in another
// container, and won't be able to.
if err = docker.StopContainer(ctx, handlerContainer); err != nil {
dlog.Error(ctx, err)
}
}
return intercept.Result(userD.RemoveIntercept(ctx, &manager.RemoveInterceptRequest2{Name: name}))
if err := intercept.Result(userD.RemoveIntercept(ctx, &manager.RemoveInterceptRequest2{Name: name})); err != nil {
if stopContainer && strings.Contains(err.Error(), fmt.Sprintf("%q not found", name)) {
// race condition between stopping the intercept handler, which causes the intercept to leave, and this call
err = nil
}
}
return err
}
4 changes: 4 additions & 0 deletions pkg/client/cli/cmd/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ func BasicGetStatusInfo(ctx context.Context) (ioutil.WriterTos, error) {
// Local IP is only set when the overriding resolver is used
rs.DNS.LocalIP = dns.LocalIp
}
rs.DNS.Error = dns.Error
rs.DNS.RemoteIP = dns.RemoteIp
rs.DNS.ExcludeSuffixes = dns.ExcludeSuffixes
rs.DNS.IncludeSuffixes = dns.IncludeSuffixes
Expand Down Expand Up @@ -278,6 +279,9 @@ func (ds *rootDaemonStatus) printNetwork(kvf *ioutil.KeyValueFormatter) {
func printDNS(kvf *ioutil.KeyValueFormatter, d *client.DNSSnake) {
dnsKvf := ioutil.DefaultKeyValueFormatter()
kvf.Indent = " "
if d.Error != "" {
dnsKvf.Add("Error", d.Error)
}
if len(d.LocalIP) > 0 {
dnsKvf.Add("Local IP", d.LocalIP.String())
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/client/cli/connect/connector.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/telepresenceio/telepresence/v2/pkg/client/socket"
"github.com/telepresenceio/telepresence/v2/pkg/dos"
"github.com/telepresenceio/telepresence/v2/pkg/errcat"
"github.com/telepresenceio/telepresence/v2/pkg/ioutil"
"github.com/telepresenceio/telepresence/v2/pkg/proc"
)

Expand Down Expand Up @@ -205,6 +206,9 @@ func ensureSession(ctx context.Context, required bool) (context.Context, error)
if s == nil {
return ctx, nil
}
if dns := s.Info.GetDaemonStatus().GetOutboundConfig().GetDns(); dns != nil && dns.Error != "" {
ioutil.Printf(output.Err(ctx), "Warning: %s\n", dns.Error)
}
return daemon.WithSession(ctx, s), nil
}

Expand Down
21 changes: 13 additions & 8 deletions pkg/client/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,15 @@ const ConfigFile = "config.yml"

// Config contains all configuration values for the telepresence CLI.
type Config struct {
Timeouts Timeouts `json:"timeouts,omitempty" yaml:"timeouts,omitempty"`
LogLevels LogLevels `json:"logLevels,omitempty" yaml:"logLevels,omitempty"`
Images Images `json:"images,omitempty" yaml:"images,omitempty"`
Cloud Cloud `json:"cloud,omitempty" yaml:"cloud,omitempty"`
Grpc Grpc `json:"grpc,omitempty" yaml:"grpc,omitempty"`
TelepresenceAPI TelepresenceAPI `json:"telepresenceAPI,omitempty" yaml:"telepresenceAPI,omitempty"`
Intercept Intercept `json:"intercept,omitempty" yaml:"intercept,omitempty"`
Cluster Cluster `json:"cluster,omitempty" yaml:"cluster,omitempty"`
OSSpecificConfig `yaml:",inline"`
Timeouts Timeouts `json:"timeouts,omitempty" yaml:"timeouts,omitempty"`
LogLevels LogLevels `json:"logLevels,omitempty" yaml:"logLevels,omitempty"`
Images Images `json:"images,omitempty" yaml:"images,omitempty"`
Cloud Cloud `json:"cloud,omitempty" yaml:"cloud,omitempty"`
Grpc Grpc `json:"grpc,omitempty" yaml:"grpc,omitempty"`
TelepresenceAPI TelepresenceAPI `json:"telepresenceAPI,omitempty" yaml:"telepresenceAPI,omitempty"`
Intercept Intercept `json:"intercept,omitempty" yaml:"intercept,omitempty"`
Cluster Cluster `json:"cluster,omitempty" yaml:"cluster,omitempty"`
}

func ParseConfigYAML(data []byte) (*Config, error) {
Expand All @@ -49,6 +50,7 @@ func ParseConfigYAML(data []byte) (*Config, error) {

// Merge merges this instance with the non-zero values of the given argument. The argument values take priority.
func (c *Config) Merge(o *Config) {
c.OSSpecificConfig.Merge(&o.OSSpecificConfig)
c.Timeouts.merge(&o.Timeouts)
c.LogLevels.merge(&o.LogLevels)
c.Images.merge(&o.Images)
Expand Down Expand Up @@ -873,6 +875,7 @@ func GetConfigFile(c context.Context) string {
// GetDefaultConfig returns the default configuration settings.
func GetDefaultConfig() Config {
return Config{
OSSpecificConfig: GetDefaultOSSpecificConfig(),
Timeouts: Timeouts{
PrivateAgentInstall: defaultTimeoutsAgentInstall,
PrivateApply: defaultTimeoutsApply,
Expand Down Expand Up @@ -975,6 +978,7 @@ type RoutingSnake struct {
}

type DNS struct {
Error string `json:"error,omitempty" yaml:"error,omitempty"`
LocalIP net.IP `json:"localIP,omitempty" yaml:"localIP,omitempty"`
RemoteIP net.IP `json:"remoteIP,omitempty" yaml:"remoteIP,omitempty"`
IncludeSuffixes []string `json:"includeSuffixes,omitempty" yaml:"includeSuffixes,omitempty"`
Expand All @@ -984,6 +988,7 @@ type DNS struct {

// DNSSnake is the same as DNS but with snake_case json/yaml names.
type DNSSnake struct {
Error string `json:"error,omitempty" yaml:"error,omitempty"`
LocalIP net.IP `json:"local_ip,omitempty" yaml:"local_ip,omitempty"`
RemoteIP net.IP `json:"remote_ip,omitempty" yaml:"remote_ip,omitempty"`
IncludeSuffixes []string `json:"include_suffixes,omitempty" yaml:"include_suffixes,omitempty"`
Expand Down
12 changes: 12 additions & 0 deletions pkg/client/config_unix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
//go:build !windows

package client

type OSSpecificConfig struct{}

func GetDefaultOSSpecificConfig() OSSpecificConfig {
return OSSpecificConfig{}
}

func (c *OSSpecificConfig) Merge(o *OSSpecificConfig) {
}
91 changes: 91 additions & 0 deletions pkg/client/config_windows.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package client

import (
"errors"
"fmt"

"gopkg.in/yaml.v3"

"github.com/datawire/dlib/dlog"
)

type OSSpecificConfig struct {
Network Network `json:"network,omitempty" yaml:"network,omitempty"`
}

func GetDefaultOSSpecificConfig() OSSpecificConfig {
return OSSpecificConfig{
Network: Network{
GlobalDNSSearchConfigStrategy: defaultGlobalDNSSearchConfigStrategy,
},
}
}

// Merge merges this instance with the non-zero values of the given argument. The argument values take priority.
func (c *OSSpecificConfig) Merge(o *OSSpecificConfig) {
c.Network.merge(&o.Network)
}

type GSCStrategy string

const (
// GSCAuto configure DNS search first attempting GSCPowershell and if that fails, GSCRegistry.
GSCAuto = "auto"

// GSCRegistry configure DNS search by setting the registry value System\CurrentControlSet\Services\Tcpip\Parameters\SearchList.
GSCRegistry = "registry"

// GSCPowershell configure DNS search using the powershell Set-DnsClientGlobalSetting command.
GSCPowershell = "powershell"

defaultGlobalDNSSearchConfigStrategy = GSCAuto
)

type Network struct {
GlobalDNSSearchConfigStrategy GSCStrategy `json:"globalDNSSearchConfigStrategy,omitempty" yaml:"globalDNSSearchConfigStrategy,omitempty"`
}

func (n *Network) merge(o *Network) {
if o.GlobalDNSSearchConfigStrategy != defaultGlobalDNSSearchConfigStrategy {
n.GlobalDNSSearchConfigStrategy = o.GlobalDNSSearchConfigStrategy
}
}

func (n Network) IsZero() bool {
return n.GlobalDNSSearchConfigStrategy == defaultGlobalDNSSearchConfigStrategy
}

func (n *Network) UnmarshalYAML(node *yaml.Node) (err error) {
if node.Kind != yaml.MappingNode {
return errors.New(withLoc("network must be an object", node))
}
ms := node.Content
top := len(ms)
for i := 0; i < top; i += 2 {
kv, err := stringKey(ms[i])
if err != nil {
return err
}
v := ms[i+1]
switch kv {
case "globalDNSSearchConfigStrategy":
switch v.Value {
case GSCAuto, GSCRegistry, GSCPowershell:
n.GlobalDNSSearchConfigStrategy = GSCStrategy(v.Value)
default:
if parseContext != nil {
dlog.Warn(parseContext, withLoc(fmt.Sprintf("invalid globalDNSSearchConfigStrategy %q. Valid values are %q, %q or %q",
v.Value, GSCAuto, GSCRegistry, GSCPowershell), ms[i+1]))
}
}
default:
if parseContext != nil {
dlog.Warn(parseContext, withLoc(fmt.Sprintf("unknown key %q", kv), ms[i]))
}
}
}
if n.GlobalDNSSearchConfigStrategy == "" {
n.GlobalDNSSearchConfigStrategy = defaultGlobalDNSSearchConfigStrategy
}
return nil
}
37 changes: 18 additions & 19 deletions pkg/client/rootd/dns/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ type Server struct {
onlyNames bool

// ready is closed when the DNS server is fully configured
ready chan error
ready chan struct{}
}

type cacheEntry struct {
Expand Down Expand Up @@ -129,7 +129,7 @@ func NewServer(config *rpc.DNSConfig, clusterLookup Resolver, onlyNames bool) *S
clusterDomain: defaultClusterDomain,
clusterLookup: clusterLookup,
onlyNames: onlyNames,
ready: make(chan error, 2),
ready: make(chan struct{}),
}
s.cacheResolve = s.resolveWithRecursionCheck
return s
Expand Down Expand Up @@ -250,17 +250,18 @@ func (s *Server) resolveInCluster(c context.Context, q *dns.Question) (result dn
}

func (s *Server) GetConfig() *rpc.DNSConfig {
dnsConfig := &rpc.DNSConfig{}
if s.config != nil {
dnsConfig.LocalIp = s.config.LocalIp
dnsConfig.ExcludeSuffixes = s.config.ExcludeSuffixes
dnsConfig.IncludeSuffixes = s.config.IncludeSuffixes
dnsConfig.LookupTimeout = s.config.LookupTimeout
}
return dnsConfig
sc := s.config
return &rpc.DNSConfig{
LocalIp: sc.LocalIp,
RemoteIp: sc.RemoteIp,
ExcludeSuffixes: sc.ExcludeSuffixes,
IncludeSuffixes: sc.IncludeSuffixes,
LookupTimeout: sc.LookupTimeout,
Error: sc.Error,
}
}

func (s *Server) Ready() <-chan error {
func (s *Server) Ready() <-chan struct{} {
return s.ready
}

Expand Down Expand Up @@ -325,7 +326,8 @@ func newLocalUDPListener(c context.Context) (net.PacketConn, error) {
func (s *Server) processSearchPaths(g *dgroup.Group, processor func(context.Context, []string, vif.Device) error, dev vif.Device) {
g.Go("RecursionCheck", func(c context.Context) error {
_ = dev.SetDNS(c, s.clusterDomain, s.config.RemoteIp, []string{tel2SubDomain})
return s.performRecursionCheck(c)
s.performRecursionCheck(c)
return nil
})

g.Go("SearchPaths", func(c context.Context) error {
Expand Down Expand Up @@ -468,7 +470,7 @@ func (d dfs) String() string {
return d()
}

func (s *Server) performRecursionCheck(c context.Context) error {
func (s *Server) performRecursionCheck(c context.Context) {
defer close(s.ready)
defer dlog.Debug(c, "Recursion check finished")
var rc string
Expand All @@ -493,7 +495,7 @@ func (s *Server) performRecursionCheck(c context.Context) error {
if derr, ok := err.(*net.DNSError); ok {
if atomic.LoadInt32(&s.recursive) != recursionTestInProgress {
if derr.IsTimeout || derr.IsNotFound {
return nil
return
}
}
if derr.IsTimeout {
Expand All @@ -504,17 +506,14 @@ func (s *Server) performRecursionCheck(c context.Context) error {
dlog.Errorf(c, "unexpected error during recursion check: %v", err)
}
if atomic.LoadInt32(&s.recursive) != recursionTestInProgress {
return nil
return
}
// Check didn't hit our resolver. Try again
dtime.SleepWithContext(c, 100*time.Millisecond)
}
if i == maxRecursionTestRetries {
err := errors.New("recursion check failed. The DNS isn't working properly")
s.ready <- err
return err
s.config.Error = "DNS doesn't seem to work properly"
}
return nil
}

// ServeDNS is an implementation of github.com/miekg/dns Handler.ServeDNS.
Expand Down
2 changes: 1 addition & 1 deletion pkg/client/rootd/dns/server_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (
)

const (
maxRecursionTestRetries = 20
maxRecursionTestRetries = 40
recursionTestTimeout = 1500 * time.Millisecond
)

Expand Down
7 changes: 2 additions & 5 deletions pkg/client/rootd/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -385,10 +385,7 @@ func (s *Session) networkReady(ctx context.Context) <-chan error {
} else {
select {
case <-ctx.Done():
case err, ok = <-s.dnsServer.Ready():
if ok {
rdy <- err
}
case <-s.dnsServer.Ready():
}
}
}
Expand Down Expand Up @@ -576,7 +573,7 @@ func (s *Session) readAdditionalRouting(ctx context.Context, mgrInfo *manager.Cl

func (s *Session) checkSvcConnectivity(ctx context.Context, info *manager.ClusterInfo) bool {
// The traffic-manager service is headless, which means we can't try a GRPC connection to its ClusterIP.
// Instead we try an HTTP health check on the agent-injector server, since that one does expose a ClusterIP.
// Instead, we try an HTTP health check on the agent-injector server, since that one does expose a ClusterIP.
// This is less precise than if we could check for our own GRPC, since /healthz is a common enough health check path,
// but hopefully the server on the other end isn't configured to respond to the hostname "agent-injector" if it isn't the agent-injector.
if info.InjectorSvcIp == nil {
Expand Down
Loading