ci: move hubble connectivity tests to nightly pipeline #2310

Merged: 31 commits from shjayaraman/hubble-ci into master, Oct 20, 2023

Changes from all commits (31 commits)
3c43d7e
[test] add hubble system test to CI (#2265)
matmerr Oct 11, 2023
8d1816b
ci: move hubble connectivity test to nightly pipeline
jshr-w Oct 11, 2023
2b11ae9
fix: move to correct file
jshr-w Oct 11, 2023
07073fc
style: indentation change
jshr-w Oct 11, 2023
cc5cffa
ci: update configmap to enable Hubble
jshr-w Oct 12, 2023
e684c32
fix: move hubble test
jshr-w Oct 12, 2023
f0a6bd7
fix: move connectivity test before delete
jshr-w Oct 12, 2023
a7d4b1b
fix: add daemonset namespace
jshr-w Oct 13, 2023
b7766e5
fix: update command for configmap replace
jshr-w Oct 13, 2023
838340f
test no restart after replace
jshr-w Oct 13, 2023
0b5bb19
fix: try apply instead of replace
jshr-w Oct 13, 2023
050d422
fix: add back restart ds
jshr-w Oct 13, 2023
481af23
add longer timeout after ds restart
jshr-w Oct 13, 2023
8ebb5ad
adjust timeout setup
jshr-w Oct 13, 2023
daa576d
extend timeout, add logging
jshr-w Oct 13, 2023
ac77e11
add logging, change cm command
jshr-w Oct 16, 2023
0c79537
update hubble configmap
jshr-w Oct 16, 2023
281de47
clean up sleep statements
jshr-w Oct 17, 2023
c9afdd5
Merge branch 'master' into shjayaraman/hubble-ci
jshr-w Oct 17, 2023
4350e5d
remove hubble connectivity test from PR pipeline
jshr-w Oct 17, 2023
9770a9f
use kubernetes utils
jshr-w Oct 17, 2023
2850e5f
fix style
jshr-w Oct 17, 2023
d3bca63
update ds restart wait
jshr-w Oct 17, 2023
5397746
enable Hubble on nightly, disable on PR
jshr-w Oct 18, 2023
3b570cb
tag networkobservability test
jshr-w Oct 18, 2023
5c44984
fix test call
jshr-w Oct 18, 2023
cffa774
Merge branch 'master' into shjayaraman/hubble-ci
jshr-w Oct 19, 2023
22b8975
enable Hubble after Cilium is ready
jshr-w Oct 19, 2023
d6d88b1
change location of hubble enable
jshr-w Oct 19, 2023
c4331da
Merge branch 'master' into shjayaraman/hubble-ci
jshr-w Oct 19, 2023
8ea3f09
Merge branch 'master' into shjayaraman/hubble-ci
jshr-w Oct 20, 2023
3 changes: 3 additions & 0 deletions .gitignore
@@ -35,3 +35,6 @@ go.work*

# scale-test
test/scale/generated/*

# test env file
*.env
1 change: 1 addition & 0 deletions .pipelines/cni/cilium/nightly-release-test.yml
@@ -88,6 +88,7 @@ stages:
          name: "cilium_nightly"
          testDropgz: ""
          clusterName: ciliumnightly-$(commitID)
          testHubble: true
      - job: logs
        displayName: "Failure Logs"
        dependsOn:
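Together with the template default below (`testHubble: false`), this makes the nightly pipeline the only caller that turns the Hubble step on; PR runs keep it disabled, matching the commit "remove hubble connectivity test from PR pipeline".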
@@ -2,6 +2,7 @@ parameters:
  name: ""
  testDropgz: ""
  clusterName: ""
  testHubble: false

steps:
  - bash: |
@@ -153,6 +154,21 @@ steps:
    name: "ciliumConnectivityTests"
    displayName: "Run Cilium Connectivity Tests"

  - ${{ if eq( parameters['testHubble'], true) }}:
    - script: |
        echo "enable Hubble metrics server"
        kubectl apply -f test/integration/manifests/cilium/hubble/hubble-peer-svc.yaml
        kubectl apply -f test/integration/manifests/cilium/cilium-config-hubble.yaml
        kubectl rollout restart ds cilium -n kube-system
        echo "wait <3 minutes for pods to be ready after restart"
        kubectl rollout status ds cilium -n kube-system --timeout=3m
        kubectl get pods -Aowide
        echo "verify Hubble metrics endpoint is usable"
        go test ./test/integration/networkobservability -count=1 -v -tags=networkobservability
      retryCountOnTaskFailure: 3
      name: "HubbleConnectivityTests"
      displayName: "Run Hubble Connectivity Tests"

  - script: |
      echo "validate pod IP assignment and check systemd-networkd restart"
      kubectl get pod -owide -A
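For local debugging, the endpoint check in that step can be approximated outside the pipeline. The sketch below is illustrative only (not part of this PR) and assumes a port forward such as `kubectl -n kube-system port-forward ds/cilium 9965:9965` is already running in another shell:

// Illustrative sketch, not part of this PR: fetch the Hubble metrics
// endpoint through an existing port-forward and print the raw Prometheus
// exposition text that the nightly test scrapes.
package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	resp, err := http.Get("http://localhost:9965/metrics")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(body)) // look for hubble_flows_processed_total, hubble_tcp_flags_total, ...
}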
4 changes: 2 additions & 2 deletions hack/toolbox/server/Dockerfile.heavy
@@ -3,7 +3,7 @@ ADD ./ /
WORKDIR /
RUN CGO_ENABLED=0 GOOS=linux go build -o server .

-FROM mcr.microsoft.com/oss/mirror/docker.io/library/ubuntu:20.04
+FROM mcr.microsoft.com/mirror/docker/library/ubuntu:22.04
RUN apt-get update
RUN apt-get install -y \
    axel \
@@ -21,12 +21,12 @@ RUN apt-get install -y \
    net-tools \
    netcat \
    nmap \
-    python \
+    python3 \
    ssh \
    sudo \
    tcpdump \
    traceroute \
    unzip \
    vim \
    wget

98 changes: 98 additions & 0 deletions test/integration/manifests/cilium/cilium-config-hubble.yaml
@@ -0,0 +1,98 @@
apiVersion: v1
data:
  agent-not-ready-taint-key: node.cilium.io/agent-not-ready
  arping-refresh-period: 30s
  auto-direct-node-routes: "false"
  bpf-lb-external-clusterip: "false"
  bpf-lb-map-max: "65536"
  bpf-lb-mode: snat
  bpf-map-dynamic-size-ratio: "0.0025"
  bpf-policy-map-max: "16384"
  bpf-root: /sys/fs/bpf
  cgroup-root: /run/cilium/cgroupv2
  cilium-endpoint-gc-interval: 5m0s
  cluster-id: "0"
  cluster-name: default
  debug: "false"
  disable-cnp-status-updates: "true"
  disable-endpoint-crd: "false"
  enable-auto-protect-node-port-range: "true"
  enable-bgp-control-plane: "false"
  enable-bpf-clock-probe: "true"
  enable-endpoint-health-checking: "false"
  enable-endpoint-routes: "true"
  enable-health-check-nodeport: "true"
  enable-health-checking: "true"
  enable-host-legacy-routing: "true"
  enable-hubble: "true"
  enable-ipv4: "true"
  enable-ipv4-masquerade: "false"
  enable-ipv6: "false"
  enable-ipv6-masquerade: "false"
  enable-k8s-terminating-endpoint: "true"
  enable-l2-neigh-discovery: "true"
  enable-l7-proxy: "false"
  enable-local-node-route: "false"
  enable-local-redirect-policy: "false"
  enable-metrics: "true"
  enable-policy: default
  enable-remote-node-identity: "true"
  enable-session-affinity: "true"
  enable-svc-source-range-check: "true"
  enable-vtep: "false"
  enable-well-known-identities: "false"
  enable-xt-socket-fallback: "true"
  hubble-metrics: flow:sourceContext=workload-name;destinationContext=workload-name
    tcp:sourceContext=workload-name;destinationContext=workload-name
    dns:flow:sourceContext=workload-name;destinationContext=workload-name
  hubble-metrics-server: :9965
  hubble-disable-tls: "false"
  hubble-listen-address: ""
  hubble-socket-path: /dev/null
  hubble-tls-cert-file: /var/lib/cilium/tls/hubble/server.crt
  hubble-tls-client-ca-files: /var/lib/cilium/tls/hubble/client-ca.crt
  hubble-tls-key-file: /var/lib/cilium/tls/hubble/server.key
  identity-allocation-mode: crd
  install-iptables-rules: "true"
  install-no-conntrack-iptables-rules: "false"
  ipam: delegated-plugin
  kube-proxy-replacement: strict
  kube-proxy-replacement-healthz-bind-address: "0.0.0.0:10256"
  local-router-ipv4: 169.254.23.0
  metrics: +cilium_bpf_map_pressure
  monitor-aggregation: medium
  monitor-aggregation-flags: all
  monitor-aggregation-interval: 5s
  node-port-bind-protection: "true"
  nodes-gc-interval: 5m0s
  operator-api-serve-addr: 127.0.0.1:9234
  operator-prometheus-serve-addr: :9963
  preallocate-bpf-maps: "false"
  procfs: /host/proc
  prometheus-serve-addr: :9962
  remove-cilium-node-taints: "true"
  set-cilium-is-up-condition: "true"
  sidecar-istio-proxy-image: cilium/istio_proxy
  synchronize-k8s-nodes: "true"
  tofqdns-dns-reject-response-code: refused
  tofqdns-enable-dns-compression: "true"
  tofqdns-endpoint-max-ip-per-hostname: "50"
  tofqdns-idle-connection-grace-period: 0s
  tofqdns-max-deferred-connection-deletes: "10000"
  tofqdns-min-ttl: "3600"
  tofqdns-proxy-response-max-delay: 100ms
  tunnel: disabled
  unmanaged-pod-watcher-interval: "15"
  vtep-cidr: ""
  vtep-endpoint: ""
  vtep-mac: ""
  vtep-mask: ""
kind: ConfigMap
metadata:
  annotations:
    meta.helm.sh/release-name: cilium
    meta.helm.sh/release-namespace: kube-system
  labels:
    app.kubernetes.io/managed-by: Helm
  name: cilium-config
  namespace: kube-system
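Note that `hubble-metrics` above is a single folded YAML scalar: its continuation lines collapse into one space-separated list of metric definitions, the format Cilium expects. `enable-hubble: "true"` plus `hubble-metrics-server: :9965` is what exposes the endpoint the nightly test scrapes.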
18 changes: 18 additions & 0 deletions test/integration/manifests/cilium/hubble/hubble-peer-svc.yaml
@@ -0,0 +1,18 @@
apiVersion: v1
kind: Service
metadata:
  labels:
    k8s-app: cilium
  name: hubble-peer
  namespace: kube-system
spec:
  internalTrafficPolicy: Cluster
  ports:
  - name: peer-service
    port: 443
    protocol: TCP
    targetPort: 4244
  selector:
    k8s-app: cilium
  sessionAffinity: None
  type: ClusterIP
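This ClusterIP service exposes, on port 443 inside the cluster, the Hubble peer endpoint that each Cilium agent pod (selector `k8s-app: cilium`) serves on port 4244.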
134 changes: 134 additions & 0 deletions test/integration/networkobservability/hubble_test.go
@@ -0,0 +1,134 @@
//go:build networkobservability

package networkobservability

import (
	"context"
	"fmt"
	"io"
	"net/http"
	"strings"
	"testing"
	"time"

	k8s "github.com/Azure/azure-container-networking/test/integration"
	"github.com/Azure/azure-container-networking/test/internal/kubernetes"
	"github.com/Azure/azure-container-networking/test/internal/retry"
)

const (
	retryAttempts = 10
	retryDelay    = 5 * time.Second
	promAddress   = "http://localhost:9965/metrics"
	labelSelector = "k8s-app=cilium"
	namespace     = "kube-system"
)

var (
	defaultRetrier  = retry.Retrier{Attempts: retryAttempts, Delay: retryDelay}
	requiredMetrics = []string{
		"hubble_flows_processed_total",
		"hubble_tcp_flags_total",
	}
)

func TestEndpoints(t *testing.T) {
	config := kubernetes.MustGetRestConfig()
	ctx := context.Background()
	clusterCtx, cancel := context.WithTimeout(ctx, 5*time.Minute)
	defer cancel()
	pingCheckFn := func() error {
		pf, err := k8s.NewPortForwarder(config, t, k8s.PortForwardingOpts{
			Namespace:     namespace,
			LabelSelector: labelSelector,
			LocalPort:     9965,
			DestPort:      9965,
		})
		if err != nil {
			return fmt.Errorf("could not build port forwarder: %w", err)
		}
		pctx := context.Background()

		// total retry budget for the port forward: (10+1)*5s = 55s
		portForwardCtx, cancel := context.WithTimeout(pctx, (retryAttempts+1)*retryDelay)
		defer cancel()

		portForwardFn := func() error {
			t.Logf("attempting port forward to a pod with label %s, in namespace %s...", labelSelector, namespace)
			if err = pf.Forward(portForwardCtx); err != nil {
				return fmt.Errorf("could not start port forward: %w", err)
			}
			return nil
		}

		if err = defaultRetrier.Do(portForwardCtx, portForwardFn); err != nil {
			t.Fatalf("could not start port forward within %v: %v", (retryAttempts+1)*retryDelay, err)
		}
		defer pf.Stop()

		// scrape the hubble metrics
		metrics, err := getPrometheusMetrics(promAddress)
		if err != nil {
			return fmt.Errorf("scraping %s failed: %w", promAddress, err)
		}

		// verify that the response contains the required metrics
		for _, reqMetric := range requiredMetrics {
			if _, exists := metrics[reqMetric]; !exists {
				return fmt.Errorf("scraping %s, did not find metric %s", promAddress, reqMetric) //nolint:goerr113,gocritic
			}
		}
		t.Logf("all metrics validated: %+v", requiredMetrics)
		return nil
	}

	if err := defaultRetrier.Do(clusterCtx, pingCheckFn); err != nil {
		t.Fatalf("metrics check failed with error: %v", err)
	}
}

func getPrometheusMetrics(url string) (map[string]struct{}, error) {
	client := http.Client{}
	resp, err := client.Get(url) //nolint
	if err != nil {
		return nil, fmt.Errorf("HTTP request failed: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("HTTP request failed with status: %v", resp.Status) //nolint:goerr113,gocritic
	}

	metricsData, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("reading HTTP response body failed: %w", err)
	}

	metrics := parseMetrics(string(metricsData))
	return metrics, nil
}

func parseMetrics(metricsData string) map[string]struct{} {
	// Create a map to store the strings before the first '{'.
	metrics := make(map[string]struct{})

	// sample metrics
	// hubble_tcp_flags_total{destination="",family="IPv4",flag="RST",source="kube-system/metrics-server"} 980
	// hubble_tcp_flags_total{destination="",family="IPv4",flag="SYN",source="kube-system/ama-metrics"} 1777
	// we only want the metric name for the time being
	// label order/parsing can happen later
	lines := strings.Split(metricsData, "\n")
	// Iterate through each line.
	for _, line := range lines {
		// Find the index of the first '{' character.
		index := strings.Index(line, "{")
		if index >= 0 {
			// Extract the string before the first '{'.
			str := strings.TrimSpace(line[:index])
			// Store the string in the map.
			metrics[str] = struct{}{}
		}
	}

	return metrics
}
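As a usage illustration, here is a minimal companion test (hypothetical, not part of this PR) for parseMetrics, exercising it on exposition lines like the samples in the comment above; the label names are made up for the example:

// Hypothetical sketch, not in this PR: parseMetrics keeps only the
// metric name preceding the first '{' of each exposition line.
func TestParseMetrics(t *testing.T) {
	sample := "hubble_tcp_flags_total{flag=\"RST\"} 980\n" +
		"hubble_flows_processed_total{verdict=\"FORWARDED\"} 1777"
	got := parseMetrics(sample)
	for _, want := range []string{"hubble_tcp_flags_total", "hubble_flows_processed_total"} {
		if _, ok := got[want]; !ok {
			t.Fatalf("expected metric %s in parsed set %v", want, got)
		}
	}
}

One design consequence worth noting: lines without labels (no `{`) are skipped entirely, so only labeled metrics can be asserted on. If unlabeled metrics ever need checking, a full exposition-format parser such as github.com/prometheus/common/expfmt would be the heavier alternative to this string scan.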