Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

node pod probe daemon #1077

Merged
merged 1 commit into from
Sep 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ jobs:
version: ${{ env.GOLANGCI_VERSION }}
args: --verbose
skip-pkg-cache: true
mod: readonly

markdownlint-misspell-shellcheck:
runs-on: ubuntu-18.04
Expand Down
62 changes: 61 additions & 1 deletion .github/workflows/e2e-1.16.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ on:

env:
# Common versions
GO_VERSION: '1.17'
GO_VERSION: '1.18'
KIND_VERSION: 'v0.14.0'
KIND_IMAGE: 'kindest/node:v1.16.15'
KIND_CLUSTER_NAME: 'ci-testing'
Expand Down Expand Up @@ -85,6 +85,18 @@ jobs:
kubectl get pod -n kruise-system --no-headers -l control-plane=controller-manager | awk '{print $1}' | xargs kubectl logs -p -n kruise-system
exit 1
fi
kubectl get pods -n kruise-system -l control-plane=daemon -o=jsonpath="{range .items[*]}{.metadata.namespace}{\"\t\"}{.metadata.name}{\"\n\"}{end}" | while read ns name;
do
restartCount=$(kubectl get pod -n ${ns} ${name} --no-headers | awk '{print $4}')
if [ "${restartCount}" -eq "0" ];then
echo "Kruise-daemon has not restarted"
else
kubectl get pods -n ${ns} -l control-plane=daemon --no-headers
echo "Kruise-daemon has restarted, abort!!!"
kubectl logs -p -n ${ns} ${name}
exit 1
fi
done
exit $retVal

pullimages-containerrecreate:
Expand Down Expand Up @@ -154,6 +166,18 @@ jobs:
kubectl get pod -n kruise-system --no-headers -l control-plane=controller-manager | awk '{print $1}' | xargs kubectl logs -p -n kruise-system
exit 1
fi
kubectl get pods -n kruise-system -l control-plane=daemon -o=jsonpath="{range .items[*]}{.metadata.namespace}{\"\t\"}{.metadata.name}{\"\n\"}{end}" | while read ns name;
do
restartCount=$(kubectl get pod -n ${ns} ${name} --no-headers | awk '{print $4}')
if [ "${restartCount}" -eq "0" ];then
echo "Kruise-daemon has not restarted"
else
kubectl get pods -n ${ns} -l control-plane=daemon --no-headers
echo "Kruise-daemon has restarted, abort!!!"
kubectl logs -p -n ${ns} ${name}
exit 1
fi
done
exit $retVal

advanced-daemonset:
Expand Down Expand Up @@ -223,6 +247,18 @@ jobs:
kubectl get pod -n kruise-system --no-headers -l control-plane=controller-manager | awk '{print $1}' | xargs kubectl logs -p -n kruise-system
exit 1
fi
kubectl get pods -n kruise-system -l control-plane=daemon -o=jsonpath="{range .items[*]}{.metadata.namespace}{\"\t\"}{.metadata.name}{\"\n\"}{end}" | while read ns name;
do
restartCount=$(kubectl get pod -n ${ns} ${name} --no-headers | awk '{print $4}')
if [ "${restartCount}" -eq "0" ];then
echo "Kruise-daemon has not restarted"
else
kubectl get pods -n ${ns} -l control-plane=daemon --no-headers
echo "Kruise-daemon has restarted, abort!!!"
kubectl logs -p -n ${ns} ${name}
exit 1
fi
done
exit $retVal

sidecarset:
Expand Down Expand Up @@ -292,6 +328,18 @@ jobs:
kubectl get pod -n kruise-system --no-headers -l control-plane=controller-manager | awk '{print $1}' | xargs kubectl logs -p -n kruise-system
exit 1
fi
kubectl get pods -n kruise-system -l control-plane=daemon -o=jsonpath="{range .items[*]}{.metadata.namespace}{\"\t\"}{.metadata.name}{\"\n\"}{end}" | while read ns name;
do
restartCount=$(kubectl get pod -n ${ns} ${name} --no-headers | awk '{print $4}')
if [ "${restartCount}" -eq "0" ];then
echo "Kruise-daemon has not restarted"
else
kubectl get pods -n ${ns} -l control-plane=daemon --no-headers
echo "Kruise-daemon has restarted, abort!!!"
kubectl logs -p -n ${ns} ${name}
exit 1
fi
done
exit $retVal

podUnavailableBudget:
Expand Down Expand Up @@ -419,4 +467,16 @@ jobs:
kubectl get pod -n kruise-system --no-headers -l control-plane=controller-manager | awk '{print $1}' | xargs kubectl logs -p -n kruise-system
exit 1
fi
kubectl get pods -n kruise-system -l control-plane=daemon -o=jsonpath="{range .items[*]}{.metadata.namespace}{\"\t\"}{.metadata.name}{\"\n\"}{end}" | while read ns name;
do
restartCount=$(kubectl get pod -n ${ns} ${name} --no-headers | awk '{print $4}')
if [ "${restartCount}" -eq "0" ];then
echo "Kruise-daemon has not restarted"
else
kubectl get pods -n ${ns} -l control-plane=daemon --no-headers
echo "Kruise-daemon has restarted, abort!!!"
kubectl logs -p -n ${ns} ${name}
exit 1
fi
done
exit $retVal
48 changes: 48 additions & 0 deletions .github/workflows/e2e-1.24.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,18 @@ jobs:
kubectl get pod -n kruise-system --no-headers -l control-plane=controller-manager | awk '{print $1}' | xargs kubectl logs -p -n kruise-system
exit 1
fi
kubectl get pods -n kruise-system -l control-plane=daemon -o=jsonpath="{range .items[*]}{.metadata.namespace}{\"\t\"}{.metadata.name}{\"\n\"}{end}" | while read ns name;
do
restartCount=$(kubectl get pod -n ${ns} ${name} --no-headers | awk '{print $4}')
if [ "${restartCount}" -eq "0" ];then
echo "Kruise-daemon has not restarted"
else
kubectl get pods -n ${ns} -l control-plane=daemon --no-headers
echo "Kruise-daemon has restarted, abort!!!"
kubectl logs -p -n ${ns} ${name}
exit 1
fi
done
exit $retVal

advanced-daemonset:
Expand Down Expand Up @@ -223,6 +235,18 @@ jobs:
kubectl get pod -n kruise-system --no-headers -l control-plane=controller-manager | awk '{print $1}' | xargs kubectl logs -p -n kruise-system
exit 1
fi
kubectl get pods -n kruise-system -l control-plane=daemon -o=jsonpath="{range .items[*]}{.metadata.namespace}{\"\t\"}{.metadata.name}{\"\n\"}{end}" | while read ns name;
do
restartCount=$(kubectl get pod -n ${ns} ${name} --no-headers | awk '{print $4}')
if [ "${restartCount}" -eq "0" ];then
echo "Kruise-daemon has not restarted"
else
kubectl get pods -n ${ns} -l control-plane=daemon --no-headers
echo "Kruise-daemon has restarted, abort!!!"
kubectl logs -p -n ${ns} ${name}
exit 1
fi
done
exit $retVal

sidecarset:
Expand Down Expand Up @@ -292,6 +316,18 @@ jobs:
kubectl get pod -n kruise-system --no-headers -l control-plane=controller-manager | awk '{print $1}' | xargs kubectl logs -p -n kruise-system
exit 1
fi
kubectl get pods -n kruise-system -l control-plane=daemon -o=jsonpath="{range .items[*]}{.metadata.namespace}{\"\t\"}{.metadata.name}{\"\n\"}{end}" | while read ns name;
do
restartCount=$(kubectl get pod -n ${ns} ${name} --no-headers | awk '{print $4}')
if [ "${restartCount}" -eq "0" ];then
echo "Kruise-daemon has not restarted"
else
kubectl get pods -n ${ns} -l control-plane=daemon --no-headers
echo "Kruise-daemon has restarted, abort!!!"
kubectl logs -p -n ${ns} ${name}
exit 1
fi
done
exit $retVal

ephemeraljob:
Expand Down Expand Up @@ -477,4 +513,16 @@ jobs:
kubectl get pod -n kruise-system --no-headers -l control-plane=controller-manager | awk '{print $1}' | xargs kubectl logs -p -n kruise-system
exit 1
fi
kubectl get pods -n kruise-system -l control-plane=daemon -o=jsonpath="{range .items[*]}{.metadata.namespace}{\"\t\"}{.metadata.name}{\"\n\"}{end}" | while read ns name;
do
restartCount=$(kubectl get pod -n ${ns} ${name} --no-headers | awk '{print $4}')
if [ "${restartCount}" -eq "0" ];then
echo "Kruise-daemon has not restarted"
else
kubectl get pods -n ${ns} -l control-plane=daemon --no-headers
echo "Kruise-daemon has restarted, abort!!!"
kubectl logs -p -n ${ns} ${name}
exit 1
fi
done
exit $retVal
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ We encourage contributors to follow the [PR template](./.github/PULL_REQUEST_TEM
As a contributor, if you want to make any contribution to Kruise project, we should reach an agreement on the version of tools used in the development environment.
Here are some dependents with specific version:

- Golang : v1.17+
- Golang : v1.18+
- Kubernetes: v1.16+

### Developing guide
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Build the manager and daemon binaries
FROM golang:1.17 as builder
FROM golang:1.18 as builder

WORKDIR /workspace
# Copy the Go Modules manifests
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile_multiarch
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Build the manager and daemon binaries
FROM --platform=$BUILDPLATFORM golang:1.17 as builder
FROM --platform=$BUILDPLATFORM golang:1.18 as builder

WORKDIR /workspace
# Copy the Go Modules manifests
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ all: build
##@ Development

go_check:
@scripts/check_go_version "1.17.0"
@scripts/check_go_version "1.18.0"

generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
@scripts/generate_client.sh
Expand Down
16 changes: 16 additions & 0 deletions config/rbac/daemon_role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,19 @@ rules:
- get
- list
- watch
- apiGroups:
- apps.kruise.io
resources:
- nodepodprobes
verbs:
- get
- list
- watch
- apiGroups:
- apps.kruise.io
resources:
- nodepodprobes/status
verbs:
- get
- patch
- update
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/openkruise/kruise

go 1.17
go 1.18

require (
github.com/alibaba/pouch v0.0.0-20190328125340-37051654f368
Expand Down
29 changes: 13 additions & 16 deletions pkg/controller/nodepodprobe/node_pod_probe_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,19 @@ import (
"strings"
"time"

"k8s.io/apimachinery/pkg/util/sets"

appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
"github.com/openkruise/kruise/pkg/features"
"github.com/openkruise/kruise/pkg/util"
utilclient "github.com/openkruise/kruise/pkg/util/client"
"github.com/openkruise/kruise/pkg/util/controllerfinder"
utildiscovery "github.com/openkruise/kruise/pkg/util/discovery"
utilfeature "github.com/openkruise/kruise/pkg/util/feature"
"github.com/openkruise/kruise/pkg/util/ratelimiter"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/util/retry"
"k8s.io/klog/v2"
kubecontroller "k8s.io/kubernetes/pkg/controller"
Expand Down Expand Up @@ -67,7 +68,7 @@ var (
// Add creates a new NodePodProbe Controller and adds it to the Manager with default RBAC. The Manager will set fields on the Controller
// and Start it when the Manager is Started.
func Add(mgr manager.Manager) error {
if !utildiscovery.DiscoverGVK(controllerKind) {
if !utildiscovery.DiscoverGVK(controllerKind) || !utilfeature.DefaultFeatureGate.Enabled(features.PodProbeMarkerGate) {
return nil
}
return add(mgr, newReconciler(mgr))
Expand Down Expand Up @@ -220,9 +221,10 @@ func (r *ReconcileNodePodProbe) syncPodFromNodePodProbe(npp *appsv1alpha1.NodePo

func (r *ReconcileNodePodProbe) updatePodProbeStatus(pod *corev1.Pod, status appsv1alpha1.PodProbeStatus) error {
// map[probe.name]->probeState
currentConditions := make(map[string]appsv1alpha1.ProbeState)
for _, condition := range pod.Status.Conditions {
currentConditions[string(condition.Type)] = appsv1alpha1.ProbeState(condition.Status)
currentConditions := make(map[string]*corev1.PodCondition)
for i := range pod.Status.Conditions {
condition := &pod.Status.Conditions[i]
currentConditions[string(condition.Type)] = condition
}
type metadata struct {
Labels map[string]interface{} `json:"labels,omitempty"`
Expand All @@ -239,11 +241,9 @@ func (r *ReconcileNodePodProbe) updatePodProbeStatus(pod *corev1.Pod, status app
validConditionTypes := sets.NewString()
for i := range status.ProbeStates {
probeState := status.ProbeStates[i]
// ignore the probe state
if probeState.State == "" || probeState.State == currentConditions[probeState.Name] {
if probeState.State == "" {
continue
}

// fetch podProbeMarker
ppmName, probeName := strings.Split(probeState.Name, "#")[0], strings.Split(probeState.Name, "#")[1]
ppm := &appsv1alpha1.PodProbeMarker{}
Expand All @@ -267,7 +267,6 @@ func (r *ReconcileNodePodProbe) updatePodProbeStatus(pod *corev1.Pod, status app
break
}
}

if conditionType != "" && validConditionTypes.Has(conditionType) {
klog.Warningf("NodePodProbe(%s) pod(%s/%s) condition(%s) is conflict", ppmName, pod.Namespace, pod.Name, conditionType)
// patch pod condition
Expand All @@ -287,11 +286,9 @@ func (r *ReconcileNodePodProbe) updatePodProbeStatus(pod *corev1.Pod, status app
Message: probeState.Message,
})
}

if len(policy) == 0 {
continue
}

// matchedPolicy is when policy.state is equal to probeState.State, otherwise oppositePolicy
// 1. If policy[0].state = Succeeded, policy[1].state = Failed. probeState.State = Succeeded.
// So policy[0] is matchedPolicy, policy[1] is oppositePolicy
Expand Down Expand Up @@ -328,15 +325,14 @@ func (r *ReconcileNodePodProbe) updatePodProbeStatus(pod *corev1.Pod, status app
if len(probeConditions) == 0 && len(probeMetadata.Labels) == 0 && len(probeMetadata.Annotations) == 0 {
return nil
}

//update pod metadata and status condition
podClone := pod.DeepCopy()
if err = retry.RetryOnConflict(retry.DefaultBackoff, func() error {
if err = r.Client.Get(context.TODO(), types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}, podClone); err != nil {
klog.Errorf("error getting updated pod(%s/%s) from client", pod.Namespace, pod.Name)
return err
}
oldStatus := podClone.DeepCopy()
oldStatus := podClone.Status.DeepCopy()
for i := range probeConditions {
condition := probeConditions[i]
util.SetPodCondition(podClone, condition)
Expand All @@ -363,7 +359,7 @@ func (r *ReconcileNodePodProbe) updatePodProbeStatus(pod *corev1.Pod, status app
podClone.Annotations[k] = v.(string)
}
}
if reflect.DeepEqual(oldStatus, podClone.Status) && reflect.DeepEqual(oldMetadata.Labels, podClone.Labels) &&
if reflect.DeepEqual(oldStatus.Conditions, podClone.Status.Conditions) && reflect.DeepEqual(oldMetadata.Labels, podClone.Labels) &&
reflect.DeepEqual(oldMetadata.Annotations, podClone.Annotations) {
return nil
}
Expand All @@ -372,6 +368,7 @@ func (r *ReconcileNodePodProbe) updatePodProbeStatus(pod *corev1.Pod, status app
klog.Errorf("NodePodProbe patch pod(%s/%s) status failed: %s", podClone.Namespace, podClone.Name, err.Error())
return err
}
klog.V(3).Infof("NodePodProbe update pod(%s/%s) status conditions(%s) success", podClone.Namespace, podClone.Name, util.DumpJSON(probeConditions))
klog.V(3).Infof("NodePodProbe update pod(%s/%s) metadata(%s) conditions(%s) success", podClone.Namespace, podClone.Name,
util.DumpJSON(probeMetadata), util.DumpJSON(probeConditions))
return nil
}
Loading