-
Notifications
You must be signed in to change notification settings - Fork 193
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Surface service-controller LB provisioning failures through status #245
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,24 +7,17 @@ import ( | |
"github.com/google/go-cmp/cmp" | ||
"github.com/google/go-cmp/cmp/cmpopts" | ||
|
||
"github.com/openshift/cluster-ingress-operator/pkg/manifests" | ||
|
||
operatorv1 "github.com/openshift/api/operator/v1" | ||
|
||
appsv1 "k8s.io/api/apps/v1" | ||
corev1 "k8s.io/api/core/v1" | ||
|
||
"k8s.io/apimachinery/pkg/api/meta" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/apimachinery/pkg/runtime" | ||
|
||
"sigs.k8s.io/controller-runtime/pkg/cache" | ||
"sigs.k8s.io/controller-runtime/pkg/client" | ||
) | ||
|
||
// syncIngressControllerStatus computes the current status of ic and | ||
// updates status upon any changes since last sync. | ||
func (r *reconciler) syncIngressControllerStatus(deployment *appsv1.Deployment, ic *operatorv1.IngressController) error { | ||
func (r *reconciler) syncIngressControllerStatus(ic *operatorv1.IngressController, deployment *appsv1.Deployment, service *corev1.Service, operandEvents []corev1.Event) error { | ||
selector, err := metav1.LabelSelectorAsSelector(deployment.Spec.Selector) | ||
if err != nil { | ||
return fmt.Errorf("deployment has invalid spec.selector: %v", err) | ||
|
@@ -36,7 +29,7 @@ func (r *reconciler) syncIngressControllerStatus(deployment *appsv1.Deployment, | |
|
||
updated.Status.Conditions = []operatorv1.OperatorCondition{} | ||
updated.Status.Conditions = append(updated.Status.Conditions, computeIngressStatusConditions(updated.Status.Conditions, deployment)...) | ||
updated.Status.Conditions = append(updated.Status.Conditions, r.statusCache.computeLoadBalancerStatus(ic)...) | ||
updated.Status.Conditions = append(updated.Status.Conditions, computeLoadBalancerStatus(ic, service, operandEvents)...) | ||
|
||
for i := range updated.Status.Conditions { | ||
newCondition := &updated.Status.Conditions[i] | ||
|
@@ -127,119 +120,17 @@ func ingressStatusesEqual(a, b operatorv1.IngressControllerStatus) bool { | |
return true | ||
} | ||
|
||
func isProvisioned(service *corev1.Service) bool { | ||
ingresses := service.Status.LoadBalancer.Ingress | ||
return len(ingresses) > 0 && (len(ingresses[0].Hostname) > 0 || len(ingresses[0].IP) > 0) | ||
} | ||
|
||
func isPending(service *corev1.Service) bool { | ||
return !isProvisioned(service) | ||
} | ||
|
||
func getServiceOwnerIfMatches(service *corev1.Service, matches func(*corev1.Service) bool) []string { | ||
if !matches(service) { | ||
return []string{} | ||
} | ||
controller, ok := service.Labels[manifests.OwningIngressControllerLabel] | ||
if !ok { | ||
return []string{} | ||
} | ||
return []string{controller} | ||
} | ||
|
||
func indexLoadBalancerControllerByName(obj runtime.Object) []string { | ||
c, ok := obj.(*operatorv1.IngressController) | ||
if !ok { | ||
return []string{} | ||
} | ||
if c.Status.EndpointPublishingStrategy != nil && | ||
c.Status.EndpointPublishingStrategy.Type == operatorv1.LoadBalancerServiceStrategyType { | ||
return []string{c.Name} | ||
} | ||
return []string{} | ||
} | ||
|
||
func indexProvisionedLoadBalancerServiceByOwner(obj runtime.Object) []string { | ||
service, ok := obj.(*corev1.Service) | ||
if !ok { | ||
return []string{} | ||
} | ||
if service.Spec.Type != corev1.ServiceTypeLoadBalancer { | ||
return []string{} | ||
} | ||
return getServiceOwnerIfMatches(service, isProvisioned) | ||
} | ||
|
||
func indexPendingLoadBalancerServiceByOwner(obj runtime.Object) []string { | ||
service, ok := obj.(*corev1.Service) | ||
if !ok { | ||
return []string{} | ||
} | ||
if service.Spec.Type != corev1.ServiceTypeLoadBalancer { | ||
return []string{} | ||
} | ||
return getServiceOwnerIfMatches(service, isPending) | ||
} | ||
|
||
// ingressStatusCache knows how to compute status for ingress controllers by | ||
// querying indexed caches. | ||
type ingressStatusCache struct { | ||
// contains returns true if there are >0 matches on value for the named index | ||
// for the given kind. | ||
// | ||
// listKind must be the List type for the object being indexed. | ||
contains func(listKind runtime.Object, name, value string) bool | ||
} | ||
|
||
// serviceIndexers are all the Service indexes supported by the cache. | ||
var serviceIndexers = map[string]client.IndexerFunc{ | ||
"pending-for": indexPendingLoadBalancerServiceByOwner, | ||
"provisioned-for": indexProvisionedLoadBalancerServiceByOwner, | ||
} | ||
|
||
// controllerIndexers are all the IngressController indexes supported by the | ||
// cache. | ||
var controllerIndexers = map[string]client.IndexerFunc{ | ||
"wants-load-balancer": indexLoadBalancerControllerByName, | ||
} | ||
|
||
// cacheContains is a contains function which knows how to query a cache.Cache. | ||
func cacheContains(cache cache.Cache, list runtime.Object, name, value string) bool { | ||
err := cache.List(context.TODO(), list, client.MatchingField(name, value)) | ||
if err != nil { | ||
return false | ||
} | ||
// TODO: after rebase, replace with: | ||
// meta.LenList(list) > 0 | ||
items, _ := meta.ExtractList(list) | ||
return len(items) > 0 | ||
} | ||
|
||
func NewIngressStatusCache(c cache.Cache) *ingressStatusCache { | ||
add := func(cache cache.Cache, kind runtime.Object, indexers map[string]client.IndexerFunc) { | ||
for name := range indexers { | ||
cache.IndexField(kind, name, indexers[name]) | ||
} | ||
} | ||
add(c, &operatorv1.IngressController{}, controllerIndexers) | ||
add(c, &corev1.Service{}, serviceIndexers) | ||
return &ingressStatusCache{ | ||
contains: func(kind runtime.Object, name, value string) bool { | ||
return cacheContains(c, kind, name, value) | ||
}, | ||
} | ||
} | ||
|
||
// computeLoadBalancerStatus returns the complete set of current | ||
// LoadBalancer-prefixed conditions for the given ingress controller. | ||
func (c *ingressStatusCache) computeLoadBalancerStatus(ic *operatorv1.IngressController) []operatorv1.OperatorCondition { | ||
if !c.contains(&operatorv1.IngressControllerList{}, "wants-load-balancer", ic.Name) { | ||
func computeLoadBalancerStatus(ic *operatorv1.IngressController, service *corev1.Service, operandEvents []corev1.Event) []operatorv1.OperatorCondition { | ||
if ic.Status.EndpointPublishingStrategy == nil || | ||
ic.Status.EndpointPublishingStrategy.Type != operatorv1.LoadBalancerServiceStrategyType { | ||
return []operatorv1.OperatorCondition{ | ||
{ | ||
Type: operatorv1.LoadBalancerManagedIngressConditionType, | ||
Status: operatorv1.ConditionFalse, | ||
Reason: "UnsupportedEndpointPublishingStrategy", | ||
Message: fmt.Sprintf("The %s endpoint publishing strategy does not support a load balancer", ic.Status.EndpointPublishingStrategy.Type), | ||
Message: fmt.Sprintf("The endpoint publishing strategy does not support a load balancer"), | ||
}, | ||
} | ||
} | ||
|
@@ -249,33 +140,68 @@ func (c *ingressStatusCache) computeLoadBalancerStatus(ic *operatorv1.IngressCon | |
conditions = append(conditions, operatorv1.OperatorCondition{ | ||
Type: operatorv1.LoadBalancerManagedIngressConditionType, | ||
Status: operatorv1.ConditionTrue, | ||
Reason: "HasLoadBalancerEndpointPublishingStrategy", | ||
Message: "IngressController has LoadBalancer endpoint publishing strategy", | ||
Reason: "WantedByEndpointPublishingStrategy", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I threw this in for discussion. Reflecting on the k8s API guidelines I wonder if this condition type should have been "LoadBalancerUnmanaged=True" — too late! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
We already violate that guideline with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any thoughts on the Reason I'm proposing here? I can revert if the old one is better (and am open to new suggestions). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No preference. I'm not sure a reason is strictly necessary for the "normal" state, but what you have is fine. |
||
Message: "The endpoint publishing strategy supports a managed load balancer", | ||
}) | ||
|
||
switch { | ||
case c.contains(&corev1.ServiceList{}, "pending-for", ic.Name): | ||
case service == nil: | ||
conditions = append(conditions, operatorv1.OperatorCondition{ | ||
Type: operatorv1.LoadBalancerReadyIngressConditionType, | ||
Status: operatorv1.ConditionFalse, | ||
Reason: "LoadBalancerPending", | ||
Message: "The LoadBalancer service is pending", | ||
Reason: "ServiceNotFound", | ||
Message: "The LoadBalancer service resource is missing", | ||
}) | ||
case c.contains(&corev1.ServiceList{}, "provisioned-for", ic.Name): | ||
case isProvisioned(service): | ||
conditions = append(conditions, operatorv1.OperatorCondition{ | ||
Type: operatorv1.LoadBalancerReadyIngressConditionType, | ||
Status: operatorv1.ConditionTrue, | ||
Reason: "LoadBalancerProvisioned", | ||
Message: "The LoadBalancer service is provisioned", | ||
}) | ||
default: | ||
case isPending(service): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not make this the default case? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @Miciah if "Pending" is the default case, does the current default There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. It is dead code anyway. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I went ahead and removed the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's fine. I wonder whether the compiler is smart enough (and the language semantics are flexible enough) to optimize the |
||
reason := "LoadBalancerPending" | ||
message := "The LoadBalancer service is pending" | ||
|
||
// Try to find a more specific reason for the pending status. | ||
createFailedReason := "CreatingLoadBalancerFailed" | ||
failedLoadBalancerEvents := getEventsByReason(operandEvents, "service-controller", createFailedReason) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps these should be sorted by time, descending? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This would be useful once we check for more than 1 event type, but otherwise it is not strictly necessary since we ignore events once the LB is provisioned. |
||
for _, event := range failedLoadBalancerEvents { | ||
involved := event.InvolvedObject | ||
if involved.Kind == "Service" && involved.Namespace == service.Namespace && involved.Name == service.Name { | ||
reason = "CreatingLoadBalancerFailed" | ||
message = fmt.Sprintf("The %s component is reporting CreatingLoadBalancerFailed events like: %s\n%s", | ||
event.Source.Component, event.Message, "The kube-controller-manager logs may contain more details.") | ||
break | ||
} | ||
} | ||
conditions = append(conditions, operatorv1.OperatorCondition{ | ||
Type: operatorv1.LoadBalancerReadyIngressConditionType, | ||
Status: operatorv1.ConditionFalse, | ||
Reason: "LoadBalancerNotFound", | ||
Message: "The LoadBalancer service resource is missing", | ||
Reason: reason, | ||
Message: message, | ||
}) | ||
} | ||
|
||
return conditions | ||
} | ||
|
||
func isProvisioned(service *corev1.Service) bool { | ||
ingresses := service.Status.LoadBalancer.Ingress | ||
return len(ingresses) > 0 && (len(ingresses[0].Hostname) > 0 || len(ingresses[0].IP) > 0) | ||
} | ||
|
||
func isPending(service *corev1.Service) bool { | ||
return !isProvisioned(service) | ||
} | ||
|
||
func getEventsByReason(events []corev1.Event, component, reason string) []corev1.Event { | ||
filtered := []corev1.Event{} | ||
for i := range events { | ||
event := events[i] | ||
if event.Source.Component == component && event.Reason == reason { | ||
filtered = append(filtered, event) | ||
} | ||
} | ||
return filtered | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"openshift-ingress"
→ `lbService.Namespace`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay, so this is really subtle and dangerous, but `lbService` can be `nil`. Would love to separately take some action on our prior discussions about: Basically I wonder if there's any local fix right here (where's the canonical place to discover the operand namespace in this context?).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, snap, good point! Well, does it make sense to look for events if the service does not exist?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The lookup isn't dependent on the service, and the lookup is from a cache, so we could choose to provide whatever context we can (consistent with the other possibly-nil inputs).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If the service doesn't exist, then `computeLoadBalancerStatus` won't even look at the events, and logically, why would it? Do we anticipate that we might, with future changes, care about events that are not related to the service?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`computeLoadBalancerStatus` won't, but this function doesn't know that. This function just knows `computeLoadBalancerStatus` wants events in a namespace. I propose this contract: 😁
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That seems reasonable... or would it be simpler for `syncIngressControllerStatus` to look up events itself?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could perhaps ask the same question regarding all the other arguments... For now, unless there's a logic error, maybe we can continue the style discussion on subsequent PRs?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, `syncIngressControllerStatus` needs to know the ingress controller for which it is updating status, and `ensureIngressController` just created or got the deployment and service, which are sound reasons for `ensureIngressController` to pass those values to `syncIngressControllerStatus`, whereas listing events in `ensureIngressController` instead of in `syncIngressControllerStatus` gratuitously separates the logic of listing events from the logic that determines whether the events need to be listed.
That is not to say that your current approach is unacceptable; the above is only responding to the above-quoted assertion.
That said, if `ensureIngressController` does handle listing events but gets an error, how about passing a nil slice to `syncIngressControllerStatus`?
That's fine.