Skip to content

Commit c8005ed

Browse files
CNS - consider multiple IPs for a pod in reconciliation after restart (#2079)
* modifying reconcile function to take multiple ncs instead of one. we need this because for reconciliation in dual stack cases both IPs for a pod which can come from distinct ncs must be added at the same time * adding comments, and renaming functions and types, to make the intent clearer * adding some dummy values to cns.NewPodInfo invocations in tests, instead of empty strings since we need distinct interface ids * adding a basic test for dual stack reconciliation * adding validation for multiple ips for the same ip family on the same pod, which is not allowed * changing direct use of interface id to pod key, which is better for reconcile flows using information from kubernetes instead of cni * fixing comments to use host network terminology instead of system pod * improving error message on duplicate ip from cni found; improving readability of error handling on ip parsing * only checking for pod ips that are actually set on a pod
1 parent 0e4fbdd commit c8005ed

File tree

5 files changed

+443
-85
lines changed

5 files changed

+443
-85
lines changed

cns/client/client_test.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ func TestCNSClientPodContextApi(t *testing.T) {
314314

315315
addTestStateToRestServer(t, secondaryIps)
316316

317-
podInfo := cns.NewPodInfo("", "", podName, podNamespace)
317+
podInfo := cns.NewPodInfo("some-guid-1", "abc-eth0", podName, podNamespace)
318318
orchestratorContext, err := json.Marshal(podInfo)
319319
assert.NoError(t, err)
320320

@@ -344,7 +344,7 @@ func TestCNSClientDebugAPI(t *testing.T) {
344344

345345
addTestStateToRestServer(t, secondaryIps)
346346

347-
podInfo := cns.NewPodInfo("", "", podName, podNamespace)
347+
podInfo := cns.NewPodInfo("some-guid-1", "abc-eth0", podName, podNamespace)
348348
orchestratorContext, err := json.Marshal(podInfo)
349349
assert.NoError(t, err)
350350

cns/cnireconciler/podinfoprovider.go

+12-9
Original file line numberDiff line numberDiff line change
@@ -51,19 +51,22 @@ func newCNIPodInfoProvider(exec exec.Interface) (cns.PodInfoByIPProvider, error)
5151
}
5252

5353
// cniStateToPodInfoByIP converts an AzureCNIState dumped from a CNI exec
54-
// into a PodInfo map, using the first endpoint IP as the key in the map.
54+
// into a PodInfo map, using the endpoint IPs as keys in the map.
55+
// for pods with multiple IPs (such as in dualstack cases), this means multiple keys in the map
56+
// will point to the same pod information.
5557
func cniStateToPodInfoByIP(state *api.AzureCNIState) (map[string]cns.PodInfo, error) {
5658
podInfoByIP := map[string]cns.PodInfo{}
5759
for _, endpoint := range state.ContainerInterfaces {
58-
if _, ok := podInfoByIP[endpoint.IPAddresses[0].IP.String()]; ok {
59-
return nil, errors.Wrap(cns.ErrDuplicateIP, endpoint.IPAddresses[0].IP.String())
60+
for _, epIP := range endpoint.IPAddresses {
61+
podInfo := cns.NewPodInfo(endpoint.ContainerID, endpoint.PodEndpointId, endpoint.PodName, endpoint.PodNamespace)
62+
63+
ipKey := epIP.IP.String()
64+
if prevPodInfo, ok := podInfoByIP[ipKey]; ok {
65+
return nil, errors.Wrapf(cns.ErrDuplicateIP, "duplicate ip %s found for different pods: pod: %+v, pod: %+v", ipKey, podInfo, prevPodInfo)
66+
}
67+
68+
podInfoByIP[ipKey] = podInfo
6069
}
61-
podInfoByIP[endpoint.IPAddresses[0].IP.String()] = cns.NewPodInfo(
62-
endpoint.ContainerID,
63-
endpoint.PodEndpointId,
64-
endpoint.PodName,
65-
endpoint.PodNamespace,
66-
)
6770
}
6871
return podInfoByIP, nil
6972
}

cns/restserver/internalapi.go

+140-31
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"context"
99
"encoding/json"
1010
"fmt"
11+
"net"
1112
"net/http"
1213
"net/http/httptest"
1314
"reflect"
@@ -274,57 +275,165 @@ func (service *HTTPRestService) syncHostNCVersion(ctx context.Context, channelMo
274275
return len(programmedNCs), nil
275276
}
276277

277-
// This API will be called by CNS RequestController on CRD update.
278-
func (service *HTTPRestService) ReconcileNCState(ncRequest *cns.CreateNetworkContainerRequest, podInfoByIP map[string]cns.PodInfo, nnc *v1alpha.NodeNetworkConfig) types.ResponseCode {
279-
logger.Printf("Reconciling NC state with CreateNCRequest: [%v], PodInfo [%+v], NNC: [%+v]", ncRequest, podInfoByIP, nnc)
280-
// check if ncRequest is null, then return as there is no CRD state yet
281-
if ncRequest == nil {
278+
func (service *HTTPRestService) ReconcileIPAMState(ncReqs []*cns.CreateNetworkContainerRequest, podInfoByIP map[string]cns.PodInfo, nnc *v1alpha.NodeNetworkConfig) types.ResponseCode {
279+
logger.Printf("Reconciling CNS IPAM state with nc requests: [%+v], PodInfo [%+v], NNC: [%+v]", ncReqs, podInfoByIP, nnc)
280+
// if no nc reqs, there is no CRD state yet
281+
if len(ncReqs) == 0 {
282282
logger.Printf("CNS starting with no NC state, podInfoMap count %d", len(podInfoByIP))
283283
return types.Success
284284
}
285285

286-
// If the NC was created successfully, then reconcile the assigned pod state
287-
returnCode := service.CreateOrUpdateNetworkContainerInternal(ncRequest)
288-
if returnCode != types.Success {
289-
return returnCode
286+
// first step in reconciliation is to create all the NCs in CNS, no IP assignment yet.
287+
for _, ncReq := range ncReqs {
288+
returnCode := service.CreateOrUpdateNetworkContainerInternal(ncReq)
289+
if returnCode != types.Success {
290+
return returnCode
291+
}
290292
}
291293

292-
// now parse the secondaryIP list, if it exists in PodInfo list, then assign that ip.
293-
for _, secIPConfig := range ncRequest.SecondaryIPConfigs {
294-
if podInfo, exists := podInfoByIP[secIPConfig.IPAddress]; exists {
295-
logger.Printf("SecondaryIP %+v is assigned to Pod. %+v, ncId: %s", secIPConfig, podInfo, ncRequest.NetworkContainerid)
294+
// index all the secondary IP configs for all the nc reqs, for easier lookup later on.
295+
allSecIPsIdx := make(map[string]*cns.CreateNetworkContainerRequest)
296+
for i := range ncReqs {
297+
for _, secIPConfig := range ncReqs[i].SecondaryIPConfigs {
298+
allSecIPsIdx[secIPConfig.IPAddress] = ncReqs[i]
299+
}
300+
}
296301

297-
jsonContext, err := podInfo.OrchestratorContext()
298-
if err != nil {
299-
logger.Errorf("Failed to marshal KubernetesPodInfo, error: %v", err)
300-
return types.UnexpectedError
301-
}
302+
// we now need to reconcile IP assignment.
303+
// considering that a single pod may have multiple ips (such as in dual stack scenarios)
304+
// and that IP assignment in CNS (as done by requestIPConfigsHelper) does not allow
305+
// updates (it returns the existing state if one already exists for the pod's interface),
306+
// we need to assign all IPs for a pod interface or name+namespace at the same time.
307+
//
308+
// iterating over single IPs is not appropriate then, since assignment for the first IP for
309+
// a pod will prevent the second IP from being added. the following function call transforms
310+
// pod info indexed by ip address:
311+
//
312+
// {
313+
// "10.0.0.1": podInfo{interface: "aaa-eth0"},
314+
// "fe80::1": podInfo{interface: "aaa-eth0"},
315+
// }
316+
//
317+
// to pod IPs indexed by pod key (interface or name+namespace, depending on scenario):
318+
//
319+
// {
320+
// "aaa-eth0": podIPs{v4IP: 10.0.0.1, v6IP: fe80::1}
321+
// }
322+
//
323+
// such that we can iterate over pod interfaces, and assign all IPs for it at once.
324+
podKeyToPodIPs, err := newPodKeyToPodIPsMap(podInfoByIP)
325+
if err != nil {
326+
logger.Errorf("could not transform pods indexed by IP address to pod IPs indexed by interface: %v", err)
327+
return types.UnexpectedError
328+
}
302329

303-
ipconfigsRequest := cns.IPConfigsRequest{
304-
DesiredIPAddresses: []string{secIPConfig.IPAddress},
305-
OrchestratorContext: jsonContext,
306-
InfraContainerID: podInfo.InfraContainerID(),
307-
PodInterfaceID: podInfo.InterfaceID(),
308-
}
330+
for podKey, podIPs := range podKeyToPodIPs {
331+
var (
332+
desiredIPs []string
333+
ncIDs []string
334+
)
335+
336+
var ips []net.IP
337+
if podIPs.v4IP != nil {
338+
ips = append(ips, podIPs.v4IP)
339+
}
309340

310-
if _, err := requestIPConfigsHelper(service, ipconfigsRequest); err != nil {
311-
logger.Errorf("AllocateIPConfig failed for SecondaryIP %+v, podInfo %+v, ncId %s, error: %v", secIPConfig, podInfo, ncRequest.NetworkContainerid, err)
312-
return types.FailedToAllocateIPConfig
341+
if podIPs.v6IP != nil {
342+
ips = append(ips, podIPs.v6IP)
343+
}
344+
345+
for _, ip := range ips {
346+
if ncReq, ok := allSecIPsIdx[ip.String()]; ok {
347+
logger.Printf("secondary ip %s is assigned to pod %+v, ncId: %s ncVersion: %s", ip, podIPs, ncReq.NetworkContainerid, ncReq.Version)
348+
desiredIPs = append(desiredIPs, ip.String())
349+
ncIDs = append(ncIDs, ncReq.NetworkContainerid)
350+
} else {
351+
// it might still be possible to see host networking pods here (where ips are not from ncs) if we are restoring using the kube podinfo provider
352+
// todo: once kube podinfo provider reconcile flow is removed, this line will not be necessary/should be removed.
353+
logger.Errorf("ip %s assigned to pod %+v but not found in any nc", ip, podIPs)
313354
}
314-
} else {
315-
logger.Printf("SecondaryIP %+v is not assigned. ncId: %s", secIPConfig, ncRequest.NetworkContainerid)
355+
}
356+
357+
if len(desiredIPs) == 0 {
358+
// this may happen for pods in the host network
359+
continue
360+
}
361+
362+
jsonContext, err := podIPs.OrchestratorContext()
363+
if err != nil {
364+
logger.Errorf("Failed to marshal KubernetesPodInfo, error: %v", err)
365+
return types.UnexpectedError
366+
}
367+
368+
ipconfigsRequest := cns.IPConfigsRequest{
369+
DesiredIPAddresses: desiredIPs,
370+
OrchestratorContext: jsonContext,
371+
InfraContainerID: podIPs.InfraContainerID(),
372+
PodInterfaceID: podIPs.InterfaceID(),
373+
}
374+
375+
if _, err := requestIPConfigsHelper(service, ipconfigsRequest); err != nil {
376+
logger.Errorf("requestIPConfigsHelper failed for pod key %s, podInfo %+v, ncIds %v, error: %v", podKey, podIPs, ncIDs, err)
377+
return types.FailedToAllocateIPConfig
316378
}
317379
}
318380

319-
err := service.MarkExistingIPsAsPendingRelease(nnc.Spec.IPsNotInUse)
320-
if err != nil {
381+
if err := service.MarkExistingIPsAsPendingRelease(nnc.Spec.IPsNotInUse); err != nil {
321382
logger.Errorf("[Azure CNS] Error. Failed to mark IPs as pending %v", nnc.Spec.IPsNotInUse)
322383
return types.UnexpectedError
323384
}
324385

325386
return 0
326387
}
327388

389+
var (
390+
errIPParse = errors.New("parse IP")
391+
errMultipleIPPerFamily = errors.New("multiple IPs per family")
392+
)
393+
394+
// newPodKeyToPodIPsMap groups IPs by interface id and returns them indexed by interface id.
395+
func newPodKeyToPodIPsMap(podInfoByIP map[string]cns.PodInfo) (map[string]podIPs, error) {
396+
podKeyToPodIPs := make(map[string]podIPs)
397+
398+
for ipStr, podInfo := range podInfoByIP {
399+
id := podInfo.Key()
400+
401+
ips, ok := podKeyToPodIPs[id]
402+
if !ok {
403+
ips.PodInfo = podInfo
404+
}
405+
406+
ip := net.ParseIP(ipStr)
407+
switch {
408+
case ip == nil:
409+
return nil, errors.Wrapf(errIPParse, "could not parse ip string %q on pod %+v", ipStr, podInfo)
410+
case ip.To4() != nil:
411+
if ips.v4IP != nil {
412+
return nil, errors.Wrapf(errMultipleIPPerFamily, "multiple ipv4 addresses (%v, %v) associated to pod %+v", ips.v4IP, ip, podInfo)
413+
}
414+
415+
ips.v4IP = ip
416+
case ip.To16() != nil:
417+
if ips.v6IP != nil {
418+
return nil, errors.Wrapf(errMultipleIPPerFamily, "multiple ipv6 addresses (%v, %v) associated to pod %+v", ips.v6IP, ip, podInfo)
419+
}
420+
421+
ips.v6IP = ip
422+
}
423+
424+
podKeyToPodIPs[id] = ips
425+
}
426+
427+
return podKeyToPodIPs, nil
428+
}
429+
430+
// podIPs are all the IPs associated with a pod, along with pod info
431+
type podIPs struct {
432+
cns.PodInfo
433+
v4IP net.IP
434+
v6IP net.IP
435+
}
436+
328437
// GetNetworkContainerInternal gets network container details.
329438
func (service *HTTPRestService) GetNetworkContainerInternal(
330439
req cns.GetNetworkContainerRequest,

0 commit comments

Comments
 (0)