diff --git a/pkg/cloudprovider/cloudprovider.go b/pkg/cloudprovider/cloudprovider.go index 8db68862d..6138f29d1 100644 --- a/pkg/cloudprovider/cloudprovider.go +++ b/pkg/cloudprovider/cloudprovider.go @@ -18,7 +18,6 @@ import ( "github.com/Azure/go-autorest/autorest/azure" "github.com/golang/glog" yaml "gopkg.in/yaml.v2" - corev1 "k8s.io/api/core/v1" ) // Client is a cloud provider client @@ -31,10 +30,10 @@ type Client struct { // ClientInt client interface type ClientInt interface { - RemoveUserMSI(userAssignedMSIID string, node *corev1.Node) error - AssignUserMSI(userAssignedMSIID string, node *corev1.Node) error - UpdateUserMSI(addUserAssignedMSIIDs []string, removeUserAssignedMSIIDs []string, node *corev1.Node) error - GetUserMSIs(node *corev1.Node) ([]string, error) + RemoveUserMSI(userAssignedMSIID, name string, isvmss bool) error + AssignUserMSI(userAssignedMSIID, name string, isvmss bool) error + UpdateUserMSI(addUserAssignedMSIIDs, removeUserAssignedMSIIDs []string, name string, isvmss bool) error + GetUserMSIs(name string, isvmss bool) ([]string, error) } // NewCloudProvider returns a azure cloud provider client @@ -152,9 +151,9 @@ func withInspection() autorest.PrepareDecorator { } } -// GetUserMSIs will return a list of all identities on the node -func (c *Client) GetUserMSIs(node *corev1.Node) ([]string, error) { - idH, _, err := c.getIdentityResource(node) +// GetUserMSIs will return a list of all identities on the node or vmss based on value of isvmss +func (c *Client) GetUserMSIs(name string, isvmss bool) ([]string, error) { + idH, _, err := c.getIdentityResource(name, isvmss) if err != nil { glog.Errorf("GetUserMSIs: get identity resource failed with error %v", err) return nil, err @@ -168,8 +167,8 @@ func (c *Client) GetUserMSIs(node *corev1.Node) ([]string, error) { } // UpdateUserMSI will batch process the removal and addition of ids -func (c *Client) UpdateUserMSI(addUserAssignedMSIIDs []string, removeUserAssignedMSIIDs []string, node *corev1.Node) error { - idH, updateFunc, err := c.getIdentityResource(node) +func (c *Client) UpdateUserMSI(addUserAssignedMSIIDs, removeUserAssignedMSIIDs []string, name string, isvmss bool) error { + idH, updateFunc, err := c.getIdentityResource(name, isvmss) if err != nil { return err } @@ -184,43 +183,43 @@ func (c *Client) UpdateUserMSI(addUserAssignedMSIIDs []string, removeUserAssigne for _, userAssignedMSIID := range removeUserAssignedMSIIDs { requiresUpdate = true if err := info.RemoveUserIdentity(userAssignedMSIID); err != nil { - return fmt.Errorf("could not remove identity from node %s: %v", node.Name, err) + return fmt.Errorf("could not remove identity from node %s: %v", name, err) } } // add new ids to the list for _, userAssignedMSIID := range addUserAssignedMSIIDs { addedToList := info.AppendUserIdentity(userAssignedMSIID) if !addedToList { - glog.V(6).Infof("Identity %s already assigned to node %s. Skipping assignment.", userAssignedMSIID, node.Name) + glog.V(6).Infof("Identity %s already assigned to node %s. Skipping assignment.", userAssignedMSIID, name) } requiresUpdate = requiresUpdate || addedToList } if requiresUpdate { - glog.Infof("Updating user assigned MSIs on %s", node.Name) + glog.Infof("Updating user assigned MSIs on %s", name) timeStarted := time.Now() if err := updateFunc(); err != nil { return err } - glog.V(6).Infof("UpdateUserMSI of %s completed in %s", node.Name, time.Since(timeStarted)) + glog.V(6).Infof("UpdateUserMSI of %s completed in %s", name, time.Since(timeStarted)) } return nil } //RemoveUserMSI - Use the underlying cloud api calls and remove the given user assigned MSI from the vm. -func (c *Client) RemoveUserMSI(userAssignedMSIID string, node *corev1.Node) error { - idH, updateFunc, err := c.getIdentityResource(node) +func (c *Client) RemoveUserMSI(userAssignedMSIID, name string, isvmss bool) error { + idH, updateFunc, err := c.getIdentityResource(name, isvmss) if err != nil { return err } info := idH.IdentityInfo() if info == nil { - glog.Errorf("Identity null for vm: %s ", node.Name) - return fmt.Errorf("identity null for vm: %s ", node.Name) + glog.Errorf("Identity null for vm: %s ", name) + return fmt.Errorf("identity null for vm: %s ", name) } if err := info.RemoveUserIdentity(userAssignedMSIID); err != nil { - return fmt.Errorf("could not remove identity from node %s: %v", node.Name, err) + return fmt.Errorf("could not remove identity from node %s: %v", name, err) } if err := updateFunc(); err != nil { @@ -232,18 +231,18 @@ func (c *Client) RemoveUserMSI(userAssignedMSIID string, node *corev1.Node) erro } // AssignUserMSI - Use the underlying cloud api call and add the given user assigned MSI to the vm -func (c *Client) AssignUserMSI(userAssignedMSIID string, node *corev1.Node) error { +func (c *Client) AssignUserMSI(userAssignedMSIID, name string, isvmss bool) error { // Get the vm using the VmClient // Update the assigned identity into the VM using the CreateOrUpdate - glog.Infof("Find %s in resource group: %s", node.Name, c.Config.ResourceGroupName) + glog.Infof("Find %s in resource group: %s", name, c.Config.ResourceGroupName) timeStarted := time.Now() - idH, updateFunc, err := c.getIdentityResource(node) + idH, updateFunc, err := c.getIdentityResource(name, isvmss) if err != nil { return err } - glog.V(6).Infof("Get of %s completed in %s", node.Name, time.Since(timeStarted)) + glog.V(6).Infof("Get of %s completed in %s", name, time.Since(timeStarted)) info := idH.IdentityInfo() if info == nil { @@ -255,34 +254,17 @@ func (c *Client) AssignUserMSI(userAssignedMSIID string, node *corev1.Node) erro if err := updateFunc(); err != nil { return err } - glog.V(6).Infof("CreateOrUpdate of %s completed in %s", node.Name, time.Since(timeStarted)) + glog.V(6).Infof("CreateOrUpdate of %s completed in %s", name, time.Since(timeStarted)) } else { - glog.V(6).Infof("Identity %s already assigned to node %s. Skipping assignment.", userAssignedMSIID, node.Name) + glog.V(6).Infof("Identity %s already assigned to node %s. Skipping assignment.", userAssignedMSIID, name) } return nil } -func (c *Client) getIdentityResource(node *corev1.Node) (idH IdentityHolder, update func() error, retErr error) { - name := node.Name // fallback in case parsing the provider spec fails +func (c *Client) getIdentityResource(name string, isvmss bool) (idH IdentityHolder, update func() error, retErr error) { rg := c.Config.ResourceGroupName - r, err := ParseResourceID(node.Spec.ProviderID) - if err != nil { - glog.Warningf("Could not parse Azure node resource ID: %v", err) - } - - rt := vmTypeOrDefault(&r, c.Config.VMType) - glog.V(6).Infof("Using resource type %s for node %s", rt, name) - - if r.ResourceGroup != "" { - rg = r.ResourceGroup - } - - if r.ResourceName != "" { - name = r.ResourceName - } - switch rt { - case "vmss": + if isvmss { vmss, err := c.VMSSClient.Get(rg, name) if err != nil { return nil, nil, err @@ -292,16 +274,17 @@ func (c *Client) getIdentityResource(node *corev1.Node) (idH IdentityHolder, upd return c.VMSSClient.CreateOrUpdate(rg, name, vmss) } idH = &vmssIdentityHolder{&vmss} - default: - vm, err := c.VMClient.Get(rg, name) - if err != nil { - return nil, nil, err - } - update = func() error { - return c.VMClient.CreateOrUpdate(rg, name, vm) - } - idH = &vmIdentityHolder{&vm} + return idH, update, nil + } + + vm, err := c.VMClient.Get(rg, name) + if err != nil { + return nil, nil, err + } + update = func() error { + return c.VMClient.CreateOrUpdate(rg, name, vm) } + idH = &vmIdentityHolder{&vm} return idH, update, nil } @@ -315,7 +298,9 @@ var ( ) const ( - VMResourceType = "virtualMachines" + // VMResourceType virtual machine resource type + VMResourceType = "virtualMachines" + // VMSSResourceType virtual machine scale sets resource type VMSSResourceType = "virtualMachineScaleSets" ) diff --git a/pkg/cloudprovider/cloudprovider_test.go b/pkg/cloudprovider/cloudprovider_test.go index 32c035823..df1317a19 100644 --- a/pkg/cloudprovider/cloudprovider_test.go +++ b/pkg/cloudprovider/cloudprovider_test.go @@ -80,63 +80,63 @@ func TestSimple(t *testing.T) { node3 := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node3-0"}, Spec: corev1.NodeSpec{ProviderID: vmProvider}} node4 := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node4-vmss0000000"}, Spec: corev1.NodeSpec{ProviderID: vmssProvider}} - cloudClient.AssignUserMSI("ID0", node0) - cloudClient.AssignUserMSI("ID0", node0) - cloudClient.AssignUserMSI("ID0again", node0) - cloudClient.AssignUserMSI("ID1", node1) - cloudClient.AssignUserMSI("ID2", node2) - cloudClient.AssignUserMSI("ID3", node3) - cloudClient.AssignUserMSI("ID4", node4) + cloudClient.AssignUserMSI("ID0", node0.Name, false) + cloudClient.AssignUserMSI("ID0", node0.Name, false) + cloudClient.AssignUserMSI("ID0again", node0.Name, false) + cloudClient.AssignUserMSI("ID1", node1.Name, false) + cloudClient.AssignUserMSI("ID2", node2.Name, false) + cloudClient.AssignUserMSI("ID3", node3.Name, false) + cloudClient.AssignUserMSI("ID4", node4.Name, true) testMSI := []string{"ID0", "ID0again"} - if !cloudClient.CompareMSI(node0, testMSI) { + if !cloudClient.CompareMSI(node0.Name, false, testMSI) { cloudClient.PrintMSI(t) t.Error("MSI mismatch") } - cloudClient.RemoveUserMSI("ID0", node0) - cloudClient.RemoveUserMSI("ID2", node2) + cloudClient.RemoveUserMSI("ID0", node0.Name, false) + cloudClient.RemoveUserMSI("ID2", node2.Name, false) testMSI = []string{"ID0again"} - if !cloudClient.CompareMSI(node0, testMSI) { + if !cloudClient.CompareMSI(node0.Name, false, testMSI) { cloudClient.PrintMSI(t) t.Error("MSI mismatch") } testMSI = []string{} - if !cloudClient.CompareMSI(node2, testMSI) { + if !cloudClient.CompareMSI(node2.Name, false, testMSI) { cloudClient.PrintMSI(t) t.Error("MSI mismatch") } testMSI = []string{"ID3"} - if !cloudClient.CompareMSI(node3, testMSI) { + if !cloudClient.CompareMSI(node3.Name, false, testMSI) { cloudClient.PrintMSI(t) t.Error("MSI mismatch") } testMSI = []string{"ID4"} - if !cloudClient.CompareMSI(node4, testMSI) { + if !cloudClient.CompareMSI(node4.Name, true, testMSI) { cloudClient.PrintMSI(t) t.Error("MSI mismatch") } // test the UpdateUserMSI interface - cloudClient.UpdateUserMSI([]string{"ID1", "ID2", "ID3"}, []string{"ID0again"}, node0) + cloudClient.UpdateUserMSI([]string{"ID1", "ID2", "ID3"}, []string{"ID0again"}, node0.Name, false) testMSI = []string{"ID1", "ID2", "ID3"} - if !cloudClient.CompareMSI(node0, testMSI) { + if !cloudClient.CompareMSI(node0.Name, false, testMSI) { cloudClient.PrintMSI(t) t.Error("MSI mismatch") } - cloudClient.UpdateUserMSI(nil, []string{"ID3"}, node3) + cloudClient.UpdateUserMSI(nil, []string{"ID3"}, node3.Name, false) testMSI = []string{} - if !cloudClient.CompareMSI(node3, testMSI) { + if !cloudClient.CompareMSI(node3.Name, false, testMSI) { cloudClient.PrintMSI(t) t.Error("MSI mismatch") } - cloudClient.UpdateUserMSI([]string{"ID3"}, nil, node4) + cloudClient.UpdateUserMSI([]string{"ID3"}, nil, node4.Name, true) testMSI = []string{"ID4", "ID3"} - if !cloudClient.CompareMSI(node4, testMSI) { + if !cloudClient.CompareMSI(node4.Name, true, testMSI) { cloudClient.PrintMSI(t) t.Error("MSI mismatch") } @@ -295,14 +295,8 @@ func (c *TestCloudClient) ListMSI() (ret map[string]*[]string) { return ret } -func (c *TestCloudClient) CompareMSI(node *corev1.Node, userIDs []string) bool { - r, _ := ParseResourceID(node.Spec.ProviderID) - vmType := vmTypeOrDefault(&r, c.Client.Config.VMType) - name := node.Name - if r.ResourceName != "" { - name = r.ResourceName - } - if vmType == "vmss" { +func (c *TestCloudClient) CompareMSI(name string, isvmss bool, userIDs []string) bool { + if isvmss { return c.testVMSSClient.CompareMSI(name, userIDs) } return c.testVMClient.CompareMSI(name, userIDs) diff --git a/pkg/mic/mic.go b/pkg/mic/mic.go index 757a0c69d..d2d8f5fc4 100644 --- a/pkg/mic/mic.go +++ b/pkg/mic/mic.go @@ -59,8 +59,7 @@ type Client struct { SyncLoopStarted bool syncRetryInterval time.Duration - syncing int32 // protect against conucrrent sync's - statsMutex sync.Mutex + syncing int32 // protect against conucrrent sync's leaderElector *leaderelection.LeaderElector *LeaderElectionConfig @@ -77,6 +76,7 @@ type trackUserAssignedMSIIds struct { removeUserAssignedMSIIDs []string assignedIDsToCreate []aadpodid.AzureAssignedIdentity assignedIDsToDelete []aadpodid.AzureAssignedIdentity + isvmss bool } // NewMICClient returnes new mic client @@ -294,6 +294,8 @@ func (c *Client) Sync(exit <-chan struct{}) { } glog.V(5).Infof("del: %v, add: %v", deleteList, addList) + // the node map is used to track assigned ids to create/delete, identities to assign/remove + // for each node or vmss nodeMap := make(map[string]trackUserAssignedMSIIds) // seperate the add and delete list per node @@ -310,8 +312,15 @@ func (c *Client) Sync(exit <-chan struct{}) { c.getListOfIdsToAssign(*addList, nodeMap) } - // one final createorupdate to each node in the map - c.updateNodeAndDeps(newAssignedIDs, nodeMap, nodeRefs) + var wg sync.WaitGroup + + // check if vmss and consolidate vmss nodes into vmss if necessary + c.consolidateVMSSNodes(nodeMap, &wg) + + // one final createorupdate to each node or vmss in the map + c.updateNodeAndDeps(newAssignedIDs, nodeMap, nodeRefs, &wg) + + wg.Wait() if workDone { idsFound := 0 @@ -340,6 +349,7 @@ func (c *Client) convertAssignedIDListToMap(addList, deleteList *[]aadpodid.Azur nodeMap[createID.Spec.NodeName] = trackUserAssignedMSIIds{assignedIDsToCreate: []aadpodid.AzureAssignedIdentity{createID}} } } + if deleteList != nil { for _, delID := range *deleteList { if trackList, ok := nodeMap[delID.Spec.NodeName]; ok { @@ -632,8 +642,8 @@ func (c *Client) checkIfMSIExistsOnNode(id *aadpodid.AzureIdentity, nodeName str return false } -func (c *Client) getUserMSIListForNode(node *corev1.Node) ([]string, error) { - return c.CloudClient.GetUserMSIs(node) +func (c *Client) getUserMSIListForNode(nodeOrVMSSName string, isvmss bool) ([]string, error) { + return c.CloudClient.GetUserMSIs(nodeOrVMSSName, isvmss) } func (c *Client) convertIDListToMap(arr []aadpodid.AzureIdentity) (m map[string]aadpodid.AzureIdentity, err error) { @@ -702,19 +712,17 @@ func (c *Client) updateAssignedIdentityStatus(assignedID *aadpodid.AzureAssigned return c.CRDClient.UpdateAzureAssignedIdentityStatus(assignedID, status) } -func (c *Client) updateNodeAndDeps(newAssignedIDs []aadpodid.AzureAssignedIdentity, nodeMap map[string]trackUserAssignedMSIIds, nodeRefs map[string]bool) { - var wg sync.WaitGroup +func (c *Client) updateNodeAndDeps(newAssignedIDs []aadpodid.AzureAssignedIdentity, nodeMap map[string]trackUserAssignedMSIIds, nodeRefs map[string]bool, wg *sync.WaitGroup) { for nodeName, nodeTrackList := range nodeMap { wg.Add(1) - go c.updateUserMSI(newAssignedIDs, nodeName, nodeTrackList, nodeRefs, &wg) + go c.updateUserMSI(newAssignedIDs, nodeName, nodeTrackList, nodeRefs, wg) } - wg.Wait() } -func (c *Client) updateUserMSI(newAssignedIDs []aadpodid.AzureAssignedIdentity, nodeName string, nodeTrackList trackUserAssignedMSIIds, nodeRefs map[string]bool, wg *sync.WaitGroup) { +func (c *Client) updateUserMSI(newAssignedIDs []aadpodid.AzureAssignedIdentity, nodeOrVMSSName string, nodeTrackList trackUserAssignedMSIIds, nodeRefs map[string]bool, wg *sync.WaitGroup) { defer wg.Done() beginAdding := time.Now() - glog.Infof("Processing node %s, add [%d], del [%d]", nodeName, len(nodeTrackList.assignedIDsToCreate), len(nodeTrackList.assignedIDsToDelete)) + glog.Infof("Processing node %s, add [%d], del [%d]", nodeOrVMSSName, len(nodeTrackList.assignedIDsToCreate), len(nodeTrackList.assignedIDsToDelete)) for _, createID := range nodeTrackList.assignedIDsToCreate { if createID.Status.Status == "" { @@ -736,27 +744,12 @@ func (c *Client) updateUserMSI(newAssignedIDs []aadpodid.AzureAssignedIdentity, addUserAssignedMSIIDs := c.getUniqueIDs(nodeTrackList.addUserAssignedMSIIDs) removeUserAssignedMSIIDs := c.getUniqueIDs(nodeTrackList.removeUserAssignedMSIIDs) - node, err := c.NodeClient.Get(nodeName) - if err != nil { - if !strings.Contains(err.Error(), "not found") { - glog.Errorf("Unable to get node %s while updating user msis. Error %v", nodeName, err) - return - } - - glog.Warningf("Unable to get node %s while updating user msis. Error %v", nodeName, err) - - // node is no longer found in the cluster, all the assigned identities that were created in this sync loop - // and those that already exist for this node need to be deleted. - c.cleanUpAllAssignedIdentitiesOnNode(nodeName, nodeTrackList) - return - } - - err = c.CloudClient.UpdateUserMSI(addUserAssignedMSIIDs, removeUserAssignedMSIIDs, node) + err := c.CloudClient.UpdateUserMSI(addUserAssignedMSIIDs, removeUserAssignedMSIIDs, nodeOrVMSSName, nodeTrackList.isvmss) if err != nil { - glog.Errorf("Updating msis on node %s, add [%d], del [%d] failed with error %v", nodeName, len(nodeTrackList.assignedIDsToCreate), len(nodeTrackList.assignedIDsToDelete), err) - idList, getErr := c.getUserMSIListForNode(node) + glog.Errorf("Updating msis on node %s, add [%d], del [%d] failed with error %v", nodeOrVMSSName, len(nodeTrackList.assignedIDsToCreate), len(nodeTrackList.assignedIDsToDelete), err) + idList, getErr := c.getUserMSIListForNode(nodeOrVMSSName, nodeTrackList.isvmss) if getErr != nil { - glog.Errorf("Getting list of msis from node %s resulted in error %v", nodeName, getErr) + glog.Errorf("Getting list of msis from node %s resulted in error %v", nodeOrVMSSName, getErr) return } @@ -777,7 +770,7 @@ func (c *Client) updateUserMSI(newAssignedIDs []aadpodid.AzureAssignedIdentity, c.EventRecorder.Event(binding, corev1.EventTypeNormal, "binding applied", fmt.Sprintf("Binding %s applied on node %s for pod %s", binding.Name, createID.Spec.NodeName, createID.Name)) - glog.Infof("Updating msis on node %s failed, but identity %s has successfully been assigned to node", nodeName, binding.Name) + glog.Infof("Updating msis on node %s failed, but identity %s has successfully been assigned to node", createID.Spec.NodeName, binding.Name) // Identity is successfully assigned to node, so update the status of assigned identity to assigned if updateErr := c.updateAssignedIdentityStatus(&createID, aadpodid.AssignedIDAssigned); updateErr != nil { @@ -810,7 +803,7 @@ func (c *Client) updateUserMSI(newAssignedIDs []aadpodid.AzureAssignedIdentity, continue } - glog.Infof("Updating msis on node %s failed, but identity %s has successfully been removed from node", nodeName, removedBinding.Name) + glog.Infof("Updating msis on node %s failed, but identity %s has successfully been removed from node", delID.Spec.NodeName, removedBinding.Name) // remove assigned identity crd from cluster as the identity has successfully been removed from the node err = c.removeAssignedIdentity(&delID) @@ -866,39 +859,82 @@ func (c *Client) updateUserMSI(newAssignedIDs []aadpodid.AzureAssignedIdentity, c.EventRecorder.Event(removedBinding, corev1.EventTypeNormal, "binding removed", fmt.Sprintf("Binding %s removed from node %s for pod %s", removedBinding.Name, delID.Spec.NodeName, delID.Spec.Pod)) } - c.statsMutex.Lock() stats.Put(stats.TotalCreateOrUpdate, time.Since(beginAdding)) - c.statsMutex.Unlock() } // cleanUpAllAssignedIdentitiesOnNode deletes all assigned identities associated with a the node -func (c *Client) cleanUpAllAssignedIdentitiesOnNode(node string, nodeTrackList trackUserAssignedMSIIds) { +func (c *Client) cleanUpAllAssignedIdentitiesOnNode(node string, nodeTrackList trackUserAssignedMSIIds, wg *sync.WaitGroup) { + defer wg.Done() glog.Infof("deleting all assigned identites for node %s", node) - for _, createID := range nodeTrackList.assignedIDsToCreate { - binding := createID.Spec.AzureBindingRef + for _, deleteID := range nodeTrackList.assignedIDsToDelete { + binding := deleteID.Spec.AzureBindingRef - err := c.removeAssignedIdentity(&createID) + err := c.removeAssignedIdentity(&deleteID) if err != nil { c.EventRecorder.Event(binding, corev1.EventTypeWarning, "binding remove error", - fmt.Sprintf("Removing assigned identity binding %s node %s for pod %s resulted in error %v", binding.Name, createID.Spec.NodeName, createID.Name, err.Error())) + fmt.Sprintf("Removing assigned identity binding %s node %s for pod %s resulted in error %v", binding.Name, deleteID.Spec.NodeName, deleteID.Name, err.Error())) glog.Error(err) continue } c.EventRecorder.Event(binding, corev1.EventTypeNormal, "binding removed", - fmt.Sprintf("Binding %s removed from node %s for pod %s", binding.Name, createID.Spec.NodeName, createID.Spec.Pod)) + fmt.Sprintf("Binding %s removed from node %s for pod %s", binding.Name, deleteID.Spec.NodeName, deleteID.Spec.Pod)) } +} - for _, deleteID := range nodeTrackList.assignedIDsToDelete { - binding := deleteID.Spec.AzureBindingRef +// consolidateVMSSNodes takes a list of all nodes that are part of the current sync cycle, checks if the nodes are +// part of vmss and combines the vmss nodes into vmss name. This consolidation is needed because vmss identities +// currently operate on all nodes in the vmss not just a single node. +func (c *Client) consolidateVMSSNodes(nodeMap map[string]trackUserAssignedMSIIds, wg *sync.WaitGroup) { + vmssMap := make(map[string][]string) - err := c.removeAssignedIdentity(&deleteID) + for nodeName, nodeTrackList := range nodeMap { + node, err := c.NodeClient.Get(nodeName) + if err != nil && !strings.Contains(err.Error(), "not found") { + glog.Errorf("Unable to get node %s. Error %v", nodeName, err) + continue + } + if err != nil && strings.Contains(err.Error(), "not found") { + glog.Warningf("Unable to get node %s while updating user msis. Error %v", nodeName, err) + wg.Add(1) + // node is no longer found in the cluster, all the assigned identities that were created in this sync loop + // and those that already exist for this node need to be deleted. + go c.cleanUpAllAssignedIdentitiesOnNode(nodeName, nodeTrackList, wg) + delete(nodeMap, nodeName) + continue + } + vmssName, isvmss, err := isVMSS(node) if err != nil { - c.EventRecorder.Event(binding, corev1.EventTypeWarning, "binding remove error", - fmt.Sprintf("Removing assigned identity binding %s node %s for pod %s resulted in error %v", binding.Name, deleteID.Spec.NodeName, deleteID.Name, err.Error())) - glog.Error(err) + glog.Errorf("error checking if node %s is vmss. Error: %v", nodeName, err) continue } - c.EventRecorder.Event(binding, corev1.EventTypeNormal, "binding removed", - fmt.Sprintf("Binding %s removed from node %s for pod %s", binding.Name, deleteID.Spec.NodeName, deleteID.Spec.Pod)) + if isvmss { + if nodes, ok := vmssMap[vmssName]; ok { + nodes = append(nodes, nodeName) + vmssMap[vmssName] = nodes + continue + } + vmssMap[vmssName] = []string{nodeName} + } + } + + // aggregate vmss nodes into vmss name + for vmssName, vmssNodes := range vmssMap { + if len(vmssNodes) < 1 { + continue + } + + vmssTrackList := trackUserAssignedMSIIds{} + + for _, vmssNode := range vmssNodes { + vmssTrackList.addUserAssignedMSIIDs = append(vmssTrackList.addUserAssignedMSIIDs, nodeMap[vmssNode].addUserAssignedMSIIDs...) + vmssTrackList.removeUserAssignedMSIIDs = append(vmssTrackList.removeUserAssignedMSIIDs, nodeMap[vmssNode].removeUserAssignedMSIIDs...) + vmssTrackList.assignedIDsToCreate = append(vmssTrackList.assignedIDsToCreate, nodeMap[vmssNode].assignedIDsToCreate...) + vmssTrackList.assignedIDsToDelete = append(vmssTrackList.assignedIDsToDelete, nodeMap[vmssNode].assignedIDsToDelete...) + vmssTrackList.isvmss = true + + delete(nodeMap, vmssNode) + + nodeMap[getVMSSName(vmssName)] = vmssTrackList + } } } diff --git a/pkg/mic/vmss.go b/pkg/mic/vmss.go index 855248878..320fc3676 100644 --- a/pkg/mic/vmss.go +++ b/pkg/mic/vmss.go @@ -96,6 +96,11 @@ func makeVMSSID(r azure.Resource) string { return path.Join(r.SubscriptionID, r.ResourceGroup, r.ResourceName) } +func getVMSSName(vmssID string) string { + _, resourceName := path.Split(vmssID) + return resourceName +} + // Either get a vmss group by node reference or lookup the vmss ID from the node's provider ID. // The reason for this is we may have request to delete an identity from a node and it is the last identity, so // the node will not be referenced by any pods and will be absent from the group list diff --git a/test/e2e/aadpodidentity_test.go b/test/e2e/aadpodidentity_test.go index d9776a3dd..5fb9ce3fa 100644 --- a/test/e2e/aadpodidentity_test.go +++ b/test/e2e/aadpodidentity_test.go @@ -609,6 +609,9 @@ var _ = Describe("Kubernetes cluster using aad-pod-identity", func() { Expect(err).NotTo(HaveOccurred()) Expect(ok).To(Equal(true)) + // TODO (aramase) make this deterministic by ensuring pods with desired image are running + time.Sleep(30 * time.Second) + ok, err = waitForIPTableRulesToExist("busybox") Expect(err).NotTo(HaveOccurred()) Expect(ok).To(Equal(true))