From 7a4766d0ae230737d414048a317185c3fcad3559 Mon Sep 17 00:00:00 2001 From: Shreyas Badiger <7680410+shreyas-badiger@users.noreply.github.com> Date: Wed, 21 Jul 2021 12:41:46 -0700 Subject: [PATCH] upgrade-manager-v2: Add CI github action, fix lint errors. (#276) * upgrade-manager-v2: Fix unit tests (#275) * Delete README.md Signed-off-by: Eytan Avisror Signed-off-by: sbadiger * delete all Signed-off-by: Eytan Avisror Signed-off-by: sbadiger * scaffolding Signed-off-by: Eytan Avisror Signed-off-by: sbadiger * add API Signed-off-by: Eytan Avisror Signed-off-by: sbadiger * initial code Signed-off-by: Eytan Avisror Signed-off-by: sbadiger * add more scaffolding Signed-off-by: Eytan Avisror Signed-off-by: sbadiger * Add kubernetes API calls Signed-off-by: Eytan Avisror Signed-off-by: sbadiger * aws API calls Signed-off-by: Eytan Avisror Signed-off-by: sbadiger * AWS API calls & Drift detection Signed-off-by: Eytan Avisror Signed-off-by: sbadiger * initial rotation logic Signed-off-by: Eytan Avisror Signed-off-by: sbadiger * Implemented RollingUpgrade object validation. (#176) * Validation step to check Nodes and ASG launch configs Signed-off-by: shreyas-badiger * Validating launch definition after a rolling upgrade Signed-off-by: shreyas-badiger Signed-off-by: sbadiger * Fix all the "make vet" errors in Controller V2 branch. 
(#177) * Validation step to check Nodes and ASG launch configs Signed-off-by: shreyas-badiger Signed-off-by: sbadiger * Validating launch definition after a rolling upgrade Signed-off-by: shreyas-badiger Signed-off-by: sbadiger * Resolve error log message and return statement Signed-off-by: shreyas-badiger Signed-off-by: sbadiger * Adding Functional Test (#113) * Adding BDD, workflow and badge * Changing CI workflow job name * Adding make manifests * Clarifying cron time zone comment Signed-off-by: shreyas-badiger Signed-off-by: sbadiger * release 0.13 (#115) * release 0.13 * Update CHANGELOG.md Signed-off-by: shreyas-badiger Signed-off-by: sbadiger * bump version (#116) Signed-off-by: shreyas-badiger Signed-off-by: sbadiger * Repo selection for CI and BDD workflows & CI step for releases (#117) * CI-BDD not on forks & Step for releases (#2) * Testing CI-BDD not on forks & Step for releases * Adding step for image with tag git-tag Signed-off-by: shreyas-badiger Signed-off-by: sbadiger * Terminate unjoined nodes Signed-off-by: shreyas-badiger Signed-off-by: sbadiger * Resolving PR comments Signed-off-by: shreyas-badiger Signed-off-by: sbadiger * Set version and update CHANGELOG for version 0.14. (#121) Co-authored-by: Shri Javadekar Signed-off-by: shreyas-badiger Signed-off-by: sbadiger * Bump version to 0.15-dev. Signed-off-by: shreyas-badiger Signed-off-by: sbadiger * Fix typo in README.md. 
(#125) Signed-off-by: shreyas-badiger Signed-off-by: sbadiger * Ignore the terminated instance during upgrade Signed-off-by: shreyas-badiger Signed-off-by: sbadiger * Added WARNING prefix in the logging Signed-off-by: shreyas-badiger Signed-off-by: sbadiger * Apply suggestions from code review Co-authored-by: Kevin Downey Signed-off-by: shreyas-badiger Signed-off-by: sbadiger * Capitalize sprintf to Sprintf Signed-off-by: shreyas-badiger Signed-off-by: sbadiger * Upgrade to Go 1.15 (#128) Signed-off-by: Oleg Atamanenko Signed-off-by: shreyas-badiger Signed-off-by: sbadiger * Fix few typos and simplify error returns, remove redundant types (#131) Signed-off-by: Oleg Atamanenko Signed-off-by: shreyas-badiger Signed-off-by: sbadiger * Readiness gates implementation for eager mode (#130) Signed-off-by: Oleg Atamanenko Signed-off-by: shreyas-badiger Signed-off-by: sbadiger * Adding Functional Test (#113) * Adding BDD, workflow and badge * Changing CI workflow job name * Adding make manifests * Clarifying cron time zone comment Signed-off-by: sbadiger * Validation step to check Nodes and ASG launch configs (#112) * Validation step to check Nodes and ASG launch configs * Validating launch definition after a rolling upgrade * Resolve error log message and return statement Co-authored-by: Eytan Avisror Signed-off-by: sbadiger * release 0.13 (#115) * release 0.13 * Update CHANGELOG.md Signed-off-by: sbadiger * bump version (#116) Signed-off-by: sbadiger * Repo selection for CI and BDD workflows & CI step for releases (#117) * CI-BDD not on forks & Step for releases (#2) * Testing CI-BDD not on forks & Step for releases * Adding step for image with tag git-tag Signed-off-by: sbadiger * Terminate unjoined nodes (#120) * Validation step to check Nodes and ASG launch configs * Validating launch definition after a rolling upgrade * Resolve error log message and return statement * Terminate unjoined nodes * Resolving PR comments Co-authored-by: Eytan Avisror Signed-off-by: 
sbadiger * Set version and update CHANGELOG for version 0.14. (#121) Co-authored-by: Shri Javadekar Signed-off-by: sbadiger * Bump version to 0.15-dev. Signed-off-by: sbadiger * Fix bug when switching to launch templates (#136) * Update rollingupgrade_controller.go * Update rollingupgrade_controller.go Signed-off-by: Eytan Avisror * spacing fixes Signed-off-by: Eytan Avisror Signed-off-by: sbadiger * Extract script runner to a separate type; fix work with env. variables (#132) Signed-off-by: Oleg Atamanenko Signed-off-by: sbadiger * Set version and update CHANGELOG for version v0.15 (#137) Signed-off-by: Shri Javadekar Signed-off-by: sbadiger * Bump version to v0.16-dev. Signed-off-by: Shri Javadekar Signed-off-by: sbadiger * Propagate parent env variables to allow to talk with API Server (#144) Signed-off-by: Oleg Atamanenko Signed-off-by: sbadiger * Bump Golang CI action to fix failed CI run (#146) Signed-off-by: Oleg Atamanenko Signed-off-by: sbadiger * Simplify (#145) Signed-off-by: Oleg Atamanenko Signed-off-by: sbadiger * Add Expiration to cache and do not refresh ASG if cache is not expired (#143) Signed-off-by: Oleg Atamanenko Co-authored-by: Shri Javadekar Signed-off-by: sbadiger * Fix documentation for uniform across AZ Update strategy and fix typos (#147) Signed-off-by: Oleg Atamanenko Signed-off-by: sbadiger * Move cluster state from package level to a cluster state impl (#148) Signed-off-by: Oleg Atamanenko Signed-off-by: sbadiger * Simplify work with intstr type. (#149) Signed-off-by: Oleg Atamanenko Signed-off-by: sbadiger * If instance is in standby mode already, just return (#138) Signed-off-by: Oleg Atamanenko Co-authored-by: Shri Javadekar Signed-off-by: sbadiger * Handle terminated instances gracefully. 
(#150) Signed-off-by: Oleg Atamanenko Signed-off-by: sbadiger * Template version comparison fix (#155) * get template version Signed-off-by: Eytan Avisror * fix tests Signed-off-by: Eytan Avisror Signed-off-by: sbadiger * release 0.16 (#157) Signed-off-by: Eytan Avisror Signed-off-by: sbadiger * bump version to 0.17-dev (#158) Signed-off-by: Eytan Avisror Signed-off-by: sbadiger * Don't uncordon node on failure to run postDrain script when IgnoreDrainFailures set (#151) * Don't uncordon node on failure to run postDrain script when IgnoreDrainFailures set Signed-off-by: Adam Malcontenti-Wilson * Test node uncordon when postDrain / postDrainWait script fails Signed-off-by: Adam Malcontenti-Wilson Signed-off-by: sbadiger * Abort on strategy failure instead of continuing (#152) * Abort on strategy failure instead of continuing Signed-off-by: Adam Malcontenti-Wilson * Remove unformatted error message placeholder Signed-off-by: Adam Malcontenti-Wilson * Explicitly specify strategy for tests Signed-off-by: Adam Malcontenti-Wilson Signed-off-by: sbadiger * use NamespacedName (#160) Signed-off-by: Eytan Avisror Co-authored-by: Shri Javadekar Signed-off-by: sbadiger * Set version and update CHANGELOG for version v0.17 (#161) Signed-off-by: Shri Javadekar Signed-off-by: sbadiger * Bump version to v0.18-dev (#162) Signed-off-by: Shri Javadekar Signed-off-by: sbadiger * Move constants to types so that they can be reused (#167) Signed-off-by: Oleg Atamanenko Signed-off-by: sbadiger * Remove separate module for pkg/log (#168) Signed-off-by: Oleg Atamanenko Co-authored-by: Shri Javadekar Signed-off-by: sbadiger * Bump dependencies. 
(#169) Signed-off-by: Oleg Atamanenko Signed-off-by: sbadiger * use standard fmt.Errorf to format error message; unify error format (#171) Signed-off-by: Oleg Atamanenko Signed-off-by: sbadiger * Fix namespaced name order (#170) Signed-off-by: Oleg Atamanenko Signed-off-by: sbadiger * Add instance id to the logs (#173) Signed-off-by: Oleg Atamanenko Co-authored-by: Shri Javadekar Signed-off-by: sbadiger * Bump golang and busybox (#172) Signed-off-by: Oleg Atamanenko Co-authored-by: Shri Javadekar Signed-off-by: sbadiger * Expose template list and other execution errors to logs (#166) * Log and return wrapped launchtemplate error Signed-off-by: Adam Malcontenti-Wilson * Expose execution error in logs Signed-off-by: Adam Malcontenti-Wilson Signed-off-by: sbadiger * output can contain other messages from API Server, so be more relaxed (#174) Signed-off-by: Oleg Atamanenko Signed-off-by: sbadiger * Delete README.md Signed-off-by: Eytan Avisror Signed-off-by: sbadiger * delete all Signed-off-by: Eytan Avisror Signed-off-by: sbadiger * scaffolding Signed-off-by: Eytan Avisror Signed-off-by: sbadiger * add API Signed-off-by: Eytan Avisror Signed-off-by: sbadiger * initial code Signed-off-by: Eytan Avisror Signed-off-by: sbadiger * add more scaffolding Signed-off-by: Eytan Avisror Signed-off-by: sbadiger * Add kubernetes API calls Signed-off-by: Eytan Avisror Signed-off-by: sbadiger * aws API calls Signed-off-by: Eytan Avisror Signed-off-by: sbadiger * AWS API calls & Drift detection Signed-off-by: Eytan Avisror Signed-off-by: sbadiger * validate() function Signed-off-by: shreyas-badiger Signed-off-by: sbadiger * modified validate() Signed-off-by: sbadiger * modified validate() Signed-off-by: sbadiger * initial rotation logic Signed-off-by: Eytan Avisror Signed-off-by: sbadiger * basic script_runner without any modifications Signed-off-by: sbadiger * Fix all the vet related errors Signed-off-by: sbadiger Co-authored-by: Alfredo Garo 
<44888596+garomonegro@users.noreply.github.com> Co-authored-by: Eytan Avisror Co-authored-by: Shri Javadekar Co-authored-by: Shri Javadekar Co-authored-by: Shri Javadekar Co-authored-by: Craig Robson Co-authored-by: Kevin Downey Co-authored-by: Oleg Atamanenko Co-authored-by: Shreyas Badiger <7680410+hard-fault@users.noreply.github.com> Co-authored-by: Adam Malcontenti-Wilson Co-authored-by: Adam Malcontenti-Wilson Co-authored-by: Eytan Avisror Signed-off-by: sbadiger * Controller v2: Implementation of Instance termination (#178) * fix make vet errors. Signed-off-by: sbadiger * Terminate instances and run v2 for first time. Signed-off-by: sbadiger * Addressing review comments Signed-off-by: sbadiger * addressing more review comments Signed-off-by: sbadiger * Log error message Signed-off-by: sbadiger * error handling for instance tagging Signed-off-by: sbadiger * Migrate Script Runner (#179) * Basic script runner Signed-off-by: Eytan Avisror * Update upgrade.go Signed-off-by: Eytan Avisror Signed-off-by: sbadiger * Implemented node drain. 
(#181) Signed-off-by: sbadiger * Eager mode implementation (#183) * Eager mode implementation Signed-off-by: sbadiger * Metrics features (#189) Signed-off-by: xshao Signed-off-by: sbadiger * Process the batch rotation in parallel (#192) * Process the batch rotation in parallel Signed-off-by: sbadiger * addressing review comments Signed-off-by: sbadiger * Move the DrainManager within ReplaceBatch(), to access one per RollingUpgrade CR (#195) Signed-off-by: sbadiger * Refine metrics implementation to support goroutines (#196) * Refine the metrics status Signed-off-by: xshao * Refine the metrics status Signed-off-by: xshao * Fix test case error Signed-off-by: xshao * Use group instead of ASG Signed-off-by: xshao Signed-off-by: sbadiger * Ignore generated code (#201) * Refine the metrics status Signed-off-by: xshao * Refine the metrics status Signed-off-by: xshao * Fix test case error Signed-off-by: xshao * Use group instead of ASG Signed-off-by: xshao * Ignore generated code Signed-off-by: xshao * Ignore generated code Signed-off-by: xshao Signed-off-by: sbadiger * Fix bug in deleting the entry in syncMap (#203) Signed-off-by: sbadiger * Unit tests for controller-v2 (#215) * Unit tests Signed-off-by: sbadiger * minor change in accessing the namespace name Signed-off-by: sbadiger * move helper functions to a different file Signed-off-by: sbadiger * #2285: rollup CR statistic metrics in v2 (#218) * #2285: rollup CR statistic metrics in v2 Signed-off-by: sbadla1 * #2285: updated metric flags Signed-off-by: sbadla1 * #2285: updated metric flags Signed-off-by: sbadla1 Signed-off-by: sbadiger * #2285: renamed some methods related to metrics (#224) Signed-off-by: sbadla1 Signed-off-by: sbadiger * #2286: removed version from metric namespace (#227) Signed-off-by: sbadla1 Signed-off-by: sbadiger * Create RollingUpgradeContext (#234) * #2285: rollup CR statistic metrics in v2 (#218) * #2285: rollup CR statistic metrics in v2 Signed-off-by: sbadla1 * #2285: updated metric flags 
Signed-off-by: sbadla1 * #2285: updated metric flags Signed-off-by: sbadla1 Signed-off-by: sbadiger * log cloud discovery failure Signed-off-by: sbadiger * Create RollingUpgrade Context Signed-off-by: sbadiger * rollingupgrade context Signed-off-by: sbadiger Co-authored-by: Sahil Badla Signed-off-by: sbadiger * Resolve compile errors caused by merge conflict. (#235) * #2285: rollup CR statistic metrics in v2 (#218) * #2285: rollup CR statistic metrics in v2 Signed-off-by: sbadla1 * #2285: updated metric flags Signed-off-by: sbadla1 * #2285: updated metric flags Signed-off-by: sbadla1 Signed-off-by: sbadiger * log cloud discovery failure Signed-off-by: sbadiger * Create RollingUpgrade Context Signed-off-by: sbadiger * rollingupgrade context Signed-off-by: sbadiger * resolve compile errors due to merge conflict Signed-off-by: sbadiger Co-authored-by: Sahil Badla Signed-off-by: sbadiger * upgrade-manager-v2: Move DrainManager back to Reconciler (#236) * #2285: rollup CR statistic metrics in v2 (#218) * #2285: rollup CR statistic metrics in v2 Signed-off-by: sbadla1 * #2285: updated metric flags Signed-off-by: sbadla1 * #2285: updated metric flags Signed-off-by: sbadla1 Signed-off-by: sbadiger * log cloud discovery failure Signed-off-by: sbadiger * Create RollingUpgrade Context Signed-off-by: sbadiger * rollingupgrade context Signed-off-by: sbadiger * #2285: rollup CR statistic metrics in v2 (#218) * #2285: rollup CR statistic metrics in v2 Signed-off-by: sbadla1 * #2285: updated metric flags Signed-off-by: sbadla1 * #2285: updated metric flags Signed-off-by: sbadla1 Signed-off-by: sbadiger * #2285: renamed some methods related to metrics (#224) Signed-off-by: sbadla1 Signed-off-by: sbadiger * #2286: removed version from metric namespace (#227) Signed-off-by: sbadla1 Signed-off-by: sbadiger * resolve compile errors due to merge conflict Signed-off-by: sbadiger * move drain-manager to reconciler Signed-off-by: sbadiger * initialize RollingUpgrade object Signed-off-by: 
sbadiger * use bool instead of count for standby function Signed-off-by: sbadiger * refactor in-progress and standby code Signed-off-by: sbadiger * rename instance standby function Signed-off-by: sbadiger * DrainManager changes in unit test files Signed-off-by: sbadiger Co-authored-by: Sahil Badla Signed-off-by: sbadiger * V2 controller metrics concurrency fix (#231) * Refine the metrics status Signed-off-by: xshao * Refine the metrics status Signed-off-by: xshao * Fix test case error Signed-off-by: xshao * Use group instead of ASG Signed-off-by: xshao * Ignore generated code Signed-off-by: xshao * Ignore generated code Signed-off-by: xshao * Fix the concurrent issue Signed-off-by: xshao * Fix the concurrent issue Signed-off-by: xshao * Move metrics related functions into RollingUpgradeContext Signed-off-by: xshao * Move metrics related functions into RollingUpgradeContext Signed-off-by: xshao * Move metrics related functions into upgrade_metrics.go Signed-off-by: xshao * Move metrics related functions into metrics.go Signed-off-by: xshao Signed-off-by: sbadiger * add missing parenthesis (#239) Signed-off-by: sbadiger * metricsMutex should be initialized (#240) Signed-off-by: xshao Signed-off-by: sbadiger * upgrade-manager-v2: Load test fixes (#245) * upgrade-manager-v2: Move DrainManager back to Reconciler (#236) * #2285: rollup CR statistic metrics in v2 (#218) * #2285: rollup CR statistic metrics in v2 Signed-off-by: sbadla1 * #2285: updated metric flags Signed-off-by: sbadla1 * #2285: updated metric flags Signed-off-by: sbadla1 Signed-off-by: sbadiger * log cloud discovery failure Signed-off-by: sbadiger * Create RollingUpgrade Context Signed-off-by: sbadiger * rollingupgrade context Signed-off-by: sbadiger * #2285: rollup CR statistic metrics in v2 (#218) * #2285: rollup CR statistic metrics in v2 Signed-off-by: sbadla1 * #2285: updated metric flags Signed-off-by: sbadla1 * #2285: updated metric flags Signed-off-by: sbadla1 Signed-off-by: sbadiger * #2285: 
renamed some methods related to metrics (#224) Signed-off-by: sbadla1 Signed-off-by: sbadiger * #2286: removed version from metric namespace (#227) Signed-off-by: sbadla1 Signed-off-by: sbadiger * resolve compile errors due to merge conflict Signed-off-by: sbadiger * move drain-manager to reconciler Signed-off-by: sbadiger * initialize RollingUpgrade object Signed-off-by: sbadiger * use bool instead of count for standby function Signed-off-by: sbadiger * refactor in-progress and standby code Signed-off-by: sbadiger * rename instance standby function Signed-off-by: sbadiger * DrainManager changes in unit test files Signed-off-by: sbadiger Co-authored-by: Sahil Badla Signed-off-by: sbadiger * V2 controller metrics concurrency fix (#231) * Refine the metrics status Signed-off-by: xshao * Refine the metrics status Signed-off-by: xshao * Fix test case error Signed-off-by: xshao * Use group instead of ASG Signed-off-by: xshao * Ignore generated code Signed-off-by: xshao * Ignore generated code Signed-off-by: xshao * Fix the concurrent issue Signed-off-by: xshao * Fix the concurrent issue Signed-off-by: xshao * Move metrics related functions into RollingUpgradeContext Signed-off-by: xshao * Move metrics related functions into RollingUpgradeContext Signed-off-by: xshao * Move metrics related functions into upgrade_metrics.go Signed-off-by: xshao * Move metrics related functions into metrics.go Signed-off-by: xshao Signed-off-by: sbadiger * add missing parenthesis Signed-off-by: sbadiger * load test fixes Signed-off-by: sbadiger * handle scaling group not found Signed-off-by: sbadiger * Update upgrade.go Signed-off-by: sbadiger * log one level up * remove double logging Signed-off-by: sbadiger * final push before RC release. 
(#254) * support IgnoreDrainFailures flag Signed-off-by: sbadiger * add else condition Signed-off-by: sbadiger * set min for maxUnavailable Signed-off-by: sbadiger * calculateMaxUnavailable function Signed-off-by: sbadiger * add a new column (completePercentage) Signed-off-by: sbadiger * disable debug logs by default Signed-off-by: sbadiger * Fix metrics collecting issue (#249) * metricsMutex should be initialized Signed-off-by: xshao * Use InProcessingNode instead of Stringp[] so that it can have the status of steps Signed-off-by: xshao Signed-off-by: sbadiger * Revert "Fix metrics collecting issue (#249)" (#256) This reverts commit f5dd1cb5f76f2b78cb15c53daed14032a2a4c6ec. Signed-off-by: sbadiger * Fix metrics calculation issue (#258) * metricsMutex should be initialized Signed-off-by: xshao * Use InProcessingNode instead of Stringp[] so that it can have the status of steps Signed-off-by: xshao * Make the change backward compatible Signed-off-by: xshao * Make the change backward compatible Signed-off-by: xshao * Add mutex for InProcessingNode deleting Signed-off-by: xshao Signed-off-by: sbadiger * Add a mock for test and update version in Makefile (#262) Signed-off-by: sbadiger * and CR end time (#264) Signed-off-by: sbadiger * upgrade-manager-v2: expose totalProcessing time and other metrics (#265) * and CR end time Signed-off-by: sbadiger * expose totalProcessing time and other metrics Signed-off-by: sbadiger * addressing review comments Signed-off-by: sbadiger * upgrade-manager-v2: remove function duplicate declaration. 
(#266) * and CR end time Signed-off-by: sbadiger * expose totalProcessing time and other metrics Signed-off-by: sbadiger * addressing review comments Signed-off-by: sbadiger * remove function duplication Signed-off-by: sbadiger * Carry the metrics status in RollingUpgrade CR (#267) * Update metrics status at same time Signed-off-by: xshao * Update metrics status when terminating instance Signed-off-by: xshao * Add terminated step Signed-off-by: xshao * Add terminated step Signed-off-by: xshao * Add terminated step Signed-off-by: xshao Signed-off-by: sbadiger * move cloud discovery after nodeInterval / drainInterval wait (#270) Signed-off-by: sbadiger * upgrade-manager-v2: Add nodeEvents handler instead of a watch handler (#272) * upgrade-manager-v2: remove function duplicate declaration. (#266) * and CR end time Signed-off-by: sbadiger * expose totalProcessing time and other metrics Signed-off-by: sbadiger * addressing review comments Signed-off-by: sbadiger * remove function duplication Signed-off-by: sbadiger * Carry the metrics status in RollingUpgrade CR (#267) * Update metrics status at same time Signed-off-by: xshao * Update metrics status when terminating instance Signed-off-by: xshao * Add terminated step Signed-off-by: xshao * Add terminated step Signed-off-by: xshao * Add terminated step Signed-off-by: xshao Signed-off-by: sbadiger * move cloud discovery after nodeInterval / drainInterval wait Signed-off-by: sbadiger * Add watch event for cluster nodes instead of API calls Signed-off-by: sbadiger * upon node deletion, remove it from syncMap as well Signed-off-by: sbadiger * Add nodeEvents handler instead of watch handler Signed-off-by: sbadiger * Ignore Reconciles on nodeEvents Signed-off-by: sbadiger * Add comments Signed-off-by: sbadiger Co-authored-by: Sheldon Shao Signed-off-by: sbadiger * upgrade-manager-v2: Process next batch while waiting on nodeInterval period. (#273) * upgrade-manager-v2: remove function duplicate declaration. 
(#266) * and CR end time Signed-off-by: sbadiger * expose totalProcessing time and other metrics Signed-off-by: sbadiger * addressing review comments Signed-off-by: sbadiger * remove function duplication Signed-off-by: sbadiger * Carry the metrics status in RollingUpgrade CR (#267) * Update metrics status at same time Signed-off-by: xshao * Update metrics status when terminating instance Signed-off-by: xshao * Add terminated step Signed-off-by: xshao * Add terminated step Signed-off-by: xshao * Add terminated step Signed-off-by: xshao Signed-off-by: sbadiger * move cloud discovery after nodeInterval / drainInterval wait Signed-off-by: sbadiger * Add watch event for cluster nodes instead of API calls Signed-off-by: sbadiger * upon node deletion, remove it from syncMap as well Signed-off-by: sbadiger * Add nodeEvents handler instead of watch handler Signed-off-by: sbadiger * Ignore Reconciles on nodeEvents Signed-off-by: sbadiger * Add comments Signed-off-by: sbadiger * Set nextbatch to standBy while waiting for terminate * Avoid parallel reconcile operation per ASG * add default requeue time Co-authored-by: Sheldon Shao Signed-off-by: sbadiger * fix unit tests Signed-off-by: sbadiger Co-authored-by: Eytan Avisror Co-authored-by: Alfredo Garo <44888596+garomonegro@users.noreply.github.com> Co-authored-by: Eytan Avisror Co-authored-by: Shri Javadekar Co-authored-by: Shri Javadekar Co-authored-by: Shri Javadekar Co-authored-by: Craig Robson Co-authored-by: Kevin Downey Co-authored-by: Oleg Atamanenko Co-authored-by: Shreyas Badiger <7680410+hard-fault@users.noreply.github.com> Co-authored-by: Adam Malcontenti-Wilson Co-authored-by: Adam Malcontenti-Wilson Co-authored-by: Sheldon Shao Co-authored-by: Sahil Badla Co-authored-by: Sheldon Shao Signed-off-by: sbadiger * add ci.yaml file Signed-off-by: sbadiger * test commit to trigger ci build Signed-off-by: sbadiger * move ci.yaml inside workflows Signed-off-by: sbadiger * delete ci.yaml file from previous place 
Signed-off-by: sbadiger * address lint issues Signed-off-by: sbadiger * Update ci.yaml Signed-off-by: sbadiger * generate coverage.txt file Signed-off-by: sbadiger * fix golang lint errors Signed-off-by: sbadiger * Delete delete-me.file * generate coverage.txt file Signed-off-by: sbadiger Co-authored-by: Eytan Avisror Co-authored-by: Alfredo Garo <44888596+garomonegro@users.noreply.github.com> Co-authored-by: Eytan Avisror Co-authored-by: Shri Javadekar Co-authored-by: Shri Javadekar Co-authored-by: Shri Javadekar Co-authored-by: Craig Robson Co-authored-by: Kevin Downey Co-authored-by: Oleg Atamanenko Co-authored-by: Shreyas Badiger <7680410+hard-fault@users.noreply.github.com> Co-authored-by: Adam Malcontenti-Wilson Co-authored-by: Adam Malcontenti-Wilson Co-authored-by: Sheldon Shao Co-authored-by: Sahil Badla Co-authored-by: Sheldon Shao --- .github/workflows/ci.yaml | 82 +++++++++++++++++++++++ Makefile | 3 +- controllers/helpers_test.go | 3 - controllers/providers/kubernetes/utils.go | 10 ++- controllers/rollingupgrade_controller.go | 6 +- 5 files changed, 91 insertions(+), 13 deletions(-) create mode 100644 .github/workflows/ci.yaml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 00000000..f88c3ecf --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,82 @@ +name: Build-Test + +on: + push: + branches: + - controller-v2 + pull_request: + branches: + - controller-v2 + release: + types: + - published + +jobs: + build: + name: CI # Lint, Test, Codecov, Docker build & Push + runs-on: ubuntu-latest + steps: + + - name: Checkout code + uses: actions/checkout@v2 + + - name: Golangci-lint + uses: golangci/golangci-lint-action@v2 + with: + # Required: the version of golangci-lint is required and must be specified without patch version: we always use the latest patch version. 
+ version: v1.32 + args: --timeout 2m + + - name: Get kubebuilder + env: + version: 1.0.8 # latest stable version + arch: amd64 + run: | + # download the release + curl -L -O "https://github.com/kubernetes-sigs/kubebuilder/releases/download/v${version}/kubebuilder_${version}_linux_${arch}.tar.gz" + # extract the archive + tar -zxvf kubebuilder_${version}_linux_${arch}.tar.gz + mv kubebuilder_${version}_linux_${arch} kubebuilder && sudo mv kubebuilder /usr/local/ + # update your PATH to include /usr/local/kubebuilder/bin + export PATH=$PATH:/usr/local/kubebuilder/bin + - name: Run Tests + run: make test + + - name: Codecov + uses: codecov/codecov-action@v1 + with: + file: ./coverage.txt # optional + flags: unittests # optional + name: codecov-umbrella # optional + fail_ci_if_error: true # optional (default = false) + + - name: Docker build + if: github.event_name == 'pull_request' || (github.repository != 'keikoproj/upgrade-manager' && github.event_name == 'push') + run: make docker-build + + - name: Build and push Docker image with tag controller-v2 # only on pushes to keikoproj/upgrade-manager + if: github.event_name == 'push' && github.repository == 'keikoproj/upgrade-manager' + uses: docker/build-push-action@v1 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + repository: keikoproj/rolling-upgrade-controller + tags: controller-v2 + + - name: Build and push Docker image with tag latest # only on releases of keikoproj/upgrade-manager + if: github.event_name == 'release' && github.repository == 'keikoproj/upgrade-manager' + uses: docker/build-push-action@v1 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + repository: keikoproj/rolling-upgrade-controller + tags: latest + + - name: Build and push Docker image with tag git-tag # only on releases of keikoproj/upgrade-manager + if: github.event_name == 'release' && github.repository == 'keikoproj/upgrade-manager' + uses: 
docker/build-push-action@v1 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + repository: keikoproj/rolling-upgrade-controller + tag_with_ref: true diff --git a/Makefile b/Makefile index 72bcec3d..628d8c7d 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,8 @@ ENVTEST_ASSETS_DIR=$(shell pwd)/testbin test: generate fmt vet manifests mkdir -p ${ENVTEST_ASSETS_DIR} test -f ${ENVTEST_ASSETS_DIR}/setup-envtest.sh || curl -sSLo ${ENVTEST_ASSETS_DIR}/setup-envtest.sh https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/v0.7.0/hack/setup-envtest.sh - source ${ENVTEST_ASSETS_DIR}/setup-envtest.sh; fetch_envtest_tools $(ENVTEST_ASSETS_DIR); setup_envtest_env $(ENVTEST_ASSETS_DIR); go test ./... -coverprofile cover.out + source ${ENVTEST_ASSETS_DIR}/setup-envtest.sh; fetch_envtest_tools $(ENVTEST_ASSETS_DIR); setup_envtest_env $(ENVTEST_ASSETS_DIR); go test ./... -coverprofile coverage.txt + go tool cover -html=./coverage.txt -o cover.html # Build manager binary manager: generate fmt vet diff --git a/controllers/helpers_test.go b/controllers/helpers_test.go index 99ba610d..b975a7e7 100644 --- a/controllers/helpers_test.go +++ b/controllers/helpers_test.go @@ -158,13 +158,10 @@ type MockAutoscalingGroup struct { } type launchTemplateInfo struct { - data *ec2.ResponseLaunchTemplateData name *string } type MockEC2 struct { ec2iface.EC2API - awsErr awserr.Error - reservations []*ec2.Reservation LaunchTemplates map[string]*launchTemplateInfo } diff --git a/controllers/providers/kubernetes/utils.go b/controllers/providers/kubernetes/utils.go index 2e557416..abeddd92 100644 --- a/controllers/providers/kubernetes/utils.go +++ b/controllers/providers/kubernetes/utils.go @@ -87,12 +87,10 @@ func GetKubernetesLocalConfig() (*rest.Config, error) { } func SelectNodeByInstanceID(instanceID string, nodes []*corev1.Node) *corev1.Node { - if nodes != nil { - for _, node := range nodes { - nodeID := GetNodeInstanceID(node) - if 
strings.EqualFold(instanceID, nodeID) { - return node - } + for _, node := range nodes { + nodeID := GetNodeInstanceID(node) + if strings.EqualFold(instanceID, nodeID) { + return node } } return nil diff --git a/controllers/rollingupgrade_controller.go b/controllers/rollingupgrade_controller.go index 37d196ef..b57dbd39 100644 --- a/controllers/rollingupgrade_controller.go +++ b/controllers/rollingupgrade_controller.go @@ -78,7 +78,7 @@ func (r *RollingUpgradeReconciler) Reconcile(ctx context.Context, req ctrl.Reque err := r.Get(ctx, req.NamespacedName, rollingUpgrade) if err != nil { if kerrors.IsNotFound(err) { - r.AdmissionMap.Delete(fmt.Sprintf("%s", req.NamespacedName)) + r.AdmissionMap.Delete(req.NamespacedName.String()) r.Info("rolling upgrade resource not found, deleted object from admission map", "name", req.NamespacedName) return ctrl.Result{}, nil } @@ -113,7 +113,7 @@ func (r *RollingUpgradeReconciler) Reconcile(ctx context.Context, req ctrl.Reque ) // at any given point in time, there should be only one reconcile operation running per ASG - if _, present := r.ReconcileMap.LoadOrStore(rollingUpgrade.NamespacedName(), scalingGroupName); present == true { + if _, present := r.ReconcileMap.LoadOrStore(rollingUpgrade.NamespacedName(), scalingGroupName); present { r.Info("a reconcile operation is already in progress for this ASG, requeuing", "scalingGroup", scalingGroupName, "name", rollingUpgrade.NamespacedName()) return ctrl.Result{RequeueAfter: v1alpha1.DefaultRequeueTime}, nil } @@ -136,7 +136,7 @@ func (r *RollingUpgradeReconciler) Reconcile(ctx context.Context, req ctrl.Reque } // store the rolling upgrade in admission map - if _, present := r.AdmissionMap.LoadOrStore(rollingUpgrade.NamespacedName(), scalingGroupName); present == false { + if _, present := r.AdmissionMap.LoadOrStore(rollingUpgrade.NamespacedName(), scalingGroupName); !present { r.Info("admitted new rolling upgrade", "scalingGroup", scalingGroupName, "update strategy", 
rollingUpgrade.Spec.Strategy, "name", rollingUpgrade.NamespacedName()) r.CacheConfig.FlushCache("autoscaling") } else {