From 949269104d86b6d70d1fe07d34db9b3bedd57b0f Mon Sep 17 00:00:00 2001 From: JoannaaKL Date: Mon, 3 Oct 2022 17:50:20 +0200 Subject: [PATCH] Avastancu/joannaakl/service container error log (#2110) * adding support for a service container docker logs * Adding Unit test to ContainerOperationProvider * Adding another test to ContainerOperationProvider * placed the docker logs output in dedicated ##group section * Removed the exception thrown if the service container was not healthy * Removed duplicated logging to the executionContext * Updated the container logs sub-section message * Print service containers only if they were healthy Unhealthy service logs are printed in ContainerHealthCheckLogs called prior to this step. * Removed recently added method to inspect docker logs The method was doing the same thing as the existing DockerLogs method. * Added execution context error This will make a failed health check more visible in the UI without disrupting the execution of the program. * Removing the section 'Waiting for all services to be ready' Since nested subsections are not being displayed properly and we already need one subsection per service error. * Update src/Runner.Worker/Container/DockerCommandManager.cs Co-authored-by: Tingluo Huang * Update src/Test/L0/TestHostContext.cs Co-authored-by: Tingluo Huang * Change the logic for printing Service Containers logs Service container logs will be printed in the 'Start containers' section only if there is an error. Healthy services will have their logs printed in the 'Stop Containers' section. * Removed unused import * Added back section group. * Moved service containers error logs to separate group sections * Removed the test testing the old logic flow. * Remove unnecessary 'IsAnyUnhealthy' flag * Remove printHello() function * Add newline to TestHostContext * Remove unnecessary field 'UnhealthyContainers' * Rename boolean flag indicating service container failure * Refactor healthcheck logic to separate method to enable unit testing. * Remove the default value for bool variable * Update src/Runner.Worker/ContainerOperationProvider.cs Co-authored-by: Tingluo Huang * Update src/Runner.Worker/ContainerOperationProvider.cs Co-authored-by: Tingluo Huang * Rename Healthcheck back to ContainerHealthcheck * Make test sequential * Unextract the container error logs method * remove test asserting thrown exception * Add configure await * Update src/Test/L0/Worker/ContainerOperationProviderL0.cs Co-authored-by: Tingluo Huang * Update src/Test/L0/Worker/ContainerOperationProviderL0.cs Co-authored-by: Tingluo Huang * Update src/Test/L0/Worker/ContainerOperationProviderL0.cs Co-authored-by: Tingluo Huang * Update src/Test/L0/Worker/ContainerOperationProviderL0.cs Co-authored-by: Tingluo Huang * Update src/Test/L0/Worker/ContainerOperationProviderL0.cs Co-authored-by: Tingluo Huang * Add back test asserting exception * Check service exit code if there is no healtcheck configured * Remove unnecessary healthcheck for healthy service container * Revert "Check service exit code if there is no healtcheck configured" This reverts commit fec24e834137afe2dc751a38707d0e399c10f8fb. Co-authored-by: Ava S Co-authored-by: Tingluo Huang --- src/Runner.Worker/Container/ContainerInfo.cs | 2 + .../ContainerOperationProvider.cs | 59 +++++++--- .../L0/Worker/ContainerOperationProviderL0.cs | 108 ++++++++++++++++++ 3 files changed, 150 insertions(+), 19 deletions(-) create mode 100644 src/Test/L0/Worker/ContainerOperationProviderL0.cs diff --git a/src/Runner.Worker/Container/ContainerInfo.cs b/src/Runner.Worker/Container/ContainerInfo.cs index 9c114939e2d..32e55eb3c40 100644 --- a/src/Runner.Worker/Container/ContainerInfo.cs +++ b/src/Runner.Worker/Container/ContainerInfo.cs @@ -92,6 +92,8 @@ public ContainerInfo(IHostContext hostContext, Pipelines.JobContainer container, public bool IsJobContainer { get; set; } public bool IsAlpine { get; set; } + public bool FailedInitialization { get; set; } + public IDictionary ContainerEnvironmentVariables { get diff --git a/src/Runner.Worker/ContainerOperationProvider.cs b/src/Runner.Worker/ContainerOperationProvider.cs index 73472795c5e..2fc59730888 100644 --- a/src/Runner.Worker/ContainerOperationProvider.cs +++ b/src/Runner.Worker/ContainerOperationProvider.cs @@ -98,12 +98,41 @@ public async Task StartContainersAsync(IExecutionContext executionContext, objec await StartContainerAsync(executionContext, container); } + await RunContainersHealthcheck(executionContext, containers); + } + + public async Task RunContainersHealthcheck(IExecutionContext executionContext, List containers) + { executionContext.Output("##[group]Waiting for all services to be ready"); + + var unhealthyContainers = new List(); foreach (var container in containers.Where(c => !c.IsJobContainer)) { - await ContainerHealthcheck(executionContext, container); + var healthcheck = await ContainerHealthcheck(executionContext, container); + + if (!string.Equals(healthcheck, "healthy", StringComparison.OrdinalIgnoreCase)) + { + unhealthyContainers.Add(container); + } + else + { + executionContext.Output($"{container.ContainerNetworkAlias} service is healthy."); + } } executionContext.Output("##[endgroup]"); + + if (unhealthyContainers.Count > 0) + { + foreach (var container in unhealthyContainers) + { + executionContext.Output($"##[group]Service container {container.ContainerNetworkAlias} failed."); + await _dockerManager.DockerLogs(context: executionContext, containerId: container.ContainerId); + executionContext.Error($"Failed to initialize container {container.ContainerImage}"); + container.FailedInitialization = true; + executionContext.Output("##[endgroup]"); + } + throw new InvalidOperationException("One or more containers failed to start."); + } } public async Task StopContainersAsync(IExecutionContext executionContext, object data) @@ -299,16 +328,15 @@ private async Task StopContainerAsync(IExecutionContext executionContext, Contai if (!string.IsNullOrEmpty(container.ContainerId)) { - if (!container.IsJobContainer) + if (!container.IsJobContainer && !container.FailedInitialization) { - // Print logs for service container jobs (not the "action" job itself b/c that's already logged). - executionContext.Output($"Print service container logs: {container.ContainerDisplayName}"); + executionContext.Output($"Print service container logs: {container.ContainerDisplayName}"); - int logsExitCode = await _dockerManager.DockerLogs(executionContext, container.ContainerId); - if (logsExitCode != 0) - { - executionContext.Warning($"Docker logs fail with exit code {logsExitCode}"); - } + int logsExitCode = await _dockerManager.DockerLogs(executionContext, container.ContainerId); + if (logsExitCode != 0) + { + executionContext.Warning($"Docker logs fail with exit code {logsExitCode}"); + } } executionContext.Output($"Stop and remove container: {container.ContainerDisplayName}"); @@ -395,14 +423,14 @@ private async Task RemoveContainerNetworkAsync(IExecutionContext executionContex } } - private async Task ContainerHealthcheck(IExecutionContext executionContext, ContainerInfo container) + private async Task ContainerHealthcheck(IExecutionContext executionContext, ContainerInfo container) { string healthCheck = "--format=\"{{if .Config.Healthcheck}}{{print .State.Health.Status}}{{end}}\""; string serviceHealth = (await _dockerManager.DockerInspect(context: executionContext, dockerObject: container.ContainerId, options: healthCheck)).FirstOrDefault(); if (string.IsNullOrEmpty(serviceHealth)) { // Container has no HEALTHCHECK - return; + return String.Empty; } var retryCount = 0; while (string.Equals(serviceHealth, "starting", StringComparison.OrdinalIgnoreCase)) @@ -413,14 +441,7 @@ private async Task ContainerHealthcheck(IExecutionContext executionContext, Cont serviceHealth = (await _dockerManager.DockerInspect(context: executionContext, dockerObject: container.ContainerId, options: healthCheck)).FirstOrDefault(); retryCount++; } - if (string.Equals(serviceHealth, "healthy", StringComparison.OrdinalIgnoreCase)) - { - executionContext.Output($"{container.ContainerNetworkAlias} service is healthy."); - } - else - { - throw new InvalidOperationException($"Failed to initialize, {container.ContainerNetworkAlias} service is {serviceHealth}."); - } + return serviceHealth; } private async Task ContainerRegistryLogin(IExecutionContext executionContext, ContainerInfo container) diff --git a/src/Test/L0/Worker/ContainerOperationProviderL0.cs b/src/Test/L0/Worker/ContainerOperationProviderL0.cs new file mode 100644 index 00000000000..bfc09f343ac --- /dev/null +++ b/src/Test/L0/Worker/ContainerOperationProviderL0.cs @@ -0,0 +1,108 @@ +using GitHub.Runner.Worker; +using GitHub.Runner.Worker.Container; +using Xunit; +using Moq; +using GitHub.Runner.Worker.Container.ContainerHooks; +using System.Threading.Tasks; +using System.Collections.Generic; +using System.Runtime.CompilerServices; +using GitHub.DistributedTask.WebApi; +using System; + +namespace GitHub.Runner.Common.Tests.Worker +{ + + public sealed class ContainerOperationProviderL0 + { + + private TestHostContext _hc; + private Mock _ec; + private Mock _dockerManager; + private Mock _containerHookManager; + private ContainerOperationProvider containerOperationProvider; + private Mock serverQueue; + private Mock pagingLogger; + private List healthyDockerStatus = new List { "healthy" }; + private List unhealthyDockerStatus = new List { "unhealthy" }; + private List dockerLogs = new List { "log1", "log2", "log3" }; + + List containers = new List(); + + [Fact] + [Trait("Level", "L0")] + [Trait("Category", "Worker")] + public async void RunServiceContainersHealthcheck_UnhealthyServiceContainer_AssertFailedTask() + { + //Arrange + Setup(); + _dockerManager.Setup(x => x.DockerInspect(_ec.Object, It.IsAny(), It.IsAny())).Returns(Task.FromResult(unhealthyDockerStatus)); + + //Act + try + { + await containerOperationProvider.RunContainersHealthcheck(_ec.Object, containers); + } + catch (InvalidOperationException) + { + + //Assert + Assert.Equal(TaskResult.Failed, _ec.Object.Result ?? TaskResult.Failed); + } + } + + [Fact] + [Trait("Level", "L0")] + [Trait("Category", "Worker")] + public async void RunServiceContainersHealthcheck_UnhealthyServiceContainer_AssertExceptionThrown() + { + //Arrange + Setup(); + _dockerManager.Setup(x => x.DockerInspect(_ec.Object, It.IsAny(), It.IsAny())).Returns(Task.FromResult(unhealthyDockerStatus)); + + //Act and Assert + await Assert.ThrowsAsync(() => containerOperationProvider.RunContainersHealthcheck(_ec.Object, containers)); + + } + + [Fact] + [Trait("Level", "L0")] + [Trait("Category", "Worker")] + public async void RunServiceContainersHealthcheck_healthyServiceContainer_AssertSucceededTask() + { + //Arrange + Setup(); + _dockerManager.Setup(x => x.DockerInspect(_ec.Object, It.IsAny(), It.IsAny())).Returns(Task.FromResult(healthyDockerStatus)); + + //Act + await containerOperationProvider.RunContainersHealthcheck(_ec.Object, containers); + + //Assert + Assert.Equal(TaskResult.Succeeded, _ec.Object.Result ?? TaskResult.Succeeded); + + } + + private void Setup([CallerMemberName] string testName = "") + { + containers.Add(new ContainerInfo() { ContainerImage = "ubuntu:16.04" }); + _hc = new TestHostContext(this, testName); + _ec = new Mock(); + serverQueue = new Mock(); + pagingLogger = new Mock(); + + _dockerManager = new Mock(); + _containerHookManager = new Mock(); + containerOperationProvider = new ContainerOperationProvider(); + + _hc.SetSingleton(_dockerManager.Object); + _hc.SetSingleton(serverQueue.Object); + _hc.SetSingleton(pagingLogger.Object); + + _hc.SetSingleton(_dockerManager.Object); + _hc.SetSingleton(_containerHookManager.Object); + + _ec.Setup(x => x.Global).Returns(new GlobalContext()); + + containerOperationProvider.Initialize(_hc); + } + } +} \ No newline at end of file