Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/add failure reason when run fails #1711

Merged
merged 2 commits into from
Sep 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/next_deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ on:
push:
branches:
- develop # change to main if needed
- feat/drift-runs-improvement-name
- feat/add-failure-reason-when-run-fails
jobs:
deploy:
name: Deploy app
Expand Down
1 change: 1 addition & 0 deletions next/model/digger_runs.gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion next/models_generated/digger_runs.gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

109 changes: 106 additions & 3 deletions next/services/runs.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,24 @@ func RunQueuesStateMachine(queueItem *model.DiggerRunQueueItem, service ci.PullR
runName, err := GetRunNameFromJob(*planJob)
if err != nil {
log.Printf("could not get run name: %v", err)
dr.Status = string(dbmodels.RunFailed)
dr.FailureReason = "Could not load run name"
err = dbmodels.DB.UpdateDiggerRun(dr)
if err != nil {
log.Printf("Error: could not update digger status to failed: %v", err)
}
return fmt.Errorf("could not get run name: %v", err)
}

err = RefreshVariableSpecForJob(planJob)
if err != nil {
log.Printf("could not get variable spec from job: %v", err)
dr.Status = string(dbmodels.RunFailed)
dr.FailureReason = "Could not load variables"
err = dbmodels.DB.UpdateDiggerRun(dr)
if err != nil {
log.Printf("Error: could not update digger status to failed: %v", err)
}
return fmt.Errorf("could not get variable spec from job: %v", err)
}

Expand All @@ -53,28 +65,64 @@ func RunQueuesStateMachine(queueItem *model.DiggerRunQueueItem, service ci.PullR
err = RefreshVariableSpecForJob(applyJob)
if err != nil {
log.Printf("could not get variable spec from job: %v", err)
dr.Status = string(dbmodels.RunFailed)
dr.FailureReason = "could not load variables"
err = dbmodels.DB.UpdateDiggerRun(dr)
if err != nil {
log.Printf("Error: could not update digger status to failed: %v", err)
}
return fmt.Errorf("could not get variable spec from job: %v", err)
}

spec, err := GetSpecFromJob(*planJob)
if err != nil {
log.Printf("could not get spec: %v", err)
dr.Status = string(dbmodels.RunFailed)
dr.FailureReason = "could not prepare job spec for triggering"
err = dbmodels.DB.UpdateDiggerRun(dr)
if err != nil {
log.Printf("Error: could not update digger status to failed: %v", err)
}
return fmt.Errorf("could not get spec: %v", err)
}

vcsToken, err := GetVCSTokenFromJob(*planJob, gh)
if err != nil {
log.Printf("could not get vcs token: %v", err)
dr.Status = string(dbmodels.RunFailed)
dr.FailureReason = "could not fetch VCS token (hint: is your app installed for repo?)"
err = dbmodels.DB.UpdateDiggerRun(dr)
if err != nil {
log.Printf("Error: could not update digger status to failed: %v", err)
}

return fmt.Errorf("could not get vcs token: %v", err)
}

err = dbmodels.DB.RefreshDiggerJobTokenExpiry(planJob)
if err != nil {
log.Printf("could not refresh job token expiry: %v", err)
dr.Status = string(dbmodels.RunFailed)
dr.FailureReason = "could not refresh digger token (likely an internal error)"
err = dbmodels.DB.UpdateDiggerRun(dr)
if err != nil {
log.Printf("Error: could not update digger status to failed: %v", err)
}
return fmt.Errorf("could not refresh job token from expiry: %v", err)
}

ciBackend.TriggerWorkflow(*spec, *runName, *vcsToken)
err = ciBackend.TriggerWorkflow(*spec, *runName, *vcsToken)
if err != nil {
log.Printf("ERROR: Failed to trigger for Digger Run queueID: %v [%v %v]", queueItem.ID, queueItem.DiggerRunID, dr.ProjectName)
dr.Status = string(dbmodels.RunFailed)
dr.FailureReason = fmt.Sprintf("could not trigger workflow, internal error: %v", err)
err = dbmodels.DB.UpdateDiggerRun(dr)
if err != nil {
log.Printf("Error: could not update digger status to failed: %v", err)
}

return fmt.Errorf("ERROR: Failed to trigger for Digger Run queueID: %v [%v %v]", queueItem.ID, queueItem.DiggerRunID, dr.ProjectName)
}

// change status to RunPendingPlan
log.Printf("Updating run queueItem item to planning state")
Expand All @@ -89,6 +137,12 @@ func RunQueuesStateMachine(queueItem *model.DiggerRunQueueItem, service ci.PullR
batch, err := dbmodels.DB.GetDiggerBatch(planStage.BatchID)
if err != nil {
log.Printf("could not get plan batch: %v", err)
dr.Status = string(dbmodels.RunFailed)
dr.FailureReason = "could not find digger batch"
err = dbmodels.DB.UpdateDiggerRun(dr)
if err != nil {
log.Printf("Error: could not update digger status to failed: %v", err)
}
return fmt.Errorf("could not get plan batch: %v", err)
}
batchStatus := batch.Status
Expand All @@ -97,6 +151,7 @@ func RunQueuesStateMachine(queueItem *model.DiggerRunQueueItem, service ci.PullR
// if failed then go straight to failed
if batchStatus == int16(orchestrator_scheduler.BatchJobFailed) {
dr.Status = string(dbmodels.RunFailed)
dr.FailureReason = "The job failed to run, please check action logs for more details"
err := dbmodels.DB.UpdateDiggerRun(dr)
if err != nil {
log.Printf("ERROR: Failed to update Digger Run for queueID: %v [%v %v]", queueItem.ID, queueItem.DiggerRunID, dr.ProjectName)
Expand Down Expand Up @@ -136,34 +191,75 @@ func RunQueuesStateMachine(queueItem *model.DiggerRunQueueItem, service ci.PullR
client := service.(*github.GithubService).Client
ciBackend := ci_backends.GithubActionCi{Client: client}
if err != nil {
log.Printf("could not get run name: %v", err)
log.Printf("could not get job: %v", err)
dr.Status = string(dbmodels.RunFailed)
dr.FailureReason = "could not get job from run stage"
err = dbmodels.DB.UpdateDiggerRun(dr)
if err != nil {
log.Printf("Error: could not update digger status to failed: %v", err)
}
return fmt.Errorf("could not get run name: %v", err)
}
runName, err := GetRunNameFromJob(*job)
if err != nil {
log.Printf("could not get run name: %v", err)
dr.Status = string(dbmodels.RunFailed)
dr.FailureReason = "could not get run name"
err = dbmodels.DB.UpdateDiggerRun(dr)
if err != nil {
log.Printf("Error: could not update digger status to failed: %v", err)
}
return fmt.Errorf("could not get run name: %v", err)
}

spec, err := GetSpecFromJob(*job)
if err != nil {
log.Printf("could not get spec: %v", err)
dr.Status = string(dbmodels.RunFailed)
dr.FailureReason = "could get spec from job"
err = dbmodels.DB.UpdateDiggerRun(dr)
if err != nil {
log.Printf("Error: could not update digger status to failed: %v", err)
}
return fmt.Errorf("could not get spec: %v", err)
}

vcsToken, err := GetVCSTokenFromJob(*job, gh)
if err != nil {
log.Printf("could not get vcs token: %v", err)
dr.Status = string(dbmodels.RunFailed)
dr.FailureReason = "could not fetch vcs token (hint: is the app still installed?)"
err = dbmodels.DB.UpdateDiggerRun(dr)
if err != nil {
log.Printf("Error: could not update digger status to failed: %v", err)
}

return fmt.Errorf("could not get spec: %v", err)
}

err = dbmodels.DB.RefreshDiggerJobTokenExpiry(job)
if err != nil {
log.Printf("could not refresh job token expiry: %v", err)
dr.Status = string(dbmodels.RunFailed)
dr.FailureReason = "could not refresh expiry token"
err = dbmodels.DB.UpdateDiggerRun(dr)
if err != nil {
log.Printf("Error: could not update digger status to failed: %v", err)
}
return fmt.Errorf("could not refresh job token from expiry: %v", err)
}

ciBackend.TriggerWorkflow(*spec, *runName, *vcsToken)
err = ciBackend.TriggerWorkflow(*spec, *runName, *vcsToken)
if err != nil {
log.Printf("could not trigger workflow for apply queueItem: %v [%v %v]", queueItem.ID, queueItem.DiggerRunID, dr.ProjectName)
dr.Status = string(dbmodels.RunFailed)
dr.FailureReason = fmt.Sprintf("could not trigger workflow: %v", err)
err = dbmodels.DB.UpdateDiggerRun(dr)
if err != nil {
log.Printf("Error: could not update digger status to failed: %v", err)
}
return fmt.Errorf("ERROR: failed to trigger workflow: %v", err)
}

dr.Status = string(dbmodels.RunApplying)
err = dbmodels.DB.UpdateDiggerRun(dr)
Expand All @@ -177,13 +273,20 @@ func RunQueuesStateMachine(queueItem *model.DiggerRunQueueItem, service ci.PullR
batch, err := dbmodels.DB.GetDiggerBatch(applyStage.BatchID)
if err != nil {
log.Printf("could not get apply batch: %v", err)
dr.Status = string(dbmodels.RunFailed)
dr.FailureReason = "could not get apply batch"
err = dbmodels.DB.UpdateDiggerRun(dr)
if err != nil {
log.Printf("Error: could not update digger status to failed: %v", err)
}
return fmt.Errorf("could not get apply batch: %v", err)
}
batchStatus := batch.Status

// if failed then go straight to failed
if batchStatus == int16(orchestrator_scheduler.BatchJobFailed) {
dr.Status = string(dbmodels.RunFailed)
dr.FailureReason = "the job failed to run, please refer to action logs for details"
err := dbmodels.DB.UpdateDiggerRun(dr)
if err != nil {
log.Printf("ERROR: Failed to update Digger Run for queueID: %v [%v %v]", queueItem.ID, queueItem.DiggerRunID, dr.ProjectName)
Expand Down
Loading