@@ -728,6 +728,7 @@ func (p *Planner) BuilderLoop(builder protos.PlannerForBuilder_BuilderLoopServer
728
728
return fmt .Errorf ("dequeue() call resulted in nil response. builder: %s" , builderID )
729
729
}
730
730
task := item .(* QueueTask )
731
+ logger := log .With (logger , "task" , task .ID )
731
732
732
733
queueTime := time .Since (task .queueTime )
733
734
p .metrics .queueDuration .Observe (queueTime .Seconds ())
@@ -739,7 +740,8 @@ func (p *Planner) BuilderLoop(builder protos.PlannerForBuilder_BuilderLoopServer
739
740
continue
740
741
}
741
742
742
- if err := p .forwardTaskToBuilder (builder , builderID , task ); err != nil {
743
+ result , err := p .forwardTaskToBuilder (builder , builderID , task )
744
+ if err != nil {
743
745
maxRetries := p .limits .BloomTaskMaxRetries (task .Tenant )
744
746
if maxRetries > 0 && task .timesEnqueued >= maxRetries {
745
747
p .metrics .tasksFailed .Inc ()
@@ -750,6 +752,10 @@ func (p *Planner) BuilderLoop(builder protos.PlannerForBuilder_BuilderLoopServer
750
752
"maxRetries" , maxRetries ,
751
753
"err" , err ,
752
754
)
755
+ task .resultsChannel <- & protos.TaskResult {
756
+ TaskID : task .ID ,
757
+ Error : fmt .Errorf ("task failed after max retries (%d): %w" , maxRetries , err ),
758
+ }
753
759
continue
754
760
}
755
761
@@ -758,12 +764,29 @@ func (p *Planner) BuilderLoop(builder protos.PlannerForBuilder_BuilderLoopServer
758
764
p .metrics .taskLost .Inc ()
759
765
p .removePendingTask (task )
760
766
level .Error (logger ).Log ("msg" , "error re-enqueuing task. this task will be lost" , "err" , err )
767
+ task .resultsChannel <- & protos.TaskResult {
768
+ TaskID : task .ID ,
769
+ Error : fmt .Errorf ("error re-enqueuing task: %w" , err ),
770
+ }
761
771
continue
762
772
}
763
773
764
774
p .metrics .tasksRequeued .Inc ()
765
- level .Error (logger ).Log ("msg" , "error forwarding task to builder, Task requeued" , "err" , err )
775
+ level .Error (logger ).Log (
776
+ "msg" , "error forwarding task to builder, Task requeued" ,
777
+ "retries" , task .timesEnqueued ,
778
+ "err" , err ,
779
+ )
780
+ continue
766
781
}
782
+
783
+ level .Debug (logger ).Log (
784
+ "msg" , "task completed" ,
785
+ "duration" , time .Since (task .queueTime ).Seconds (),
786
+ "retries" , task .timesEnqueued ,
787
+ )
788
+ p .removePendingTask (task )
789
+ task .resultsChannel <- result
767
790
}
768
791
769
792
return errPlannerIsNotRunning
@@ -773,15 +796,13 @@ func (p *Planner) forwardTaskToBuilder(
773
796
builder protos.PlannerForBuilder_BuilderLoopServer ,
774
797
builderID string ,
775
798
task * QueueTask ,
776
- ) error {
777
- defer p .removePendingTask (task )
778
-
799
+ ) (* protos.TaskResult , error ) {
779
800
msg := & protos.PlannerToBuilder {
780
801
Task : task .ToProtoTask (),
781
802
}
782
803
783
804
if err := builder .Send (msg ); err != nil {
784
- return fmt .Errorf ("error sending task to builder (%s): %w" , builderID , err )
805
+ return nil , fmt .Errorf ("error sending task to builder (%s): %w" , builderID , err )
785
806
}
786
807
787
808
// Launch a goroutine to wait for the response from the builder so we can
@@ -811,12 +832,11 @@ func (p *Planner) forwardTaskToBuilder(
811
832
// Note: Errors from the result are not returned here since we don't retry tasks
812
833
// that return with an error. I.e. we won't retry errors forwarded from the builder.
813
834
// TODO(salvacorts): Filter and return errors that can be retried.
814
- task .resultsChannel <- result
815
- return nil
835
+ return result , nil
816
836
case err := <- errCh :
817
- return err
837
+ return nil , err
818
838
case <- timeout :
819
- return fmt .Errorf ("timeout waiting for response from builder (%s)" , builderID )
839
+ return nil , fmt .Errorf ("timeout waiting for response from builder (%s)" , builderID )
820
840
}
821
841
}
822
842
0 commit comments