@@ -74,7 +74,7 @@ Debug Options:
74
74
profiles. Accepts id prefixes.
75
75
76
76
-server-id=<server>,<server>
77
- Comma separated list of Nomad server names, or "leader " to monitor for logs and include pprof
77
+ Comma separated list of Nomad server names, "leader", or "all " to monitor for logs and include pprof
78
78
profiles.
79
79
80
80
-stale=<true|false>
@@ -251,9 +251,25 @@ func (c *OperatorDebugCommand) Run(args []string) int {
251
251
}
252
252
}
253
253
254
- // Resolve server prefixes
255
- for _ , id := range argNodes (serverIDs ) {
256
- c .serverIDs = append (c .serverIDs , id )
254
+ // Resolve servers
255
+ members , err := client .Agent ().Members ()
256
+ c .writeJSON ("version" , "members.json" , members , err )
257
+ // We always write the error to the file, but only iterate over the members when the list was retrieved
258
+ if serverIDs == "all" && members != nil {
259
+ // Special case to capture from all servers
260
+ for _ , member := range members .Members {
261
+ c .serverIDs = append (c .serverIDs , member .Name )
262
+ }
263
+ } else {
264
+ for _ , id := range argNodes (serverIDs ) {
265
+ c .serverIDs = append (c .serverIDs , id )
266
+ }
267
+ }
268
+
269
+ // Return error if servers were specified but not found
270
+ if len (serverIDs ) > 0 && len (c .serverIDs ) == 0 {
271
+ c .Ui .Error (fmt .Sprintf ("Failed to retrieve servers, 0 members found in list: %s" , serverIDs ))
272
+ return 1
257
273
}
258
274
259
275
c .manifest = make ([]string , 0 )
@@ -267,6 +283,8 @@ func (c *OperatorDebugCommand) Run(args []string) int {
267
283
stamped := "nomad-debug-" + c .timestamp
268
284
269
285
c .Ui .Output ("Starting debugger and capturing cluster data..." )
286
+ c .Ui .Output (fmt .Sprintf ("Capturing from servers: %v" , c .serverIDs ))
287
+ c .Ui .Output (fmt .Sprintf ("Capturing from client nodes: %v" , c .nodeIDs ))
270
288
271
289
c .Ui .Output (fmt .Sprintf (" Interval: '%s'" , interval ))
272
290
c .Ui .Output (fmt .Sprintf (" Duration: '%s'" , duration ))
@@ -499,6 +517,23 @@ func (c *OperatorDebugCommand) collectPprof(path, id string, client *api.Client)
499
517
if err == nil {
500
518
c .writeBytes (path , "goroutine.prof" , bs )
501
519
}
520
+
521
+ // Gather goroutine text output - debug type 1
522
+ // debug type 1 writes the legacy text format for human readable output
523
+ opts .Debug = 1
524
+ bs , err = client .Agent ().Lookup ("goroutine" , opts , nil )
525
+ if err == nil {
526
+ c .writeBytes (path , "goroutine-debug1.txt" , bs )
527
+ }
528
+
529
+ // Gather goroutine text output - debug type 2
530
+ // When printing the "goroutine" profile, debug=2 means to print the goroutine
531
+ // stacks in the same form that a Go program uses when dying due to an unrecovered panic.
532
+ opts .Debug = 2
533
+ bs , err = client .Agent ().Lookup ("goroutine" , opts , nil )
534
+ if err == nil {
535
+ c .writeBytes (path , "goroutine-debug2.txt" , bs )
536
+ }
502
537
}
503
538
504
539
// collectPeriodic runs for duration, capturing the cluster state every interval. It flushes and stops
@@ -576,8 +611,11 @@ func (c *OperatorDebugCommand) collectNomad(dir string, client *api.Client) erro
576
611
vs , _ , err := client .CSIVolumes ().List (qo )
577
612
c .writeJSON (dir , "volumes.json" , vs , err )
578
613
579
- metrics , err := client .Operator ().Metrics (qo )
580
- c .writeJSON (dir , "metrics.json" , metrics , err )
614
+ if metricBytes , err := client .Operator ().Metrics (qo ); err != nil {
615
+ c .writeError (dir , "metrics.json" , err )
616
+ } else {
617
+ c .writeBytes (dir , "metrics.json" , metricBytes )
618
+ }
581
619
582
620
return nil
583
621
}
@@ -628,12 +666,24 @@ func (c *OperatorDebugCommand) collectVault(dir, vault string) error {
628
666
629
667
// writeBytes writes a file to the archive, recording it in the manifest
630
668
func (c * OperatorDebugCommand ) writeBytes (dir , file string , data []byte ) error {
631
- path := filepath .Join (dir , file )
632
- c .manifest = append (c .manifest , path )
633
- path = filepath .Join (c .collectDir , path )
669
+ relativePath := filepath .Join (dir , file )
670
+ c .manifest = append (c .manifest , relativePath )
671
+ dirPath := filepath .Join (c .collectDir , dir )
672
+ filePath := filepath .Join (dirPath , file )
673
+
674
+ // Ensure parent directories exist
675
+ err := os .MkdirAll (dirPath , os .ModePerm )
676
+ if err != nil {
677
+ // Display error immediately -- may not see this if files aren't written
678
+ c .Ui .Error (fmt .Sprintf ("failed to create parent directories of \" %s\" : %s" , dirPath , err .Error ()))
679
+ return err
680
+ }
634
681
635
- fh , err := os .Create (path )
682
+ // Create the file
683
+ fh , err := os .Create (filePath )
636
684
if err != nil {
685
+ // Display error immediately -- may not see this if files aren't written
686
+ c .Ui .Error (fmt .Sprintf ("failed to create file \" %s\" : %s" , filePath , err .Error ()))
637
687
return err
638
688
}
639
689
defer fh .Close ()
0 commit comments