feat: collectors should log PollData, plugin times, and metadata (#1347)
* feat: collectors should log PollData, plugin times, and metadata

fix: ems collector failed to send autosupport
docs: fix spelling in ems docs

* fix: RestPerf should not log "no instances" as error
cgrinds authored Oct 14, 2022
1 parent 2e3a4f4 commit 8621a91
Showing 14 changed files with 89 additions and 69 deletions.
8 changes: 4 additions & 4 deletions cmd/collectors/ems/README.md
@@ -67,8 +67,8 @@ objects:
```
Even though the EMS mapping shown above references a single file named `ems.yaml`,
there may be mutliple versions of that file across subdirectories named after ONTAP releases.
See [cdot](`https://github.com/NetApp/harvest/tree/main/conf/zapiperf/cdot`) for examples.
there may be multiple versions of that file across subdirectories named after ONTAP releases.
See [cDOT](`https://github.com/NetApp/harvest/tree/main/conf/zapiperf/cdot`) for examples.
At runtime, the EMS collector will select the appropriate object configuration file that most closely matches the targeted ONTAP system.
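For illustration, a versioned template layout might look like the sketch below; the directory names and ONTAP versions shown are assumptions for the example, not contents of this commit.

```
conf/ems/
├── default.yaml      # shared object definition
├── 9.6.0/
│   └── ems.yaml
└── 9.9.0/
    └── ems.yaml
```

A cluster running, say, ONTAP 9.8 would then be served by the closest matching version directory (`9.6.0` in this sketch).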

### EMS Template File
@@ -85,12 +85,12 @@ The EMS template file should contain the following parameters:

#### Event Parameters

This section defines the list of EMS events you want to collect, which properites to export, what labels to attach, and how to handle bookend pairs.
This section defines the list of EMS events you want to collect, which properties to export, what labels to attach, and how to handle bookend pairs.
The EMS event template parameters are explained below along with an example for reference.

- `name` is the ONTAP EMS event name. (collect ONTAP EMS events with the name of `LUN.offline`)
- `matches` list of name-value pairs used to further filter ONTAP events.
Some EMS events include arguments and these name-value pairs provide a way to filter on those arugments.
Some EMS events include arguments and these name-value pairs provide a way to filter on those arguments.
(Only collect ONTAP EMS events where `volume_name` has the value `abc_vol`)
- `exports` list of EMS event parameters to export. These exported parameters are attached as labels to each matching EMS event.
- labels that are prefixed with `^^` use that parameter to define [instance uniqueness](https://github.com/NetApp/harvest/blob/main/docs/TemplatesAndMetrics.md#harvest-object-template).
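To make the event parameters above concrete, a minimal hypothetical entry is sketched below. The layout is inferred from the parameter descriptions in this README; the event name, match value, and exported labels are illustrative only and not copied from the shipped templates.

```yaml
events:
  - name: LUN.offline              # collect ONTAP EMS events named LUN.offline
    matches:                       # keep only events whose arguments match these name-value pairs
      - name: volume_name
        value: abc_vol
    exports:                       # attach these event parameters as labels
      - ^^parameters.object_uuid   # ^^ marks the label that defines instance uniqueness
      - parameters.volume_name
```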
9 changes: 7 additions & 2 deletions cmd/collectors/ems/ems.go
@@ -455,6 +455,10 @@ func (e *Ems) PollData() (map[string]*matrix.Matrix, error) {
Int("queried", len(e.eventNames)).
Msg("No EMS events returned")
e.lastFilterTime = toTime
_ = e.Metadata.LazySetValueInt64("api_time", "data", apiD.Microseconds())
_ = e.Metadata.LazySetValueInt64("parse_time", "data", parseD.Microseconds())
_ = e.Metadata.LazySetValueUint64("metrics", "data", 0)
_ = e.Metadata.LazySetValueUint64("instances", "data", 0)
return nil, nil
}

@@ -475,10 +479,11 @@ func (e *Ems) PollData() (map[string]*matrix.Matrix, error) {
Str("parseTime", parseD.String()).
Msg("Collected")

_ = e.Metadata.LazySetValueInt64("count", "data", int64(instanceCount))
_ = e.Metadata.LazySetValueInt64("api_time", "data", apiD.Microseconds())
_ = e.Metadata.LazySetValueInt64("parse_time", "data", parseD.Microseconds())
_ = e.Metadata.LazySetValueUint64("datapoint_count", "data", count)
_ = e.Metadata.LazySetValueUint64("metrics", "data", count)
_ = e.Metadata.LazySetValueUint64("instances", "data", uint64(instanceCount))

e.AddCollectCount(count)

// update lastFilterTime to current cluster time
4 changes: 2 additions & 2 deletions cmd/collectors/rest/plugins/certificate/certificate.go
@@ -72,7 +72,7 @@ func (my *Certificate) Run(data *matrix.Matrix) ([]*matrix.Matrix, error) {

// invoke private vserver cli rest and get admin vserver name
if adminVserver, err = my.GetAdminVserver(); err != nil {
if ontap.IsAPINotFound(err) {
if ontap.IsRestErr(err, ontap.APINotFound) {
my.Logger.Debug().Err(err).Msg("Failed to collect admin SVM")
} else {
my.Logger.Error().Err(err).Msg("Failed to collect admin SVM")
@@ -82,7 +82,7 @@ func (my *Certificate) Run(data *matrix.Matrix) ([]*matrix.Matrix, error) {

// invoke private ssl cli rest and get the admin SVM's serial number
if adminVserverSerial, err = my.GetSecuritySsl(adminVserver); err != nil {
if ontap.IsAPINotFound(err) {
if ontap.IsRestErr(err, ontap.APINotFound) {
my.Logger.Debug().Err(err).Msg("Failed to collect admin SVM's serial number")
} else {
my.Logger.Error().Msg("Failed to collect admin SVM's serial number")
2 changes: 1 addition & 1 deletion cmd/collectors/rest/plugins/svm/svm.go
@@ -34,7 +34,7 @@ func (my *SVM) Run(data *matrix.Matrix) ([]*matrix.Matrix, error) {

// invoke nameservice-nsswitch-get-iter zapi and get nsswitch info
if my.nsswitchInfo, err = my.GetNSSwitchInfo(data); err != nil {
if errs.IsAPINotFound(err) {
if errs.IsRestErr(err, errs.APINotFound) {
my.Logger.Debug().Err(err).Msg("Failed to collect nsswitch info")
} else {
my.Logger.Warn().Err(err).Msg("Failed to collect nsswitch info")
2 changes: 1 addition & 1 deletion cmd/collectors/rest/plugins/volume/volume.go
@@ -67,7 +67,7 @@ func (my *Volume) Run(data *matrix.Matrix) ([]*matrix.Matrix, error) {

// invoke disk rest and populate info in aggrsMap
if disks, err := my.getEncryptedDisks(); err != nil {
if errs.IsAPINotFound(err) {
if errs.IsRestErr(err, errs.APINotFound) {
my.Logger.Debug().Err(err).Msg("Failed to collect disk data")
} else {
my.Logger.Error().Err(err).Msg("Failed to collect disk data")
17 changes: 5 additions & 12 deletions cmd/collectors/rest/rest.go
@@ -306,17 +306,10 @@ func (r *Rest) PollData() (map[string]*matrix.Matrix, error) {

numRecords := len(r.Matrix[r.Object].GetInstances())

r.Logger.Info().
Int("instances", numRecords).
Uint64("metrics", count).
Str("apiD", apiD.Round(time.Millisecond).String()).
Str("parseD", parseD.Round(time.Millisecond).String()).
Msg("Collected")

_ = r.Metadata.LazySetValueInt64("count", "data", int64(numRecords))
_ = r.Metadata.LazySetValueInt64("api_time", "data", apiD.Microseconds())
_ = r.Metadata.LazySetValueInt64("parse_time", "data", parseD.Microseconds())
_ = r.Metadata.LazySetValueUint64("datapoint_count", "data", count)
_ = r.Metadata.LazySetValueUint64("metrics", "data", count)
_ = r.Metadata.LazySetValueUint64("instances", "data", uint64(numRecords))
r.AddCollectCount(count)

return r.Matrix, nil
@@ -566,7 +559,7 @@ func (r *Rest) CollectAutoSupport(p *collector.Payload) {
ClientTimeout: r.Client.Timeout.String(),
})

if (r.Name == "Rest" && (r.Object == "Volume" || r.Object == "Node")) || r.Name == "ems" {
if (r.Name == "Rest" && (r.Object == "Volume" || r.Object == "Node")) || r.Name == "Ems" {
version := r.Client.Cluster().Version
p.Target.Version = strconv.Itoa(version[0]) + "." + strconv.Itoa(version[1]) + "." + strconv.Itoa(version[2])
p.Target.Model = "cdot"
@@ -577,8 +570,8 @@

md := r.GetMetadata()
info := collector.InstanceInfo{
Count: md.LazyValueInt64("count", "data"),
DataPoints: md.LazyValueInt64("datapoint_count", "data"),
Count: md.LazyValueInt64("instances", "data"),
DataPoints: md.LazyValueInt64("metrics", "data"),
PollTime: md.LazyValueInt64("poll_time", "data"),
APITime: md.LazyValueInt64("api_time", "data"),
ParseTime: md.LazyValueInt64("parse_time", "data"),
1 change: 0 additions & 1 deletion cmd/collectors/rest/templating.go
@@ -66,7 +66,6 @@ func (r *Rest) InitCache() error {
}

r.ParseRestCounters(counters, r.Prop)
_, _ = r.Metadata.NewMetricUint64("datapoint_count")

r.Logger.Debug().
Strs("extracted Instance Keys", r.Prop.InstanceKeys).
33 changes: 19 additions & 14 deletions cmd/collectors/restperf/restperf.go
@@ -158,6 +158,9 @@ func (r *RestPerf) InitMatrix() error {
mat.SetGlobalLabel(l.GetNameS(), l.GetContentS())
}
}

// Add metadata metric for skips
_, _ = r.Metadata.NewMetricUint64("skips")
return nil
}

@@ -198,8 +201,7 @@ func (r *RestPerf) PollCounter() (map[string]*matrix.Matrix, error) {

records, err = rest.Fetch(r.Client, href)
if err != nil {
r.Logger.Error().Err(err).Str("href", href).Msg("Failed to fetch data")
return nil, err
return r.handleError(err, href)
}

firstRecord := records[0]
@@ -720,7 +722,8 @@ func (r *RestPerf) PollData() (map[string]*matrix.Matrix, error) {

_ = r.Metadata.LazySetValueInt64("api_time", "data", apiD.Microseconds())
_ = r.Metadata.LazySetValueInt64("parse_time", "data", parseD.Microseconds())
_ = r.Metadata.LazySetValueUint64("count", "data", count)
_ = r.Metadata.LazySetValueUint64("metrics", "data", count)
_ = r.Metadata.LazySetValueUint64("instances", "data", numRecords)
r.AddCollectCount(count)

// skip calculating from delta if no data from previous poll
@@ -899,16 +902,9 @@ func (r *RestPerf) PollData() (map[string]*matrix.Matrix, error) {
}

calcD := time.Since(calcStart)
_ = r.Metadata.LazySetValueUint64("instances", "data", uint64(len(newData.GetInstances())))
_ = r.Metadata.LazySetValueInt64("calc_time", "data", calcD.Microseconds())

r.Logger.Info().
Int("instances", len(newData.GetInstances())).
Uint64("metrics", count).
Str("apiD", apiD.Round(time.Millisecond).String()).
Str("parseD", parseD.Round(time.Millisecond).String()).
Str("calcD", calcD.Round(time.Millisecond).String()).
Int("skips", totalSkips).
Msg("Collected")
_ = r.Metadata.LazySetValueUint64("skips", "data", uint64(totalSkips))

// store cache for next poll
r.Matrix[r.Object] = cachedData
@@ -1077,8 +1073,7 @@ func (r *RestPerf) PollInstance() (map[string]*matrix.Matrix, error) {

records, err = rest.Fetch(r.Client, href)
if err != nil {
r.Logger.Error().Err(err).Str("href", href).Msg("Failed to fetch data")
return nil, err
return r.handleError(err, href)
}

if len(records) == 0 {
@@ -1153,6 +1148,16 @@
return nil, err
}

func (r *RestPerf) handleError(err error, href string) (map[string]*matrix.Matrix, error) {
if errs.IsRestErr(err, errs.TableNotFound) {
// the table does not exist, log as info and return no instances so the task goes to stand-by
r.Logger.Info().Str("href", href).Msg(err.Error())
return nil, errs.New(errs.ErrNoInstance, err.Error())
}
r.Logger.Error().Err(err).Str("href", href).Msg("Failed to fetch data")
return nil, err
}

func isWorkloadObject(query string) bool {
_, ok := qosQueries[query]
return ok
16 changes: 4 additions & 12 deletions cmd/collectors/zapi/collector/zapi.go
@@ -367,19 +367,11 @@ func (z *Zapi) PollData() (map[string]*matrix.Matrix, error) {
z.Logger.Debug().Str("key", key).Msg("removed instance")
}

z.Logger.Info().
Int("instances", len(instances)).
Uint64("metrics", count).
Str("apiD", apiT.Round(time.Millisecond).String()).
Str("parseD", parseT.Round(time.Millisecond).String()).
Msg("Collected")

// update metadata
_ = z.Metadata.LazySetValueInt64("api_time", "data", apiT.Microseconds())
_ = z.Metadata.LazySetValueInt64("parse_time", "data", parseT.Microseconds())
_ = z.Metadata.LazySetValueUint64("count", "data", count)
_ = z.Metadata.LazySetValueUint64("count", "instance", uint64(len(instances)))

_ = z.Metadata.LazySetValueUint64("metrics", "data", count)
_ = z.Metadata.LazySetValueUint64("instances", "data", uint64(len(instances)))
z.AddCollectCount(count)

if len(mat.GetInstances()) == 0 {
@@ -442,8 +434,8 @@ func (z *Zapi) CollectAutoSupport(p *collector.Payload) {

md := z.GetMetadata()
info := collector.InstanceInfo{
Count: md.LazyValueInt64("count", "instance"),
DataPoints: md.LazyValueInt64("count", "data"),
Count: md.LazyValueInt64("instances", "data"),
DataPoints: md.LazyValueInt64("metrics", "data"),
PollTime: md.LazyValueInt64("poll_time", "data"),
APITime: md.LazyValueInt64("api_time", "data"),
ParseTime: md.LazyValueInt64("parse_time", "data"),
17 changes: 7 additions & 10 deletions cmd/collectors/zapiperf/zapiperf.go
@@ -140,6 +140,10 @@ func (z *ZapiPerf) InitCache() error {
}
z.Matrix[z.Object].Object = z.object
z.Logger.Debug().Msgf("object= %s --> %s", z.Object, z.object)

// Add metadata metric for skips
_, _ = z.Metadata.NewMetricUint64("skips")

return nil
}

@@ -526,7 +530,8 @@ func (z *ZapiPerf) PollData() (map[string]*matrix.Matrix, error) {
// update metadata
_ = z.Metadata.LazySetValueInt64("api_time", "data", apiT.Microseconds())
_ = z.Metadata.LazySetValueInt64("parse_time", "data", parseT.Microseconds())
_ = z.Metadata.LazySetValueUint64("count", "data", count)
_ = z.Metadata.LazySetValueUint64("metrics", "data", count)
_ = z.Metadata.LazySetValueUint64("instances", "data", uint64(len(instanceKeys)))
z.AddCollectCount(count)

// skip calculating from delta if no data from previous poll
@@ -668,16 +673,8 @@

calcD := time.Since(calcStart)

z.Logger.Info().
Int("instances", len(instanceKeys)).
Uint64("metrics", count).
Str("apiD", apiT.Round(time.Millisecond).String()).
Str("parseD", parseT.Round(time.Millisecond).String()).
Str("calcD", calcD.Round(time.Millisecond).String()).
Int("skips", totalSkips).
Msg("Collected")

_ = z.Metadata.LazySetValueInt64("calc_time", "data", calcD.Microseconds())
_ = z.Metadata.LazySetValueUint64("skips", "data", uint64(totalSkips))

// store cache for next poll
z.Matrix[z.Object] = cachedData
2 changes: 1 addition & 1 deletion cmd/exporters/prometheus/httpd.go
@@ -152,7 +152,7 @@ func (p *Prometheus) ServeMetrics(w http.ResponseWriter, r *http.Request) {
// filterMetaTags removes duplicate TYPE/HELP tags in the metrics
// Note: this is a workaround, normally Render() will only add
// one TYPE/HELP for each metric type, however since some metric
// types (e.g. metadata_collector_count) are submitted from multiple
// types (e.g. metadata_collector_metrics) are submitted from multiple
// collectors, we end up with duplicates in the final batch delivered
// over HTTP.
func filterMetaTags(metrics [][]byte) [][]byte {
38 changes: 33 additions & 5 deletions cmd/poller/collector/collector.go
@@ -22,8 +22,10 @@ import (
"github.com/netapp/harvest/v2/pkg/logging"
"golang.org/x/text/cases"
"golang.org/x/text/language"
"math"
"reflect"
"strconv"
"strings"
"sync"
"time"

@@ -223,7 +225,8 @@ func Init(c Collector) error {
_, _ = md.NewMetricInt64("parse_time")
_, _ = md.NewMetricInt64("calc_time")
_, _ = md.NewMetricInt64("plugin_time")
_, _ = md.NewMetricUint64("count")
_, _ = md.NewMetricUint64("metrics")
_, _ = md.NewMetricUint64("instances")

// add tasks of the collector as metadata instances
for _, task := range s.GetTasks() {
Expand All @@ -233,9 +236,6 @@ func Init(c Collector) error {
instance.SetLabel("interval", strconv.FormatFloat(t, 'f', 4, 32))
}

// Create the metadata instance named "instance" since autosupport relies on that key
_, _ = md.NewInstance("instance")

md.SetExportOptions(matrix.DefaultExportOptions())

c.SetMetadata(md)
@@ -407,6 +407,9 @@ func (c *AbstractCollector) Start(wg *sync.WaitGroup) {
_ = c.Metadata.LazySetValueInt64("plugin_time", task.Name, pluginTime.Microseconds())
}
}
if task.Name == "data" {
c.logMetadata()
}

// update task metadata
_ = c.Metadata.LazySetValueInt64("poll_time", task.Name, task.GetDuration().Microseconds())
@@ -425,7 +428,7 @@
}

if err := e.Export(c.Metadata); err != nil {
c.Logger.Warn().Msgf("export metadata to [%s]: %s", e.GetName(), err.Error())
c.Logger.Warn().Err(err).Str("exporter", e.GetName()).Msg("Unable to export metadata")
}

// continue if metadata failed, since it might be specific to metadata
@@ -454,6 +457,31 @@
}
}

func (c *AbstractCollector) logMetadata() {
metrics := c.Metadata.GetMetrics()
info := c.Logger.Info()
dataInstance := c.Metadata.GetInstance("data")
if dataInstance == nil {
return
}
for _, metric := range metrics {
mName := metric.GetName()
if mName == "poll_time" || mName == "task_time" {
// don't log these since they're covered by other durations
continue
}
value, _, _ := metric.GetValueFloat64(dataInstance)
if strings.HasSuffix(mName, "_time") {
// convert microseconds to milliseconds and names ending with _time into -> *Ms
v := int64(math.Round(value / 1000))
info.Int64(mName[0:len(mName)-5]+"Ms", v)
} else {
info.Int64(mName, int64(value))
}
}
info.Msg("Collected")
}

// GetName returns name of the collector
func (c *AbstractCollector) GetName() string {
return c.Name
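Together with the renamed metadata metrics, `logMetadata` replaces the per-collector "Collected" log statements removed above with a single structured line per data poll. A purely illustrative example of the resulting zerolog output is sketched below; the field set and values are assumptions derived from the conversion rules in the function (microsecond timers rounded to milliseconds and renamed with an `Ms` suffix), not captured output.

```
{"level":"info","apiMs":52,"parseMs":3,"calcMs":1,"pluginMs":6,"metrics":3200,"instances":120,"message":"Collected"}
```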
2 changes: 1 addition & 1 deletion grafana/dashboards/cmode/harvest_dashboard_metadata.json
@@ -3529,7 +3529,7 @@
"pluginVersion": "8.1.8",
"targets": [
{
"expr": "avg by (collector, object) (metadata_collector_count{hostname=~\"$Hostname\",poller=~\"$Poller\",task=\"data\"})",
"expr": "avg by (collector, object) (metadata_collector_metrics{hostname=~\"$Hostname\",poller=~\"$Poller\",task=\"data\"})",
"hide": false,
"interval": "",
"legendFormat": "{{collector}} - {{object}}",