diff --git a/cmd/collectors/ems/README.md b/cmd/collectors/ems/README.md index 2064b3201..b50af1029 100644 --- a/cmd/collectors/ems/README.md +++ b/cmd/collectors/ems/README.md @@ -67,8 +67,8 @@ objects: ``` Even though the EMS mapping shown above references a single file named `ems.yaml`, -there may be mutliple versions of that file across subdirectories named after ONTAP releases. -See [cdot](`https://github.com/NetApp/harvest/tree/main/conf/zapiperf/cdot`) for examples. +there may be multiple versions of that file across subdirectories named after ONTAP releases. +See [cDOT](`https://github.com/NetApp/harvest/tree/main/conf/zapiperf/cdot`) for examples. At runtime, the EMS collector will select the appropriate object configuration file that most closely matches the targeted ONTAP system. ### EMS Template File @@ -85,12 +85,12 @@ The EMS template file should contain the following parameters: #### Event Parameters -This section defines the list of EMS events you want to collect, which properites to export, what labels to attach, and how to handle bookend pairs. +This section defines the list of EMS events you want to collect, which properties to export, what labels to attach, and how to handle bookend pairs. The EMS event template parameters are explained below along with an example for reference. - `name` is the ONTAP EMS event name. (collect ONTAP EMS events with the name of `LUN.offline`) - `matches` list of name-value pairs used to further filter ONTAP events. -Some EMS events include arguments and these name-value pairs provide a way to filter on those arugments. +Some EMS events include arguments and these name-value pairs provide a way to filter on those arguments. (Only collect ONTAP EMS events where `volume_name` has the value `abc_vol`) - `exports` list of EMS event parameters to export. These exported parameters are attached as labels to each matching EMS event. - labels that are prefixed with `^^` use that parameter to define [instance uniqueness](https://github.com/NetApp/harvest/blob/main/docs/TemplatesAndMetrics.md#harvest-object-template). diff --git a/cmd/collectors/ems/ems.go b/cmd/collectors/ems/ems.go index 21e9c6765..101cb375b 100644 --- a/cmd/collectors/ems/ems.go +++ b/cmd/collectors/ems/ems.go @@ -455,6 +455,10 @@ func (e *Ems) PollData() (map[string]*matrix.Matrix, error) { Int("queried", len(e.eventNames)). Msg("No EMS events returned") e.lastFilterTime = toTime + _ = e.Metadata.LazySetValueInt64("api_time", "data", apiD.Microseconds()) + _ = e.Metadata.LazySetValueInt64("parse_time", "data", parseD.Microseconds()) + _ = e.Metadata.LazySetValueUint64("metrics", "data", 0) + _ = e.Metadata.LazySetValueUint64("instances", "data", 0) return nil, nil } @@ -475,10 +479,11 @@ func (e *Ems) PollData() (map[string]*matrix.Matrix, error) { Str("parseTime", parseD.String()). Msg("Collected") - _ = e.Metadata.LazySetValueInt64("count", "data", int64(instanceCount)) _ = e.Metadata.LazySetValueInt64("api_time", "data", apiD.Microseconds()) _ = e.Metadata.LazySetValueInt64("parse_time", "data", parseD.Microseconds()) - _ = e.Metadata.LazySetValueUint64("datapoint_count", "data", count) + _ = e.Metadata.LazySetValueUint64("metrics", "data", count) + _ = e.Metadata.LazySetValueUint64("instances", "data", uint64(instanceCount)) + e.AddCollectCount(count) // update lastFilterTime to current cluster time diff --git a/cmd/collectors/rest/plugins/certificate/certificate.go b/cmd/collectors/rest/plugins/certificate/certificate.go index 1e5ee474e..9199acbf2 100644 --- a/cmd/collectors/rest/plugins/certificate/certificate.go +++ b/cmd/collectors/rest/plugins/certificate/certificate.go @@ -72,7 +72,7 @@ func (my *Certificate) Run(data *matrix.Matrix) ([]*matrix.Matrix, error) { // invoke private vserver cli rest and get admin vserver name if adminVserver, err = my.GetAdminVserver(); err != nil { - if ontap.IsAPINotFound(err) { + if ontap.IsRestErr(err, ontap.APINotFound) { my.Logger.Debug().Err(err).Msg("Failed to collect admin SVM") } else { my.Logger.Error().Err(err).Msg("Failed to collect admin SVM") @@ -82,7 +82,7 @@ func (my *Certificate) Run(data *matrix.Matrix) ([]*matrix.Matrix, error) { // invoke private ssl cli rest and get the admin SVM's serial number if adminVserverSerial, err = my.GetSecuritySsl(adminVserver); err != nil { - if ontap.IsAPINotFound(err) { + if ontap.IsRestErr(err, ontap.APINotFound) { my.Logger.Debug().Err(err).Msg("Failed to collect admin SVM's serial number") } else { my.Logger.Error().Msg("Failed to collect admin SVM's serial number") diff --git a/cmd/collectors/rest/plugins/svm/svm.go b/cmd/collectors/rest/plugins/svm/svm.go index b4335bbea..dc0bf0da2 100644 --- a/cmd/collectors/rest/plugins/svm/svm.go +++ b/cmd/collectors/rest/plugins/svm/svm.go @@ -34,7 +34,7 @@ func (my *SVM) Run(data *matrix.Matrix) ([]*matrix.Matrix, error) { // invoke nameservice-nsswitch-get-iter zapi and get nsswitch info if my.nsswitchInfo, err = my.GetNSSwitchInfo(data); err != nil { - if errs.IsAPINotFound(err) { + if errs.IsRestErr(err, errs.APINotFound) { my.Logger.Debug().Err(err).Msg("Failed to collect nsswitch info") } else { my.Logger.Warn().Err(err).Msg("Failed to collect nsswitch info") diff --git a/cmd/collectors/rest/plugins/volume/volume.go b/cmd/collectors/rest/plugins/volume/volume.go index d27a28451..dfcba03a6 100644 --- a/cmd/collectors/rest/plugins/volume/volume.go +++ b/cmd/collectors/rest/plugins/volume/volume.go @@ -67,7 +67,7 @@ func (my *Volume) Run(data *matrix.Matrix) ([]*matrix.Matrix, error) { // invoke disk rest and populate info in aggrsMap if disks, err := my.getEncryptedDisks(); err != nil { - if errs.IsAPINotFound(err) { + if errs.IsRestErr(err, errs.APINotFound) { my.Logger.Debug().Err(err).Msg("Failed to collect disk data") } else { my.Logger.Error().Err(err).Msg("Failed to collect disk data") diff --git a/cmd/collectors/rest/rest.go b/cmd/collectors/rest/rest.go index 5d2764047..305e38ac2 100644 --- a/cmd/collectors/rest/rest.go +++ b/cmd/collectors/rest/rest.go @@ -306,17 +306,10 @@ func (r *Rest) PollData() (map[string]*matrix.Matrix, error) { numRecords := len(r.Matrix[r.Object].GetInstances()) - r.Logger.Info(). - Int("instances", numRecords). - Uint64("metrics", count). - Str("apiD", apiD.Round(time.Millisecond).String()). - Str("parseD", parseD.Round(time.Millisecond).String()). - Msg("Collected") - - _ = r.Metadata.LazySetValueInt64("count", "data", int64(numRecords)) _ = r.Metadata.LazySetValueInt64("api_time", "data", apiD.Microseconds()) _ = r.Metadata.LazySetValueInt64("parse_time", "data", parseD.Microseconds()) - _ = r.Metadata.LazySetValueUint64("datapoint_count", "data", count) + _ = r.Metadata.LazySetValueUint64("metrics", "data", count) + _ = r.Metadata.LazySetValueUint64("instances", "data", uint64(numRecords)) r.AddCollectCount(count) return r.Matrix, nil @@ -566,7 +559,7 @@ func (r *Rest) CollectAutoSupport(p *collector.Payload) { ClientTimeout: r.Client.Timeout.String(), }) - if (r.Name == "Rest" && (r.Object == "Volume" || r.Object == "Node")) || r.Name == "ems" { + if (r.Name == "Rest" && (r.Object == "Volume" || r.Object == "Node")) || r.Name == "Ems" { version := r.Client.Cluster().Version p.Target.Version = strconv.Itoa(version[0]) + "." + strconv.Itoa(version[1]) + "." + strconv.Itoa(version[2]) p.Target.Model = "cdot" @@ -577,8 +570,8 @@ func (r *Rest) CollectAutoSupport(p *collector.Payload) { md := r.GetMetadata() info := collector.InstanceInfo{ - Count: md.LazyValueInt64("count", "data"), - DataPoints: md.LazyValueInt64("datapoint_count", "data"), + Count: md.LazyValueInt64("instances", "data"), + DataPoints: md.LazyValueInt64("metrics", "data"), PollTime: md.LazyValueInt64("poll_time", "data"), APITime: md.LazyValueInt64("api_time", "data"), ParseTime: md.LazyValueInt64("parse_time", "data"), diff --git a/cmd/collectors/rest/templating.go b/cmd/collectors/rest/templating.go index c84b390b4..e22270c19 100644 --- a/cmd/collectors/rest/templating.go +++ b/cmd/collectors/rest/templating.go @@ -66,7 +66,6 @@ func (r *Rest) InitCache() error { } r.ParseRestCounters(counters, r.Prop) - _, _ = r.Metadata.NewMetricUint64("datapoint_count") r.Logger.Debug(). Strs("extracted Instance Keys", r.Prop.InstanceKeys). diff --git a/cmd/collectors/restperf/restperf.go b/cmd/collectors/restperf/restperf.go index b98e650be..a5c705bb1 100644 --- a/cmd/collectors/restperf/restperf.go +++ b/cmd/collectors/restperf/restperf.go @@ -158,6 +158,9 @@ func (r *RestPerf) InitMatrix() error { mat.SetGlobalLabel(l.GetNameS(), l.GetContentS()) } } + + // Add metadata metric for skips + _, _ = r.Metadata.NewMetricUint64("skips") return nil } @@ -198,8 +201,7 @@ func (r *RestPerf) PollCounter() (map[string]*matrix.Matrix, error) { records, err = rest.Fetch(r.Client, href) if err != nil { - r.Logger.Error().Err(err).Str("href", href).Msg("Failed to fetch data") - return nil, err + return r.handleError(err, href) } firstRecord := records[0] @@ -720,7 +722,8 @@ func (r *RestPerf) PollData() (map[string]*matrix.Matrix, error) { _ = r.Metadata.LazySetValueInt64("api_time", "data", apiD.Microseconds()) _ = r.Metadata.LazySetValueInt64("parse_time", "data", parseD.Microseconds()) - _ = r.Metadata.LazySetValueUint64("count", "data", count) + _ = r.Metadata.LazySetValueUint64("metrics", "data", count) + _ = r.Metadata.LazySetValueUint64("instances", "data", numRecords) r.AddCollectCount(count) // skip calculating from delta if no data from previous poll @@ -899,16 +902,9 @@ func (r *RestPerf) PollData() (map[string]*matrix.Matrix, error) { } calcD := time.Since(calcStart) + _ = r.Metadata.LazySetValueUint64("instances", "data", uint64(len(newData.GetInstances()))) _ = r.Metadata.LazySetValueInt64("calc_time", "data", calcD.Microseconds()) - - r.Logger.Info(). - Int("instances", len(newData.GetInstances())). - Uint64("metrics", count). - Str("apiD", apiD.Round(time.Millisecond).String()). - Str("parseD", parseD.Round(time.Millisecond).String()). - Str("calcD", calcD.Round(time.Millisecond).String()). - Int("skips", totalSkips). - Msg("Collected") + _ = r.Metadata.LazySetValueUint64("skips", "data", uint64(totalSkips)) // store cache for next poll r.Matrix[r.Object] = cachedData @@ -1077,8 +1073,7 @@ func (r *RestPerf) PollInstance() (map[string]*matrix.Matrix, error) { records, err = rest.Fetch(r.Client, href) if err != nil { - r.Logger.Error().Err(err).Str("href", href).Msg("Failed to fetch data") - return nil, err + return r.handleError(err, href) } if len(records) == 0 { @@ -1153,6 +1148,16 @@ func (r *RestPerf) PollInstance() (map[string]*matrix.Matrix, error) { return nil, err } +func (r *RestPerf) handleError(err error, href string) (map[string]*matrix.Matrix, error) { + if errs.IsRestErr(err, errs.TableNotFound) { + // the table does not exist, log as info and return no instances so the task goes to stand-by + r.Logger.Info().Str("href", href).Msg(err.Error()) + return nil, errs.New(errs.ErrNoInstance, err.Error()) + } + r.Logger.Error().Err(err).Str("href", href).Msg("Failed to fetch data") + return nil, err +} + func isWorkloadObject(query string) bool { _, ok := qosQueries[query] return ok diff --git a/cmd/collectors/zapi/collector/zapi.go b/cmd/collectors/zapi/collector/zapi.go index e3362dd57..ef92427cc 100644 --- a/cmd/collectors/zapi/collector/zapi.go +++ b/cmd/collectors/zapi/collector/zapi.go @@ -367,19 +367,11 @@ func (z *Zapi) PollData() (map[string]*matrix.Matrix, error) { z.Logger.Debug().Str("key", key).Msg("removed instance") } - z.Logger.Info(). - Int("instances", len(instances)). - Uint64("metrics", count). - Str("apiD", apiT.Round(time.Millisecond).String()). - Str("parseD", parseT.Round(time.Millisecond).String()). - Msg("Collected") - // update metadata _ = z.Metadata.LazySetValueInt64("api_time", "data", apiT.Microseconds()) _ = z.Metadata.LazySetValueInt64("parse_time", "data", parseT.Microseconds()) - _ = z.Metadata.LazySetValueUint64("count", "data", count) - _ = z.Metadata.LazySetValueUint64("count", "instance", uint64(len(instances))) - + _ = z.Metadata.LazySetValueUint64("metrics", "data", count) + _ = z.Metadata.LazySetValueUint64("instances", "data", uint64(len(instances))) z.AddCollectCount(count) if len(mat.GetInstances()) == 0 { @@ -442,8 +434,8 @@ func (z *Zapi) CollectAutoSupport(p *collector.Payload) { md := z.GetMetadata() info := collector.InstanceInfo{ - Count: md.LazyValueInt64("count", "instance"), - DataPoints: md.LazyValueInt64("count", "data"), + Count: md.LazyValueInt64("instances", "data"), + DataPoints: md.LazyValueInt64("metrics", "data"), PollTime: md.LazyValueInt64("poll_time", "data"), APITime: md.LazyValueInt64("api_time", "data"), ParseTime: md.LazyValueInt64("parse_time", "data"), diff --git a/cmd/collectors/zapiperf/zapiperf.go b/cmd/collectors/zapiperf/zapiperf.go index ee8baf210..d63d3e30b 100644 --- a/cmd/collectors/zapiperf/zapiperf.go +++ b/cmd/collectors/zapiperf/zapiperf.go @@ -140,6 +140,10 @@ func (z *ZapiPerf) InitCache() error { } z.Matrix[z.Object].Object = z.object z.Logger.Debug().Msgf("object= %s --> %s", z.Object, z.object) + + // Add metadata metric for skips + _, _ = z.Metadata.NewMetricUint64("skips") + return nil } @@ -526,7 +530,8 @@ func (z *ZapiPerf) PollData() (map[string]*matrix.Matrix, error) { // update metadata _ = z.Metadata.LazySetValueInt64("api_time", "data", apiT.Microseconds()) _ = z.Metadata.LazySetValueInt64("parse_time", "data", parseT.Microseconds()) - _ = z.Metadata.LazySetValueUint64("count", "data", count) + _ = z.Metadata.LazySetValueUint64("metrics", "data", count) + _ = z.Metadata.LazySetValueUint64("instances", "data", uint64(len(instanceKeys))) z.AddCollectCount(count) // skip calculating from delta if no data from previous poll @@ -668,16 +673,8 @@ func (z *ZapiPerf) PollData() (map[string]*matrix.Matrix, error) { calcD := time.Since(calcStart) - z.Logger.Info(). - Int("instances", len(instanceKeys)). - Uint64("metrics", count). - Str("apiD", apiT.Round(time.Millisecond).String()). - Str("parseD", parseT.Round(time.Millisecond).String()). - Str("calcD", calcD.Round(time.Millisecond).String()). - Int("skips", totalSkips). - Msg("Collected") - _ = z.Metadata.LazySetValueInt64("calc_time", "data", calcD.Microseconds()) + _ = z.Metadata.LazySetValueUint64("skips", "data", uint64(totalSkips)) // store cache for next poll z.Matrix[z.Object] = cachedData diff --git a/cmd/exporters/prometheus/httpd.go b/cmd/exporters/prometheus/httpd.go index 92296794e..bef087c87 100644 --- a/cmd/exporters/prometheus/httpd.go +++ b/cmd/exporters/prometheus/httpd.go @@ -152,7 +152,7 @@ func (p *Prometheus) ServeMetrics(w http.ResponseWriter, r *http.Request) { // filterMetaTags removes duplicate TYPE/HELP tags in the metrics // Note: this is a workaround, normally Render() will only add // one TYPE/HELP for each metric type, however since some metric -// types (e.g. metadata_collector_count) are submitted from multiple +// types (e.g. metadata_collector_metrics) are submitted from multiple // collectors, we end up with duplicates in the final batch delivered // over HTTP. func filterMetaTags(metrics [][]byte) [][]byte { diff --git a/cmd/poller/collector/collector.go b/cmd/poller/collector/collector.go index 64b4a6fba..ec6d7ebc5 100644 --- a/cmd/poller/collector/collector.go +++ b/cmd/poller/collector/collector.go @@ -22,8 +22,10 @@ import ( "github.com/netapp/harvest/v2/pkg/logging" "golang.org/x/text/cases" "golang.org/x/text/language" + "math" "reflect" "strconv" + "strings" "sync" "time" @@ -223,7 +225,8 @@ func Init(c Collector) error { _, _ = md.NewMetricInt64("parse_time") _, _ = md.NewMetricInt64("calc_time") _, _ = md.NewMetricInt64("plugin_time") - _, _ = md.NewMetricUint64("count") + _, _ = md.NewMetricUint64("metrics") + _, _ = md.NewMetricUint64("instances") // add tasks of the collector as metadata instances for _, task := range s.GetTasks() { @@ -233,9 +236,6 @@ func Init(c Collector) error { instance.SetLabel("interval", strconv.FormatFloat(t, 'f', 4, 32)) } - // Create the metadata instance named "instance" since autosupport relies on that key - _, _ = md.NewInstance("instance") - md.SetExportOptions(matrix.DefaultExportOptions()) c.SetMetadata(md) @@ -407,6 +407,9 @@ func (c *AbstractCollector) Start(wg *sync.WaitGroup) { _ = c.Metadata.LazySetValueInt64("plugin_time", task.Name, pluginTime.Microseconds()) } } + if task.Name == "data" { + c.logMetadata() + } // update task metadata _ = c.Metadata.LazySetValueInt64("poll_time", task.Name, task.GetDuration().Microseconds()) @@ -425,7 +428,7 @@ func (c *AbstractCollector) Start(wg *sync.WaitGroup) { } if err := e.Export(c.Metadata); err != nil { - c.Logger.Warn().Msgf("export metadata to [%s]: %s", e.GetName(), err.Error()) + c.Logger.Warn().Err(err).Str("exporter", e.GetName()).Msg("Unable to export metadata") } // continue if metadata failed, since it might be specific to metadata @@ -454,6 +457,31 @@ func (c *AbstractCollector) Start(wg *sync.WaitGroup) { } } +func (c *AbstractCollector) logMetadata() { + metrics := c.Metadata.GetMetrics() + info := c.Logger.Info() + dataInstance := c.Metadata.GetInstance("data") + if dataInstance == nil { + return + } + for _, metric := range metrics { + mName := metric.GetName() + if mName == "poll_time" || mName == "task_time" { + // don't log these since they're covered by other durations + continue + } + value, _, _ := metric.GetValueFloat64(dataInstance) + if strings.HasSuffix(mName, "_time") { + // convert microseconds to milliseconds and names ending with _time into -> *Ms + v := int64(math.Round(value / 1000)) + info.Int64(mName[0:len(mName)-5]+"Ms", v) + } else { + info.Int64(mName, int64(value)) + } + } + info.Msg("Collected") +} + // GetName returns name of the collector func (c *AbstractCollector) GetName() string { return c.Name diff --git a/grafana/dashboards/cmode/harvest_dashboard_metadata.json b/grafana/dashboards/cmode/harvest_dashboard_metadata.json index 51b0ae11c..a959a4695 100644 --- a/grafana/dashboards/cmode/harvest_dashboard_metadata.json +++ b/grafana/dashboards/cmode/harvest_dashboard_metadata.json @@ -3529,7 +3529,7 @@ "pluginVersion": "8.1.8", "targets": [ { - "expr": "avg by (collector, object) (metadata_collector_count{hostname=~\"$Hostname\",poller=~\"$Poller\",task=\"data\"})", + "expr": "avg by (collector, object) (metadata_collector_metrics{hostname=~\"$Hostname\",poller=~\"$Poller\",task=\"data\"})", "hide": false, "interval": "", "legendFormat": "{{collector}} - {{object}}", diff --git a/pkg/errs/ontap.go b/pkg/errs/ontap.go index 1959004b3..45702a22c 100644 --- a/pkg/errs/ontap.go +++ b/pkg/errs/ontap.go @@ -31,13 +31,14 @@ type OntapRestCode struct { } var ( - APINotFound = OntapRestCode{"API not found", 3} + APINotFound = OntapRestCode{"API not found", 3} + TableNotFound = OntapRestCode{"Table is not found", 8585320} ) -func IsAPINotFound(err error) bool { +func IsRestErr(err error, sentinel OntapRestCode) bool { var restErr *RestError if errors.As(err, &restErr) { - if restErr.Code == APINotFound.Code { + if restErr.Code == sentinel.Code { return true } }