Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: collectors should log PollData, plugin times, and metadata #1347

Merged
merged 2 commits into from
Oct 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions cmd/collectors/ems/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ objects:
```

Even though the EMS mapping shown above references a single file named `ems.yaml`,
there may be mutliple versions of that file across subdirectories named after ONTAP releases.
See [cdot](`https://github.com/NetApp/harvest/tree/main/conf/zapiperf/cdot`) for examples.
there may be multiple versions of that file across subdirectories named after ONTAP releases.
See [cDOT](`https://github.com/NetApp/harvest/tree/main/conf/zapiperf/cdot`) for examples.
At runtime, the EMS collector will select the appropriate object configuration file that most closely matches the targeted ONTAP system.

### EMS Template File
Expand All @@ -85,12 +85,12 @@ The EMS template file should contain the following parameters:

#### Event Parameters

This section defines the list of EMS events you want to collect, which properites to export, what labels to attach, and how to handle bookend pairs.
This section defines the list of EMS events you want to collect, which properties to export, what labels to attach, and how to handle bookend pairs.
The EMS event template parameters are explained below along with an example for reference.

- `name` is the ONTAP EMS event name. (collect ONTAP EMS events with the name of `LUN.offline`)
- `matches` list of name-value pairs used to further filter ONTAP events.
Some EMS events include arguments and these name-value pairs provide a way to filter on those arugments.
Some EMS events include arguments and these name-value pairs provide a way to filter on those arguments.
(Only collect ONTAP EMS events where `volume_name` has the value `abc_vol`)
- `exports` list of EMS event parameters to export. These exported parameters are attached as labels to each matching EMS event.
- labels that are prefixed with `^^` use that parameter to define [instance uniqueness](https://github.com/NetApp/harvest/blob/main/docs/TemplatesAndMetrics.md#harvest-object-template).
Expand Down
9 changes: 7 additions & 2 deletions cmd/collectors/ems/ems.go
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,10 @@ func (e *Ems) PollData() (map[string]*matrix.Matrix, error) {
Int("queried", len(e.eventNames)).
Msg("No EMS events returned")
e.lastFilterTime = toTime
_ = e.Metadata.LazySetValueInt64("api_time", "data", apiD.Microseconds())
_ = e.Metadata.LazySetValueInt64("parse_time", "data", parseD.Microseconds())
_ = e.Metadata.LazySetValueUint64("metrics", "data", 0)
_ = e.Metadata.LazySetValueUint64("instances", "data", 0)
return nil, nil
}

Expand All @@ -475,10 +479,11 @@ func (e *Ems) PollData() (map[string]*matrix.Matrix, error) {
Str("parseTime", parseD.String()).
Msg("Collected")

_ = e.Metadata.LazySetValueInt64("count", "data", int64(instanceCount))
_ = e.Metadata.LazySetValueInt64("api_time", "data", apiD.Microseconds())
_ = e.Metadata.LazySetValueInt64("parse_time", "data", parseD.Microseconds())
_ = e.Metadata.LazySetValueUint64("datapoint_count", "data", count)
_ = e.Metadata.LazySetValueUint64("metrics", "data", count)
_ = e.Metadata.LazySetValueUint64("instances", "data", uint64(instanceCount))

e.AddCollectCount(count)

// update lastFilterTime to current cluster time
Expand Down
4 changes: 2 additions & 2 deletions cmd/collectors/rest/plugins/certificate/certificate.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ func (my *Certificate) Run(data *matrix.Matrix) ([]*matrix.Matrix, error) {

// invoke private vserver cli rest and get admin vserver name
if adminVserver, err = my.GetAdminVserver(); err != nil {
if ontap.IsAPINotFound(err) {
if ontap.IsRestErr(err, ontap.APINotFound) {
my.Logger.Debug().Err(err).Msg("Failed to collect admin SVM")
} else {
my.Logger.Error().Err(err).Msg("Failed to collect admin SVM")
Expand All @@ -82,7 +82,7 @@ func (my *Certificate) Run(data *matrix.Matrix) ([]*matrix.Matrix, error) {

// invoke private ssl cli rest and get the admin SVM's serial number
if adminVserverSerial, err = my.GetSecuritySsl(adminVserver); err != nil {
if ontap.IsAPINotFound(err) {
if ontap.IsRestErr(err, ontap.APINotFound) {
my.Logger.Debug().Err(err).Msg("Failed to collect admin SVM's serial number")
} else {
my.Logger.Error().Msg("Failed to collect admin SVM's serial number")
Expand Down
2 changes: 1 addition & 1 deletion cmd/collectors/rest/plugins/svm/svm.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func (my *SVM) Run(data *matrix.Matrix) ([]*matrix.Matrix, error) {

// invoke nameservice-nsswitch-get-iter zapi and get nsswitch info
if my.nsswitchInfo, err = my.GetNSSwitchInfo(data); err != nil {
if errs.IsAPINotFound(err) {
if errs.IsRestErr(err, errs.APINotFound) {
my.Logger.Debug().Err(err).Msg("Failed to collect nsswitch info")
} else {
my.Logger.Warn().Err(err).Msg("Failed to collect nsswitch info")
Expand Down
2 changes: 1 addition & 1 deletion cmd/collectors/rest/plugins/volume/volume.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ func (my *Volume) Run(data *matrix.Matrix) ([]*matrix.Matrix, error) {

// invoke disk rest and populate info in aggrsMap
if disks, err := my.getEncryptedDisks(); err != nil {
if errs.IsAPINotFound(err) {
if errs.IsRestErr(err, errs.APINotFound) {
my.Logger.Debug().Err(err).Msg("Failed to collect disk data")
} else {
my.Logger.Error().Err(err).Msg("Failed to collect disk data")
Expand Down
17 changes: 5 additions & 12 deletions cmd/collectors/rest/rest.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,17 +306,10 @@ func (r *Rest) PollData() (map[string]*matrix.Matrix, error) {

numRecords := len(r.Matrix[r.Object].GetInstances())

r.Logger.Info().
Int("instances", numRecords).
Uint64("metrics", count).
Str("apiD", apiD.Round(time.Millisecond).String()).
Str("parseD", parseD.Round(time.Millisecond).String()).
Msg("Collected")

_ = r.Metadata.LazySetValueInt64("count", "data", int64(numRecords))
_ = r.Metadata.LazySetValueInt64("api_time", "data", apiD.Microseconds())
_ = r.Metadata.LazySetValueInt64("parse_time", "data", parseD.Microseconds())
_ = r.Metadata.LazySetValueUint64("datapoint_count", "data", count)
_ = r.Metadata.LazySetValueUint64("metrics", "data", count)
_ = r.Metadata.LazySetValueUint64("instances", "data", uint64(numRecords))
r.AddCollectCount(count)

return r.Matrix, nil
Expand Down Expand Up @@ -566,7 +559,7 @@ func (r *Rest) CollectAutoSupport(p *collector.Payload) {
ClientTimeout: r.Client.Timeout.String(),
})

if (r.Name == "Rest" && (r.Object == "Volume" || r.Object == "Node")) || r.Name == "ems" {
if (r.Name == "Rest" && (r.Object == "Volume" || r.Object == "Node")) || r.Name == "Ems" {
version := r.Client.Cluster().Version
p.Target.Version = strconv.Itoa(version[0]) + "." + strconv.Itoa(version[1]) + "." + strconv.Itoa(version[2])
p.Target.Model = "cdot"
Expand All @@ -577,8 +570,8 @@ func (r *Rest) CollectAutoSupport(p *collector.Payload) {

md := r.GetMetadata()
info := collector.InstanceInfo{
Count: md.LazyValueInt64("count", "data"),
DataPoints: md.LazyValueInt64("datapoint_count", "data"),
Count: md.LazyValueInt64("instances", "data"),
DataPoints: md.LazyValueInt64("metrics", "data"),
PollTime: md.LazyValueInt64("poll_time", "data"),
APITime: md.LazyValueInt64("api_time", "data"),
ParseTime: md.LazyValueInt64("parse_time", "data"),
Expand Down
1 change: 0 additions & 1 deletion cmd/collectors/rest/templating.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ func (r *Rest) InitCache() error {
}

r.ParseRestCounters(counters, r.Prop)
_, _ = r.Metadata.NewMetricUint64("datapoint_count")

r.Logger.Debug().
Strs("extracted Instance Keys", r.Prop.InstanceKeys).
Expand Down
33 changes: 19 additions & 14 deletions cmd/collectors/restperf/restperf.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,9 @@ func (r *RestPerf) InitMatrix() error {
mat.SetGlobalLabel(l.GetNameS(), l.GetContentS())
}
}

// Add metadata metric for skips
_, _ = r.Metadata.NewMetricUint64("skips")
return nil
}

Expand Down Expand Up @@ -198,8 +201,7 @@ func (r *RestPerf) PollCounter() (map[string]*matrix.Matrix, error) {

records, err = rest.Fetch(r.Client, href)
if err != nil {
r.Logger.Error().Err(err).Str("href", href).Msg("Failed to fetch data")
return nil, err
return r.handleError(err, href)
}

firstRecord := records[0]
Expand Down Expand Up @@ -720,7 +722,8 @@ func (r *RestPerf) PollData() (map[string]*matrix.Matrix, error) {

_ = r.Metadata.LazySetValueInt64("api_time", "data", apiD.Microseconds())
_ = r.Metadata.LazySetValueInt64("parse_time", "data", parseD.Microseconds())
_ = r.Metadata.LazySetValueUint64("count", "data", count)
_ = r.Metadata.LazySetValueUint64("metrics", "data", count)
_ = r.Metadata.LazySetValueUint64("instances", "data", numRecords)
r.AddCollectCount(count)

// skip calculating from delta if no data from previous poll
Expand Down Expand Up @@ -899,16 +902,9 @@ func (r *RestPerf) PollData() (map[string]*matrix.Matrix, error) {
}

calcD := time.Since(calcStart)
_ = r.Metadata.LazySetValueUint64("instances", "data", uint64(len(newData.GetInstances())))
_ = r.Metadata.LazySetValueInt64("calc_time", "data", calcD.Microseconds())

r.Logger.Info().
Int("instances", len(newData.GetInstances())).
Uint64("metrics", count).
Str("apiD", apiD.Round(time.Millisecond).String()).
Str("parseD", parseD.Round(time.Millisecond).String()).
Str("calcD", calcD.Round(time.Millisecond).String()).
Int("skips", totalSkips).
Msg("Collected")
_ = r.Metadata.LazySetValueUint64("skips", "data", uint64(totalSkips))

// store cache for next poll
r.Matrix[r.Object] = cachedData
Expand Down Expand Up @@ -1077,8 +1073,7 @@ func (r *RestPerf) PollInstance() (map[string]*matrix.Matrix, error) {

records, err = rest.Fetch(r.Client, href)
if err != nil {
r.Logger.Error().Err(err).Str("href", href).Msg("Failed to fetch data")
return nil, err
return r.handleError(err, href)
}

if len(records) == 0 {
Expand Down Expand Up @@ -1153,6 +1148,16 @@ func (r *RestPerf) PollInstance() (map[string]*matrix.Matrix, error) {
return nil, err
}

// handleError classifies a REST fetch failure for the RestPerf collector.
// A missing performance table is expected on some ONTAP releases, so it is
// logged at info level and converted into an ErrNoInstance error, which moves
// the task to stand-by. Every other error is logged at error level and
// returned unchanged.
func (r *RestPerf) handleError(err error, href string) (map[string]*matrix.Matrix, error) {
	if !errs.IsRestErr(err, errs.TableNotFound) {
		r.Logger.Error().Err(err).Str("href", href).Msg("Failed to fetch data")
		return nil, err
	}
	// the table does not exist, log as info and return no instances so the task goes to stand-by
	r.Logger.Info().Str("href", href).Msg(err.Error())
	return nil, errs.New(errs.ErrNoInstance, err.Error())
}

func isWorkloadObject(query string) bool {
_, ok := qosQueries[query]
return ok
Expand Down
16 changes: 4 additions & 12 deletions cmd/collectors/zapi/collector/zapi.go
Original file line number Diff line number Diff line change
Expand Up @@ -367,19 +367,11 @@ func (z *Zapi) PollData() (map[string]*matrix.Matrix, error) {
z.Logger.Debug().Str("key", key).Msg("removed instance")
}

z.Logger.Info().
Int("instances", len(instances)).
Uint64("metrics", count).
Str("apiD", apiT.Round(time.Millisecond).String()).
Str("parseD", parseT.Round(time.Millisecond).String()).
Msg("Collected")

// update metadata
_ = z.Metadata.LazySetValueInt64("api_time", "data", apiT.Microseconds())
_ = z.Metadata.LazySetValueInt64("parse_time", "data", parseT.Microseconds())
_ = z.Metadata.LazySetValueUint64("count", "data", count)
_ = z.Metadata.LazySetValueUint64("count", "instance", uint64(len(instances)))

_ = z.Metadata.LazySetValueUint64("metrics", "data", count)
_ = z.Metadata.LazySetValueUint64("instances", "data", uint64(len(instances)))
z.AddCollectCount(count)

if len(mat.GetInstances()) == 0 {
Expand Down Expand Up @@ -442,8 +434,8 @@ func (z *Zapi) CollectAutoSupport(p *collector.Payload) {

md := z.GetMetadata()
info := collector.InstanceInfo{
Count: md.LazyValueInt64("count", "instance"),
DataPoints: md.LazyValueInt64("count", "data"),
Count: md.LazyValueInt64("instances", "data"),
DataPoints: md.LazyValueInt64("metrics", "data"),
PollTime: md.LazyValueInt64("poll_time", "data"),
APITime: md.LazyValueInt64("api_time", "data"),
ParseTime: md.LazyValueInt64("parse_time", "data"),
Expand Down
17 changes: 7 additions & 10 deletions cmd/collectors/zapiperf/zapiperf.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,10 @@ func (z *ZapiPerf) InitCache() error {
}
z.Matrix[z.Object].Object = z.object
z.Logger.Debug().Msgf("object= %s --> %s", z.Object, z.object)

// Add metadata metric for skips
_, _ = z.Metadata.NewMetricUint64("skips")

return nil
}

Expand Down Expand Up @@ -526,7 +530,8 @@ func (z *ZapiPerf) PollData() (map[string]*matrix.Matrix, error) {
// update metadata
_ = z.Metadata.LazySetValueInt64("api_time", "data", apiT.Microseconds())
_ = z.Metadata.LazySetValueInt64("parse_time", "data", parseT.Microseconds())
_ = z.Metadata.LazySetValueUint64("count", "data", count)
_ = z.Metadata.LazySetValueUint64("metrics", "data", count)
_ = z.Metadata.LazySetValueUint64("instances", "data", uint64(len(instanceKeys)))
z.AddCollectCount(count)

// skip calculating from delta if no data from previous poll
Expand Down Expand Up @@ -668,16 +673,8 @@ func (z *ZapiPerf) PollData() (map[string]*matrix.Matrix, error) {

calcD := time.Since(calcStart)

z.Logger.Info().
Int("instances", len(instanceKeys)).
Uint64("metrics", count).
Str("apiD", apiT.Round(time.Millisecond).String()).
Str("parseD", parseT.Round(time.Millisecond).String()).
Str("calcD", calcD.Round(time.Millisecond).String()).
Int("skips", totalSkips).
Msg("Collected")

_ = z.Metadata.LazySetValueInt64("calc_time", "data", calcD.Microseconds())
_ = z.Metadata.LazySetValueUint64("skips", "data", uint64(totalSkips))

// store cache for next poll
z.Matrix[z.Object] = cachedData
Expand Down
2 changes: 1 addition & 1 deletion cmd/exporters/prometheus/httpd.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ func (p *Prometheus) ServeMetrics(w http.ResponseWriter, r *http.Request) {
// filterMetaTags removes duplicate TYPE/HELP tags in the metrics
// Note: this is a workaround, normally Render() will only add
// one TYPE/HELP for each metric type, however since some metric
// types (e.g. metadata_collector_count) are submitted from multiple
// types (e.g. metadata_collector_metrics) are submitted from multiple
// collectors, we end up with duplicates in the final batch delivered
// over HTTP.
func filterMetaTags(metrics [][]byte) [][]byte {
Expand Down
38 changes: 33 additions & 5 deletions cmd/poller/collector/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@ import (
"github.com/netapp/harvest/v2/pkg/logging"
"golang.org/x/text/cases"
"golang.org/x/text/language"
"math"
"reflect"
"strconv"
"strings"
"sync"
"time"

Expand Down Expand Up @@ -223,7 +225,8 @@ func Init(c Collector) error {
_, _ = md.NewMetricInt64("parse_time")
_, _ = md.NewMetricInt64("calc_time")
_, _ = md.NewMetricInt64("plugin_time")
_, _ = md.NewMetricUint64("count")
_, _ = md.NewMetricUint64("metrics")
_, _ = md.NewMetricUint64("instances")

// add tasks of the collector as metadata instances
for _, task := range s.GetTasks() {
Expand All @@ -233,9 +236,6 @@ func Init(c Collector) error {
instance.SetLabel("interval", strconv.FormatFloat(t, 'f', 4, 32))
}

// Create the metadata instance named "instance" since autosupport relies on that key
_, _ = md.NewInstance("instance")

md.SetExportOptions(matrix.DefaultExportOptions())

c.SetMetadata(md)
Expand Down Expand Up @@ -407,6 +407,9 @@ func (c *AbstractCollector) Start(wg *sync.WaitGroup) {
_ = c.Metadata.LazySetValueInt64("plugin_time", task.Name, pluginTime.Microseconds())
}
}
if task.Name == "data" {
c.logMetadata()
}

// update task metadata
_ = c.Metadata.LazySetValueInt64("poll_time", task.Name, task.GetDuration().Microseconds())
Expand All @@ -425,7 +428,7 @@ func (c *AbstractCollector) Start(wg *sync.WaitGroup) {
}

if err := e.Export(c.Metadata); err != nil {
c.Logger.Warn().Msgf("export metadata to [%s]: %s", e.GetName(), err.Error())
c.Logger.Warn().Err(err).Str("exporter", e.GetName()).Msg("Unable to export metadata")
}

// continue if metadata failed, since it might be specific to metadata
Expand Down Expand Up @@ -454,6 +457,31 @@ func (c *AbstractCollector) Start(wg *sync.WaitGroup) {
}
}

// logMetadata emits a single "Collected" info line summarizing the metadata
// metrics of the "data" task. Count-style metrics (metrics, instances, skips)
// are logged as-is; metrics whose names end in "_time" are converted from
// microseconds to milliseconds and renamed with an "Ms" suffix
// (e.g. api_time -> apiMs). poll_time and task_time are skipped because they
// are covered by the other durations. If the "data" instance does not exist,
// nothing is logged.
func (c *AbstractCollector) logMetadata() {
	dataInstance := c.Metadata.GetInstance("data")
	if dataInstance == nil {
		// guard before creating the zerolog event so we never start an
		// event that is abandoned without a Msg call
		return
	}
	info := c.Logger.Info()
	for _, metric := range c.Metadata.GetMetrics() {
		mName := metric.GetName()
		if mName == "poll_time" || mName == "task_time" {
			// don't log these since they're covered by other durations
			continue
		}
		value, _, _ := metric.GetValueFloat64(dataInstance)
		if strings.HasSuffix(mName, "_time") {
			// convert microseconds to milliseconds and rename *_time -> *Ms
			v := int64(math.Round(value / 1000))
			info.Int64(strings.TrimSuffix(mName, "_time")+"Ms", v)
		} else {
			info.Int64(mName, int64(value))
		}
	}
	info.Msg("Collected")
}

// GetName returns name of the collector
func (c *AbstractCollector) GetName() string {
return c.Name
Expand Down
2 changes: 1 addition & 1 deletion grafana/dashboards/cmode/harvest_dashboard_metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -3529,7 +3529,7 @@
"pluginVersion": "8.1.8",
"targets": [
{
"expr": "avg by (collector, object) (metadata_collector_count{hostname=~\"$Hostname\",poller=~\"$Poller\",task=\"data\"})",
"expr": "avg by (collector, object) (metadata_collector_metrics{hostname=~\"$Hostname\",poller=~\"$Poller\",task=\"data\"})",
"hide": false,
"interval": "",
"legendFormat": "{{collector}} - {{object}}",
Expand Down
Loading