From b5f2608ba1fece3e130eab40b9e2c21f7c5f6ed5 Mon Sep 17 00:00:00 2001 From: Thomas Schuetz Date: Thu, 2 Jul 2020 11:32:59 +0200 Subject: [PATCH 01/51] added dynatrace client install instructions --- plugins/outputs/all/all.go | 1 + plugins/outputs/dynatrace/README.md | 21 ++ plugins/outputs/dynatrace/dynatrace.go | 189 ++++++++++++++++++ plugins/outputs/dynatrace/dynatrace_test.go | 206 ++++++++++++++++++++ 4 files changed, 417 insertions(+) create mode 100644 plugins/outputs/dynatrace/README.md create mode 100644 plugins/outputs/dynatrace/dynatrace.go create mode 100644 plugins/outputs/dynatrace/dynatrace_test.go diff --git a/plugins/outputs/all/all.go b/plugins/outputs/all/all.go index 7d37c2208b092..806410b8176e9 100644 --- a/plugins/outputs/all/all.go +++ b/plugins/outputs/all/all.go @@ -10,6 +10,7 @@ import ( _ "github.com/influxdata/telegraf/plugins/outputs/cratedb" _ "github.com/influxdata/telegraf/plugins/outputs/datadog" _ "github.com/influxdata/telegraf/plugins/outputs/discard" + _ "github.com/influxdata/telegraf/plugins/outputs/dynatrace" _ "github.com/influxdata/telegraf/plugins/outputs/elasticsearch" _ "github.com/influxdata/telegraf/plugins/outputs/exec" _ "github.com/influxdata/telegraf/plugins/outputs/file" diff --git a/plugins/outputs/dynatrace/README.md b/plugins/outputs/dynatrace/README.md new file mode 100644 index 0000000000000..9da00080ee601 --- /dev/null +++ b/plugins/outputs/dynatrace/README.md @@ -0,0 +1,21 @@ +# Dynatrace Output Plugin + +This plugin writes telegraf metrics to a Dynatrace environment. + +A API token is necessary, which can be obtained in your Dynatrace environment. Navigate to **Dynatrace > Settings > Integration > Dynatrace API** and create a new token with +'Data ingest' access scope enabled. + +Telegraf measurements which cant be converted to a float64 are skipped. + +Metrics fields are added to the measurement name by using '.' in the metric name. + +### Configuration + +```toml +[[outputs.dynatrace]] + ## Dynatrace environment URL. + environmentURL = "" + environmentApiToken = "" + skipCertificateCheck = false + +``` \ No newline at end of file diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go new file mode 100644 index 0000000000000..d9dbac0431c3c --- /dev/null +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -0,0 +1,189 @@ +package dynatrace + +import ( + "bytes" + "crypto/tls" + "fmt" + "io/ioutil" + "math" + "net/http" + "strconv" + "strings" + "time" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/outputs" +) + +// Dynatrace Configuration for the Dynatrace output plugin +type Dynatrace struct { + EnvironmentURL string `toml:"environmentURL"` + EnvironmentAPIToken string `toml:"environmentApiToken"` + SkipCertificateCheck bool `toml:"skipCertificateCheck"` + Log telegraf.Logger `toml:"log"` + + client *http.Client +} + +var sampleConfig = ` + ## Your Dynatrace environment URL. + ## For Dynatrace SaaS environments the URL scheme is "https://{your-environment-id}.live.dynatrace.com" + ## For Dynatrace Managed environments the URL scheme is "https://{your-domain}/e/{your-environment-id}" + environmentURL = "" + + ## Your Dynatrace API token. + ## Create an API token within your Dynatrace environment, by navigating to Settings > Integration > Dynatrace API + ## The API token needs data ingest scope permission. 
+ environmentApiToken = "" +` + +// Connect Connects the Dynatrace output plugin to the Telegraf stream +func (d *Dynatrace) Connect() error { + if len(d.EnvironmentURL) == 0 { + d.Log.Errorf("Dynatrace environmentURL is a required field for Dynatrace output") + return fmt.Errorf("environmentURL is a required field for Dynatrace output") + } + if len(d.EnvironmentAPIToken) == 0 { + d.Log.Errorf("Dynatrace environmentApiToken is a required field for Dynatrace output") + return fmt.Errorf("environmentApiToken is a required field for Dynatrace output") + } + + d.client = &http.Client{ + Transport: &http.Transport{ + Proxy: http.ProxyFromEnvironment, + TLSClientConfig: &tls.Config{InsecureSkipVerify: d.SkipCertificateCheck}, + }, + Timeout: 5 * time.Second, + } + return nil +} + +// Close Closes the Dynatrace output plugin +func (d *Dynatrace) Close() error { + var err error + + return err +} + +// SampleConfig Returns a sample configuration for the Dynatrace output plugin +func (d *Dynatrace) SampleConfig() string { + return sampleConfig +} + +// Description returns the description for the Dynatrace output plugin +func (d *Dynatrace) Description() string { + return "Send telegraf metrics to a Dynatrace environment" +} + +func (d *Dynatrace) convertKey(v string) string { + kEs := strings.ToLower(v) + sEs := strings.Replace(kEs, " ", "_", -1) + return sEs +} + +func (d *Dynatrace) escape(v string) string { + vEs := strings.Replace(v, "\\", "\\\\", -1) + return "\"" + vEs + "\"" +} + +func (d *Dynatrace) Write(metrics []telegraf.Metric) error { + var err error + var buf bytes.Buffer + var tagb bytes.Buffer + if len(metrics) == 0 { + return err + } + + for _, metric := range metrics { + // first write the tags into a buffer + tagb.Reset() + if len(metric.Tags()) > 0 { + for tk, tv := range metric.Tags() { + fmt.Fprintf(&tagb, ",%s=%s", d.convertKey(tk), d.escape(tv)) + } + } + if len(metric.Fields()) > 0 { + for k, v := range metric.Fields() { + var value string + // first check if value type is supported + switch v := v.(type) { + case string: + continue + case float64: + if !math.IsNaN(v) && !math.IsInf(v, 0) { + value = fmt.Sprintf("%f", v) + } else { + continue + } + case uint64: + value = strconv.FormatUint(v, 10) + case int64: + value = strconv.FormatInt(v, 10) + case bool: + if v { + value = "1" + } else { + value = "0" + } + default: + d.Log.Infof("Dynatrace type not supported! 
%s", v) + continue + } + + // write metric name combined with its field + fmt.Fprintf(&buf, "%s.%s", metric.Name(), k) + // add the tag string + if len(tagb.String()) > 0 { + fmt.Fprintf(&buf, "%s", tagb.String()) + } + + // write measured value + fmt.Fprintf(&buf, " %v\n", value) + } + } + } + //d.Log.Infof("%s", buf.String()) + // send it + d.send(buf.Bytes()) + return err +} + +func (d *Dynatrace) send(msg []byte) error { + var err error + req, err := http.NewRequest("POST", d.EnvironmentURL+"/api/v2/metrics/ingest", bytes.NewBuffer(msg)) + if err != nil { + d.Log.Errorf("Dynatrace error: %s", err.Error()) + return fmt.Errorf("Dynatrace error while creating HTTP request:, %s", err.Error()) + } + req.Header.Add("Content-Type", "text/plain; charset=UTF-8") + req.Header.Add("Authorization", "Api-Token "+d.EnvironmentAPIToken) + // add user-agent header to identify metric source + req.Header.Add("User-Agent", "telegraf") + + resp, err := d.client.Do(req) + if err != nil { + d.Log.Errorf("Dynatrace error: %s", err.Error()) + fmt.Println(req) + return fmt.Errorf("Dynatrace error while sending HTTP request:, %s", err.Error()) + } + defer resp.Body.Close() + // print metric line results as info log + if resp.StatusCode == http.StatusOK { + bodyBytes, err := ioutil.ReadAll(resp.Body) + if err != nil { + d.Log.Errorf("Dynatrace error reading response") + } + bodyString := string(bodyBytes) + d.Log.Infof("Dynatrace returned: %s", bodyString) + } else { + return fmt.Errorf("Dynatrace request failed with response code:, %d", resp.StatusCode) + } + + return err +} + +func init() { + outputs.Add("dynatrace", func() telegraf.Output { + return &Dynatrace{} + }) +} diff --git a/plugins/outputs/dynatrace/dynatrace_test.go b/plugins/outputs/dynatrace/dynatrace_test.go new file mode 100644 index 0000000000000..5f8f9357603fa --- /dev/null +++ b/plugins/outputs/dynatrace/dynatrace_test.go @@ -0,0 +1,206 @@ +package dynatrace + +import ( + "encoding/json" + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/metric" + "github.com/influxdata/telegraf/testutil" + "github.com/stretchr/testify/require" + "net/http" + "net/http/httptest" + "testing" + "time" +) + +func TestNilMetrics(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(`{"linesOk":10,"linesInvalid":0,"error":null}`) + })) + defer ts.Close() + + d := &Dynatrace{} + d.EnvironmentURL = ts.URL + d.EnvironmentAPIToken = "123" + d.Log = testutil.Logger{} + err := d.Connect() + require.NoError(t, err) + + err = d.Write(nil) + require.NoError(t, err) +} + +func TestEmptyMetricsSlice(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(`{"linesOk":10,"linesInvalid":0,"error":null}`) + })) + defer ts.Close() + + d := &Dynatrace{} + d.EnvironmentURL = ts.URL + d.EnvironmentAPIToken = "123" + d.Log = testutil.Logger{} + err := d.Connect() + require.NoError(t, err) + empty := []telegraf.Metric{} + err = d.Write(empty) + require.NoError(t, err) +} + +func TestMockURL(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(`{"linesOk":10,"linesInvalid":0,"error":null}`) + })) + defer ts.Close() + + d := &Dynatrace{} + d.EnvironmentURL = ts.URL + d.EnvironmentAPIToken = "123" + d.Log = testutil.Logger{} + err := 
d.Connect() + require.NoError(t, err) + + err = d.Write(testutil.MockMetrics()) + require.NoError(t, err) +} + +func TestMissingURL(t *testing.T) { + d := &Dynatrace{} + d.EnvironmentAPIToken = "123" + d.Log = testutil.Logger{} + err := d.Connect() + require.Error(t, err) +} + +func TestMissingAPIToken(t *testing.T) { + d := &Dynatrace{} + d.EnvironmentURL = "test" + d.Log = testutil.Logger{} + err := d.Connect() + require.Error(t, err) +} + +func TestSendMetric(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(`{"linesOk":10,"linesInvalid":0,"error":null}`) + })) + defer ts.Close() + + d := &Dynatrace{} + d.EnvironmentURL = ts.URL + d.EnvironmentAPIToken = "123" + d.Log = testutil.Logger{} + err := d.Connect() + require.NoError(t, err) + + // Init metrics + + m1, _ := metric.New( + "mymeasurement", + map[string]string{"host": "192.168.0.1", "nix": "nix"}, + map[string]interface{}{"myfield": float64(3.14)}, + time.Date(2010, time.November, 10, 23, 0, 0, 0, time.UTC), + ) + + m2, _ := metric.New( + "mymeasurement", + map[string]string{"host": "192.168.0.1"}, + map[string]interface{}{"value": float64(3.14)}, + time.Date(2010, time.November, 10, 23, 0, 0, 0, time.UTC), + ) + + metrics := []telegraf.Metric{m1, m2} + + err = d.Write(metrics) + require.NoError(t, err) +} + +func TestSendSingleMetric(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(`{"linesOk":10,"linesInvalid":0,"error":null}`) + })) + defer ts.Close() + + d := &Dynatrace{} + d.EnvironmentURL = ts.URL + d.EnvironmentAPIToken = "123" + d.Log = testutil.Logger{} + err := d.Connect() + require.NoError(t, err) + + // Init metrics + + m1, _ := metric.New( + "mymeasurement", + map[string]string{"host": "192.168.0.1", "nix": "nix"}, + map[string]interface{}{"myfield": float64(3.14)}, + time.Date(2010, time.November, 10, 23, 0, 0, 0, time.UTC), + ) + + metrics := []telegraf.Metric{m1} + + err = d.Write(metrics) + require.NoError(t, err) +} + +func TestSendMetricWithoutTags(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(`{"linesOk":10,"linesInvalid":0,"error":null}`) + })) + defer ts.Close() + + d := &Dynatrace{} + d.EnvironmentURL = ts.URL + d.EnvironmentAPIToken = "123" + d.Log = testutil.Logger{} + err := d.Connect() + require.NoError(t, err) + + // Init metrics + + m1, _ := metric.New( + "mymeasurement", + map[string]string{}, + map[string]interface{}{"myfield": float64(3.14)}, + time.Date(2010, time.November, 10, 23, 0, 0, 0, time.UTC), + ) + + metrics := []telegraf.Metric{m1} + + err = d.Write(metrics) + require.NoError(t, err) +} + +func TestSendBooleanMetricWithoutTags(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(`{"linesOk":10,"linesInvalid":0,"error":null}`) + })) + defer ts.Close() + + d := &Dynatrace{} + d.EnvironmentURL = ts.URL + d.EnvironmentAPIToken = "123" + d.Log = testutil.Logger{} + err := d.Connect() + require.NoError(t, err) + + // Init metrics + + m1, _ := metric.New( + "mymeasurement", + map[string]string{}, + map[string]interface{}{"myfield": bool(true)}, + time.Date(2010, time.November, 10, 23, 0, 0, 0, time.UTC), + ) + + metrics := 
[]telegraf.Metric{m1} + + err = d.Write(metrics) + require.NoError(t, err) +} From d2c3d579e4b208eda99918b8e00ba152bb0f4814 Mon Sep 17 00:00:00 2001 From: Thomas Schuetz Date: Tue, 14 Jul 2020 08:22:53 +0200 Subject: [PATCH 02/51] fixed some minor issues --- plugins/outputs/dynatrace/README.md | 4 ++-- plugins/outputs/dynatrace/dynatrace.go | 21 ++++++++------------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/plugins/outputs/dynatrace/README.md b/plugins/outputs/dynatrace/README.md index 9da00080ee601..df3abeae5b334 100644 --- a/plugins/outputs/dynatrace/README.md +++ b/plugins/outputs/dynatrace/README.md @@ -2,10 +2,10 @@ This plugin writes telegraf metrics to a Dynatrace environment. -A API token is necessary, which can be obtained in your Dynatrace environment. Navigate to **Dynatrace > Settings > Integration > Dynatrace API** and create a new token with +An API token is necessary, which can be obtained in your Dynatrace environment. Navigate to **Dynatrace > Settings > Integration > Dynatrace API** and create a new token with 'Data ingest' access scope enabled. -Telegraf measurements which cant be converted to a float64 are skipped. +Telegraf measurements which can't be converted to a float64 are skipped. Metrics fields are added to the measurement name by using '.' in the metric name. diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go index d9dbac0431c3c..8af48ed2637a5 100644 --- a/plugins/outputs/dynatrace/dynatrace.go +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -25,7 +25,7 @@ type Dynatrace struct { client *http.Client } -var sampleConfig = ` +const sampleConfig = ` ## Your Dynatrace environment URL. ## For Dynatrace SaaS environments the URL scheme is "https://{your-environment-id}.live.dynatrace.com" ## For Dynatrace Managed environments the URL scheme is "https://{your-domain}/e/{your-environment-id}" @@ -60,9 +60,7 @@ func (d *Dynatrace) Connect() error { // Close Closes the Dynatrace output plugin func (d *Dynatrace) Close() error { - var err error - - return err + return nil } // SampleConfig Returns a sample configuration for the Dynatrace output plugin @@ -77,21 +75,20 @@ func (d *Dynatrace) Description() string { func (d *Dynatrace) convertKey(v string) string { kEs := strings.ToLower(v) - sEs := strings.Replace(kEs, " ", "_", -1) + sEs := strings.ReplaceAll(kEs, " ", "_") return sEs } func (d *Dynatrace) escape(v string) string { - vEs := strings.Replace(v, "\\", "\\\\", -1) + vEs := strings.ReplaceAll(v, "\\","\\\\") return "\"" + vEs + "\"" } func (d *Dynatrace) Write(metrics []telegraf.Metric) error { - var err error var buf bytes.Buffer var tagb bytes.Buffer if len(metrics) == 0 { - return err + return nil } for _, metric := range metrics { @@ -142,10 +139,8 @@ func (d *Dynatrace) Write(metrics []telegraf.Metric) error { } } } - //d.Log.Infof("%s", buf.String()) - // send it - d.send(buf.Bytes()) - return err + + return d.send(buf.Bytes()) } func (d *Dynatrace) send(msg []byte) error { @@ -179,7 +174,7 @@ func (d *Dynatrace) send(msg []byte) error { return fmt.Errorf("Dynatrace request failed with response code:, %d", resp.StatusCode) } - return err + return nil } func init() { From 72dd44e8f6d48a134514f42ef7f93b3ca55f6af6 Mon Sep 17 00:00:00 2001 From: Thomas Schuetz Date: Tue, 14 Jul 2020 08:24:57 +0200 Subject: [PATCH 03/51] added idea to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 4176a04131cfc..db929895d173c 100644 --- a/.gitignore +++ 
b/.gitignore @@ -1,3 +1,4 @@ +/.idea /build /telegraf /telegraf.exe
From f49ac2a6a4b20190a92e2228f9a90b5d5722a93d Mon Sep 17 00:00:00 2001 From: Wolfgang Beer Date: Tue, 14 Jul 2020 09:26:13 +0200 Subject: [PATCH 04/51] changed to DT normalized metric and dimension keys --- .DS_Store | Bin 0 -> 8196 bytes plugins/.DS_Store | Bin 0 -> 8196 bytes plugins/outputs/.DS_Store | Bin 0 -> 8196 bytes plugins/outputs/dynatrace/dynatrace.go | 36 ++++++++++++++++++----- 4 files changed, 29 insertions(+), 7 deletions(-) create mode 100644 .DS_Store create mode 100644 plugins/.DS_Store create mode 100644 plugins/outputs/.DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..a73b469f659b73dd8111457ef6a9094013862099 GIT binary patch literal 8196 [base85 binary payload omitted] diff --git a/plugins/outputs/.DS_Store b/plugins/outputs/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..3985570b25bec9a5250b4dee6bb685d7ff8f3ebd GIT binary patch literal 8196 [base85 binary payload omitted] diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go index 8af48ed2637a5..d4c416b0becd5 100644 --- a/plugins/outputs/dynatrace/dynatrace.go +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -7,6 +7,7 @@ import ( "io/ioutil" "math" "net/http" + "regexp" "strconv" "strings" "time" @@ -73,14 +74,33 @@ func (d *Dynatrace) Description() string { return "Send telegraf metrics to a Dynatrace environment" } -func (d *Dynatrace) convertKey(v string) string { - kEs := strings.ToLower(v) - sEs := strings.ReplaceAll(kEs, " ", "_") - return sEs +var reNameAllowedCharList = regexp.MustCompile("[^A-Za-z0-9.]+") + +const maxDimKeyLen = 100 +const maxMetricKeyLen = 250 + +// Normalizes a metric keys or metric dimension identifiers +// according to Dynatrace format. +func (d *Dynatrace) normalize(s string, max int) string { + result := reNameAllowedCharList.ReplaceAllString(s, "_") + // trunc to max size + if len(result) > max { + result = result[:max] + } + // remove trailing and ending '_' char + if len(result) > 1 { + if strings.HasPrefix(s, "_") { + result = result[1:] + } + if strings.HasSuffix(s, "_") { + result = result[:len(result)-1] + } + } + return result } func (d *Dynatrace) escape(v string) string { - vEs := strings.ReplaceAll(v, "\\","\\\\") + vEs := strings.ReplaceAll(v, "\\", "\\\\") return "\"" + vEs + "\"" } @@ -96,7 +116,7 @@ func (d *Dynatrace) Write(metrics []telegraf.Metric) error { tagb.Reset() if len(metric.Tags()) > 0 { for tk, tv := range metric.Tags() { - fmt.Fprintf(&tagb, ",%s=%s", d.convertKey(tk), d.escape(tv)) + fmt.Fprintf(&tagb, ",%s=%s", strings.ToLower(d.normalize(tk, maxDimKeyLen)), d.escape(tv)) } } if len(metric.Fields()) > 0 { @@ -127,8 +147,10 @@ func (d *Dynatrace) Write(metrics []telegraf.Metric) error { continue } + // metric name + metricID := metric.Name() + "." + k // write metric name combined with its field - fmt.Fprintf(&buf, "%s.%s", metric.Name(), k) + fmt.Fprintf(&buf, "%s", d.normalize(metricID, maxMetricKeyLen)) // add the tag string if len(tagb.String()) > 0 { fmt.Fprintf(&buf, "%s", tagb.String())
From 62c605483b98b201af5347aff672f84b2144d977 Mon Sep 17 00:00:00 2001 From: Wolfgang Beer Date: Tue, 14 Jul 2020 09:28:34 +0200 Subject: [PATCH 05/51] changed to DT normalized metric and dimension keys --- .DS_Store | Bin 8196 -> 0 bytes plugins/.DS_Store | Bin 8196 -> 0 bytes plugins/outputs/.DS_Store | Bin 8196 -> 0 bytes 3 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 .DS_Store delete mode 100644 plugins/.DS_Store delete mode 100644 plugins/outputs/.DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index a73b469f659b73dd8111457ef6a9094013862099..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 [base85 binary payload omitted] diff --git a/plugins/outputs/.DS_Store b/plugins/outputs/.DS_Store deleted file mode 100644 index 3985570b25bec9a5250b4dee6bb685d7ff8f3ebd..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 [base85 binary payload omitted]
From 34d5f563b446c701e4806dfd3d218e27b1b60fd7 Mon Sep 17 00:00:00 2001 From: Wolfgang Beer Date: Tue, 14 Jul 2020 10:53:22 +0200 Subject: [PATCH 06/51] changed escaping of values to strconv.Quote --- plugins/outputs/dynatrace/dynatrace.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go index d4c416b0becd5..5c4afec7d01be 100644 --- a/plugins/outputs/dynatrace/dynatrace.go +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -100,8 +100,7 @@ func (d *Dynatrace) normalize(s string, max int) string { } func (d *Dynatrace) escape(v string) string { - vEs := strings.ReplaceAll(v, "\\", "\\\\") - return "\"" + vEs + "\"" + return strconv.Quote(v) }
From de0b09fea62df2db2d6be0d46bb259f11e1c0640 Mon Sep 17 00:00:00 2001 From: Thomas Schuetz Date: Mon, 20 Jul 2020 11:36:40 +0200 Subject: [PATCH 07/51] added oneagent support --- plugins/outputs/dynatrace/dynatrace.go | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go index 5c4afec7d01be..12d6d5b9f1082 100644 --- a/plugins/outputs/dynatrace/dynatrace.go +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -26,10 +26,13 @@ type Dynatrace struct { client *http.Client } +const oneAgentMetricsUrl = "http://127.0.0.1:14499/metrics/ingest" + const sampleConfig = ` ## Your Dynatrace environment URL. - ## For Dynatrace SaaS environments the URL scheme is "https://{your-environment-id}.live.dynatrace.com" - ## For Dynatrace Managed environments the URL scheme is "https://{your-domain}/e/{your-environment-id}" + ## For Dynatrace SaaS environments the URL scheme is "https://{your-environment-id}.live.dynatrace.com/api/v2/metrics/ingest" + ## For Dynatrace Managed environments the URL scheme is "https://{your-domain}/e/{your-environment-id}/api/v2/metrics/ingest" + ## For Dynatrace OneAgent the URL scheme is "http://127.0.0.1:14499/metrics/ingest" environmentURL = "" ## Your Dynatrace API token.
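The hunk that follows reworks Connect() so that an unset URL falls back to the local OneAgent ingest endpoint, and the API token becomes optional in that case. A minimal, runnable sketch of the resulting behavior; the helper name resolveEndpoint is illustrative, not part of the patch:

```go
package main

import "fmt"

const oneAgentMetricsUrl = "http://127.0.0.1:14499/metrics/ingest"

// resolveEndpoint is a hypothetical helper mirroring the Connect() logic in
// this patch: an empty URL falls back to the local OneAgent ingest endpoint,
// and an API token is only mandatory for non-OneAgent (SaaS/Managed) URLs.
func resolveEndpoint(url, token string) (string, error) {
	if len(url) == 0 {
		url = oneAgentMetricsUrl
	}
	if url != oneAgentMetricsUrl && len(token) == 0 {
		return "", fmt.Errorf("environmentApiToken is a required field for Dynatrace output")
	}
	return url, nil
}

func main() {
	url, err := resolveEndpoint("", "")
	fmt.Println(url, err) // prints the OneAgent endpoint and <nil>
}
```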
@@ -41,10 +44,10 @@ const sampleConfig = ` // Connect Connects the Dynatrace output plugin to the Telegraf stream func (d *Dynatrace) Connect() error { if len(d.EnvironmentURL) == 0 { - d.Log.Errorf("Dynatrace environmentURL is a required field for Dynatrace output") - return fmt.Errorf("environmentURL is a required field for Dynatrace output") + d.Log.Infof("Dynatrace environmentURL is empty, defaulting to OneAgent URL") + d.EnvironmentURL = oneAgentMetricsUrl } - if len(d.EnvironmentAPIToken) == 0 { + if d.EnvironmentURL != oneAgentMetricsUrl && len(d.EnvironmentAPIToken) == 0 { d.Log.Errorf("Dynatrace environmentApiToken is a required field for Dynatrace output") return fmt.Errorf("environmentApiToken is a required field for Dynatrace output") } @@ -184,7 +187,7 @@ func (d *Dynatrace) send(msg []byte) error { } defer resp.Body.Close() // print metric line results as info log - if resp.StatusCode == http.StatusOK { + if resp.StatusCode == http.StatusOK || resp.StatusCode == http.StatusAccepted { bodyBytes, err := ioutil.ReadAll(resp.Body) if err != nil { d.Log.Errorf("Dynatrace error reading response") From 9c4f3d716eec487e0fa33bc03dfdf79a1d926f0a Mon Sep 17 00:00:00 2001 From: Thomas Schuetz Date: Tue, 21 Jul 2020 08:06:48 +0200 Subject: [PATCH 08/51] adapted handling for oneagent --- plugins/outputs/dynatrace/dynatrace.go | 13 +++++++++---- plugins/outputs/dynatrace/dynatrace_test.go | 12 ++++++++++-- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go index 12d6d5b9f1082..405b7a3700e4d 100644 --- a/plugins/outputs/dynatrace/dynatrace.go +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -32,7 +32,7 @@ const sampleConfig = ` ## Your Dynatrace environment URL. ## For Dynatrace SaaS environments the URL scheme is "https://{your-environment-id}.live.dynatrace.com/api/v2/metrics/ingest" ## For Dynatrace Managed environments the URL scheme is "https://{your-domain}/e/{your-environment-id}/api/v2/metrics/ingest" - ## For Dynatrace OneAgent the URL scheme is "http://127.0.0.1:14499/metrics/ingest" + ## For Dynatrace OneAgent the URL scheme is "http://127.0.0.1:14499/metrics/ingest" (default) environmentURL = "" ## Your Dynatrace API token. 
@@ -44,7 +44,7 @@ const sampleConfig = ` // Connect Connects the Dynatrace output plugin to the Telegraf stream func (d *Dynatrace) Connect() error { if len(d.EnvironmentURL) == 0 { - d.Log.Infof("Dynatrace environmentURL is empty, defaulting to OneAgent URL") + d.Log.Infof("Dynatrace environmentURL is empty, defaulting to OneAgent metrics interface") d.EnvironmentURL = oneAgentMetricsUrl } if d.EnvironmentURL != oneAgentMetricsUrl && len(d.EnvironmentAPIToken) == 0 { @@ -169,16 +169,20 @@ func (d *Dynatrace) Write(metrics []telegraf.Metric) error { func (d *Dynatrace) send(msg []byte) error { var err error - req, err := http.NewRequest("POST", d.EnvironmentURL+"/api/v2/metrics/ingest", bytes.NewBuffer(msg)) + req, err := http.NewRequest("POST", d.EnvironmentURL, bytes.NewBuffer(msg)) if err != nil { d.Log.Errorf("Dynatrace error: %s", err.Error()) return fmt.Errorf("Dynatrace error while creating HTTP request:, %s", err.Error()) } req.Header.Add("Content-Type", "text/plain; charset=UTF-8") - req.Header.Add("Authorization", "Api-Token "+d.EnvironmentAPIToken) + + if len(d.EnvironmentAPIToken) != 0 { + req.Header.Add("Authorization", "Api-Token "+d.EnvironmentAPIToken) + } // add user-agent header to identify metric source req.Header.Add("User-Agent", "telegraf") + fmt.Println(req) resp, err := d.client.Do(req) if err != nil { d.Log.Errorf("Dynatrace error: %s", err.Error()) @@ -186,6 +190,7 @@ func (d *Dynatrace) send(msg []byte) error { return fmt.Errorf("Dynatrace error while sending HTTP request:, %s", err.Error()) } defer resp.Body.Close() + // print metric line results as info log if resp.StatusCode == http.StatusOK || resp.StatusCode == http.StatusAccepted { bodyBytes, err := ioutil.ReadAll(resp.Body) diff --git a/plugins/outputs/dynatrace/dynatrace_test.go b/plugins/outputs/dynatrace/dynatrace_test.go index 5f8f9357603fa..4a848efb90d08 100644 --- a/plugins/outputs/dynatrace/dynatrace_test.go +++ b/plugins/outputs/dynatrace/dynatrace_test.go @@ -68,10 +68,18 @@ func TestMockURL(t *testing.T) { func TestMissingURL(t *testing.T) { d := &Dynatrace{} - d.EnvironmentAPIToken = "123" d.Log = testutil.Logger{} err := d.Connect() - require.Error(t, err) + require.Equal(t, oneAgentMetricsUrl, d.EnvironmentURL) + require.NoError(t, err) +} + +func TestMissingAPITokenMissingURL(t *testing.T) { + d := &Dynatrace{} + d.Log = testutil.Logger{} + err := d.Connect() + require.Equal(t, oneAgentMetricsUrl, d.EnvironmentURL) + require.NoError(t, err) } func TestMissingAPIToken(t *testing.T) { From 6ae80148c61c81dc208f9b713bbfa6c721254f1d Mon Sep 17 00:00:00 2001 From: Thomas Schuetz Date: Tue, 21 Jul 2020 08:09:49 +0200 Subject: [PATCH 09/51] removed debug output --- plugins/outputs/dynatrace/dynatrace.go | 1 - 1 file changed, 1 deletion(-) diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go index 405b7a3700e4d..fe61eb30b8c2a 100644 --- a/plugins/outputs/dynatrace/dynatrace.go +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -182,7 +182,6 @@ func (d *Dynatrace) send(msg []byte) error { // add user-agent header to identify metric source req.Header.Add("User-Agent", "telegraf") - fmt.Println(req) resp, err := d.client.Do(req) if err != nil { d.Log.Errorf("Dynatrace error: %s", err.Error()) From 02f560ceb606f4e89c003855131e3a857ddd5cf7 Mon Sep 17 00:00:00 2001 From: Thomas Schuetz Date: Fri, 24 Jul 2020 11:35:03 +0200 Subject: [PATCH 10/51] fixed normalization for fields, if they start with a digit --- plugins/outputs/dynatrace/dynatrace.go | 13 +++++++++---- 1 file 
changed, 9 insertions(+), 4 deletions(-) diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go index fe61eb30b8c2a..047254956eda8 100644 --- a/plugins/outputs/dynatrace/dynatrace.go +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -4,6 +4,8 @@ import ( "bytes" "crypto/tls" "fmt" + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/outputs" "io/ioutil" "math" "net/http" @@ -11,9 +13,7 @@ import ( "strconv" "strings" "time" - - "github.com/influxdata/telegraf" - "github.com/influxdata/telegraf/plugins/outputs" + "unicode" ) // Dynatrace Configuration for the Dynatrace output plugin @@ -99,6 +99,11 @@ func (d *Dynatrace) normalize(s string, max int) string { result = result[:len(result)-1] } } + + // append "generic" when it starts with a digit + if unicode.IsDigit(rune(s[0])) { + result = "generic_" + result + } return result } @@ -150,7 +155,7 @@ func (d *Dynatrace) Write(metrics []telegraf.Metric) error { } // metric name - metricID := metric.Name() + "." + k + metricID := metric.Name() + "." + (d.normalize(k, maxMetricKeyLen)) // write metric name combined with its field fmt.Fprintf(&buf, "%s", d.normalize(metricID, maxMetricKeyLen)) // add the tag string From fb56fcf3a9878f42086e19fa5a54d82e80ad8337 Mon Sep 17 00:00:00 2001 From: Thomas Schuetz Date: Thu, 30 Jul 2020 10:50:05 +0200 Subject: [PATCH 11/51] fixed normalization --- plugins/outputs/dynatrace/dynatrace.go | 83 ++++++++++++++------- plugins/outputs/dynatrace/dynatrace_test.go | 2 +- 2 files changed, 56 insertions(+), 29 deletions(-) diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go index 047254956eda8..2f23d77c920f0 100644 --- a/plugins/outputs/dynatrace/dynatrace.go +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -16,6 +16,16 @@ import ( "unicode" ) +const ( + oneAgentMetricsUrl = "http://127.0.0.1:14499/metrics/ingest" +) + +var ( + reNameAllowedCharList = regexp.MustCompile("[^A-Za-z0-9.]+") + maxDimKeyLen = 100 + maxMetricKeyLen = 250 +) + // Dynatrace Configuration for the Dynatrace output plugin type Dynatrace struct { EnvironmentURL string `toml:"environmentURL"` @@ -26,8 +36,6 @@ type Dynatrace struct { client *http.Client } -const oneAgentMetricsUrl = "http://127.0.0.1:14499/metrics/ingest" - const sampleConfig = ` ## Your Dynatrace environment URL. ## For Dynatrace SaaS environments the URL scheme is "https://{your-environment-id}.live.dynatrace.com/api/v2/metrics/ingest" @@ -64,6 +72,7 @@ func (d *Dynatrace) Connect() error { // Close Closes the Dynatrace output plugin func (d *Dynatrace) Close() error { + d.client = nil return nil } @@ -77,34 +86,40 @@ func (d *Dynatrace) Description() string { return "Send telegraf metrics to a Dynatrace environment" } -var reNameAllowedCharList = regexp.MustCompile("[^A-Za-z0-9.]+") - -const maxDimKeyLen = 100 -const maxMetricKeyLen = 250 - // Normalizes a metric keys or metric dimension identifiers // according to Dynatrace format. 
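// Illustrative note, not part of this patch: under the rewritten rules below,
// normalize("cpu usage%", 250) yields "cpu_usage". Runs of disallowed
// characters become "_", leading digits and underscores are stripped, the
// result is truncated to max, trailing underscores are removed, and an input
// that normalizes to the empty string now returns an error instead of an
// empty key.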
-func (d *Dynatrace) normalize(s string, max int) string { - result := reNameAllowedCharList.ReplaceAllString(s, "_") - // trunc to max size - if len(result) > max { - result = result[:max] - } - // remove trailing and ending '_' char - if len(result) > 1 { - if strings.HasPrefix(s, "_") { - result = result[1:] - } - if strings.HasSuffix(s, "_") { - result = result[:len(result)-1] +func (d *Dynatrace) normalize(s string, max int) (string, error) { + s = reNameAllowedCharList.ReplaceAllString(s, "_") + + // Strip Digits if they are at the beginning of the string + normalizedString := "" + firstChars := true + + for _, char := range s { + if firstChars && (unicode.IsDigit(char) || char == '_') { + continue + } else { + firstChars = false } + normalizedString += string(char) } - // append "generic" when it starts with a digit - if unicode.IsDigit(rune(s[0])) { - result = "generic_" + result + for strings.HasPrefix(normalizedString, "_") { + normalizedString = normalizedString[1:] } - return result + + if len(normalizedString) > max { + normalizedString = normalizedString[:max] + } + + for strings.HasSuffix(normalizedString, "_") { + normalizedString = normalizedString[:len(normalizedString)-1] + } + + if len(normalizedString) == 0 { + return "", fmt.Errorf("error normalizing the string: %s", s) + } + return normalizedString, nil } func (d *Dynatrace) escape(v string) string { @@ -123,13 +138,17 @@ func (d *Dynatrace) Write(metrics []telegraf.Metric) error { tagb.Reset() if len(metric.Tags()) > 0 { for tk, tv := range metric.Tags() { - fmt.Fprintf(&tagb, ",%s=%s", strings.ToLower(d.normalize(tk, maxDimKeyLen)), d.escape(tv)) + tagKey, err := d.normalize(tk, maxDimKeyLen) + if err != nil { + continue + } + fmt.Fprintf(&tagb, ",%s=%s", strings.ToLower(tagKey), d.escape(tv)) + } } if len(metric.Fields()) > 0 { for k, v := range metric.Fields() { var value string - // first check if value type is supported switch v := v.(type) { case string: continue @@ -155,9 +174,17 @@ func (d *Dynatrace) Write(metrics []telegraf.Metric) error { } // metric name - metricID := metric.Name() + "." 
+ (d.normalize(k, maxMetricKeyLen)) + metricKey, err := d.normalize(k, maxMetricKeyLen) + if err != nil { + continue + } + + metricID, err := d.normalize(metric.Name()+"."+metricKey, maxMetricKeyLen) // write metric name combined with its field - fmt.Fprintf(&buf, "%s", d.normalize(metricID, maxMetricKeyLen)) + if err != nil { + continue + } + fmt.Fprintf(&buf, "%s", metricID) // add the tag string if len(tagb.String()) > 0 { fmt.Fprintf(&buf, "%s", tagb.String()) diff --git a/plugins/outputs/dynatrace/dynatrace_test.go b/plugins/outputs/dynatrace/dynatrace_test.go index 4a848efb90d08..048a4fb1c8ec4 100644 --- a/plugins/outputs/dynatrace/dynatrace_test.go +++ b/plugins/outputs/dynatrace/dynatrace_test.go @@ -1,4 +1,4 @@ -package dynatrace +package dynatrace_weg import ( "encoding/json" From 0e6a3471693b2089d15fe95632b8d12d0f97b4e9 Mon Sep 17 00:00:00 2001 From: Thomas Schuetz Date: Thu, 30 Jul 2020 10:56:19 +0200 Subject: [PATCH 12/51] changed package name --- plugins/outputs/dynatrace/dynatrace_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/outputs/dynatrace/dynatrace_test.go b/plugins/outputs/dynatrace/dynatrace_test.go index 048a4fb1c8ec4..4a848efb90d08 100644 --- a/plugins/outputs/dynatrace/dynatrace_test.go +++ b/plugins/outputs/dynatrace/dynatrace_test.go @@ -1,4 +1,4 @@ -package dynatrace_weg +package dynatrace import ( "encoding/json" From fc9508336643c765fd89f929998ec1562b21dc1b Mon Sep 17 00:00:00 2001 From: sepp Date: Fri, 7 Aug 2020 11:27:29 +0200 Subject: [PATCH 13/51] added readme information about direct OneAgent ingest endpoint --- plugins/outputs/dynatrace/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/outputs/dynatrace/README.md b/plugins/outputs/dynatrace/README.md index df3abeae5b334..52455342d6a4a 100644 --- a/plugins/outputs/dynatrace/README.md +++ b/plugins/outputs/dynatrace/README.md @@ -13,7 +13,7 @@ Metrics fields are added to the measurement name by using '.' in the metric name ```toml [[outputs.dynatrace]] - ## Dynatrace environment URL. + ## Dynatrace environment URL (e.g.: https://YOUR_DOMAIN/api/v2/metrics/ingest) or use the local ingest endpoint of your OneAgent monitored host (e.g.: http://127.0.0.1:14499/metrics/ingest). environmentURL = "" environmentApiToken = "" skipCertificateCheck = false From 5087a01e3397e29700cfd4420898f163b04841b8 Mon Sep 17 00:00:00 2001 From: wolfgangB33r Date: Fri, 7 Aug 2020 12:10:01 +0200 Subject: [PATCH 14/51] added a metric name prefix setting --- plugins/outputs/dynatrace/README.md | 3 +++ plugins/outputs/dynatrace/dynatrace.go | 17 ++++++++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/plugins/outputs/dynatrace/README.md b/plugins/outputs/dynatrace/README.md index 52455342d6a4a..203d572679956 100644 --- a/plugins/outputs/dynatrace/README.md +++ b/plugins/outputs/dynatrace/README.md @@ -16,6 +16,9 @@ Metrics fields are added to the measurement name by using '.' in the metric name ## Dynatrace environment URL (e.g.: https://YOUR_DOMAIN/api/v2/metrics/ingest) or use the local ingest endpoint of your OneAgent monitored host (e.g.: http://127.0.0.1:14499/metrics/ingest). environmentURL = "" environmentApiToken = "" + ## Optional prefix for metric names (e.g.: "telegraf.") + prefix = "telegraf." 
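  ## Illustrative example (not part of this change): with prefix = "telegraf.",
  ## a measurement "cpu" with field "usage_idle" is ingested as "telegraf.cpu.usage_idle".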
+ ## Flag for skipping the tls certificate check, just for testing purposes, should be false by default skipCertificateCheck = false ``` \ No newline at end of file diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go index 2f23d77c920f0..2a010687ca44e 100644 --- a/plugins/outputs/dynatrace/dynatrace.go +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -28,10 +28,11 @@ var ( // Dynatrace Configuration for the Dynatrace output plugin type Dynatrace struct { - EnvironmentURL string `toml:"environmentURL"` - EnvironmentAPIToken string `toml:"environmentApiToken"` - SkipCertificateCheck bool `toml:"skipCertificateCheck"` - Log telegraf.Logger `toml:"log"` + EnvironmentURL string `toml:"environmentURL"` + EnvironmentAPIToken string `toml:"environmentApiToken"` + SkipCertificateCheck bool `toml:"skipCertificateCheck"` + Prefix string `toml:"prefix"` + Log telegraf.Logger `toml:"log"` client *http.Client } @@ -47,6 +48,12 @@ const sampleConfig = ` ## Create an API token within your Dynatrace environment, by navigating to Settings > Integration > Dynatrace API ## The API token needs data ingest scope permission. environmentApiToken = "" + + ## Optional prefix for metric names (e.g.: "telegraf.") + prefix = "telegraf." + + ## Optional flag for ignoring tls certificate check + skipCertificateCheck = false ` // Connect Connects the Dynatrace output plugin to the Telegraf stream @@ -179,7 +186,7 @@ func (d *Dynatrace) Write(metrics []telegraf.Metric) error { continue } - metricID, err := d.normalize(metric.Name()+"."+metricKey, maxMetricKeyLen) + metricID, err := d.normalize(d.Prefix + metric.Name() + "." + metricKey, maxMetricKeyLen) // write metric name combined with its field if err != nil { continue From 9e7dccf5d8d63c9f75e31edb7c3d98f1dcaf216f Mon Sep 17 00:00:00 2001 From: Thomas Schuetz Date: Mon, 10 Aug 2020 14:33:51 +0200 Subject: [PATCH 15/51] fmtted file --- plugins/outputs/dynatrace/dynatrace.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go index 2a010687ca44e..384d443205ed5 100644 --- a/plugins/outputs/dynatrace/dynatrace.go +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -28,11 +28,11 @@ var ( // Dynatrace Configuration for the Dynatrace output plugin type Dynatrace struct { - EnvironmentURL string `toml:"environmentURL"` - EnvironmentAPIToken string `toml:"environmentApiToken"` - SkipCertificateCheck bool `toml:"skipCertificateCheck"` - Prefix string `toml:"prefix"` - Log telegraf.Logger `toml:"log"` + EnvironmentURL string `toml:"environmentURL"` + EnvironmentAPIToken string `toml:"environmentApiToken"` + SkipCertificateCheck bool `toml:"skipCertificateCheck"` + Prefix string `toml:"prefix"` + Log telegraf.Logger `toml:"log"` client *http.Client } @@ -186,7 +186,7 @@ func (d *Dynatrace) Write(metrics []telegraf.Metric) error { continue } - metricID, err := d.normalize(d.Prefix + metric.Name() + "." 
+ metricKey, maxMetricKeyLen) + metricID, err := d.normalize(d.Prefix+metric.Name()+"."+metricKey, maxMetricKeyLen) // write metric name combined with its field if err != nil { continue From 91e26d9230fde8b43bf93f6d8de992d5c33c100a Mon Sep 17 00:00:00 2001 From: Thomas Schuetz Date: Tue, 11 Aug 2020 11:01:06 +0200 Subject: [PATCH 16/51] changed doc --- plugins/outputs/dynatrace/dynatrace.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go index 384d443205ed5..e4cf28fc2098b 100644 --- a/plugins/outputs/dynatrace/dynatrace.go +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -38,15 +38,19 @@ type Dynatrace struct { } const sampleConfig = ` - ## Your Dynatrace environment URL. + ## For usage with the Dynatrace OneAgent you can omit any configuration, + ## the only requirement is that the OneAgent is running on the same host. + ## Only setup environment url and token if you want to monitor a Host without the OneAgent present. + ## + ## Your Dynatrace environment URL. + ## For Dynatrace OneAgent you can leave this empty or set it to "http://127.0.0.1:14499/metrics/ingest" (default) ## For Dynatrace SaaS environments the URL scheme is "https://{your-environment-id}.live.dynatrace.com/api/v2/metrics/ingest" ## For Dynatrace Managed environments the URL scheme is "https://{your-domain}/e/{your-environment-id}/api/v2/metrics/ingest" - ## For Dynatrace OneAgent the URL scheme is "http://127.0.0.1:14499/metrics/ingest" (default) environmentURL = "" ## Your Dynatrace API token. ## Create an API token within your Dynatrace environment, by navigating to Settings > Integration > Dynatrace API - ## The API token needs data ingest scope permission. + ## The API token needs data ingest scope permission. When using OneAgent, no API token is required. 
environmentApiToken = "" ## Optional prefix for metric names (e.g.: "telegraf.") From b783f3a8c03fe91e2f67f647a891ef6fc3dabcc1 Mon Sep 17 00:00:00 2001 From: Thomas Schuetz <38893055+thschue@users.noreply.github.com> Date: Wed, 12 Aug 2020 07:06:53 +0200 Subject: [PATCH 17/51] Update plugins/outputs/dynatrace/dynatrace.go Co-authored-by: Steven Soroka --- plugins/outputs/dynatrace/dynatrace.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go index e4cf28fc2098b..88853b050c675 100644 --- a/plugins/outputs/dynatrace/dynatrace.go +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -28,7 +28,7 @@ var ( // Dynatrace Configuration for the Dynatrace output plugin type Dynatrace struct { - EnvironmentURL string `toml:"environmentURL"` + EnvironmentURL string `toml:"environment_url"` EnvironmentAPIToken string `toml:"environmentApiToken"` SkipCertificateCheck bool `toml:"skipCertificateCheck"` Prefix string `toml:"prefix"` From 38a05fdb1cfbc1f171ca99bfe02e5c97032cc8a1 Mon Sep 17 00:00:00 2001 From: Thomas Schuetz <38893055+thschue@users.noreply.github.com> Date: Wed, 12 Aug 2020 07:09:14 +0200 Subject: [PATCH 18/51] Apply suggestions from code review Co-authored-by: Steven Soroka --- plugins/outputs/dynatrace/dynatrace.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go index 88853b050c675..93d2100d7289d 100644 --- a/plugins/outputs/dynatrace/dynatrace.go +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -30,9 +30,9 @@ var ( type Dynatrace struct { EnvironmentURL string `toml:"environment_url"` EnvironmentAPIToken string `toml:"environmentApiToken"` - SkipCertificateCheck bool `toml:"skipCertificateCheck"` + InsecureSkipVerify bool `toml:"insecure_skip_verify"` Prefix string `toml:"prefix"` - Log telegraf.Logger `toml:"log"` + Log telegraf.Logger `toml:"-"` client *http.Client } From a273e626ea2fcc215ed430eb9d5d2c22788b2b0e Mon Sep 17 00:00:00 2001 From: Thomas Schuetz Date: Wed, 12 Aug 2020 07:52:00 +0200 Subject: [PATCH 19/51] added requested changes --- plugins/outputs/dynatrace/dynatrace.go | 84 +++++++++++++------------- 1 file changed, 41 insertions(+), 43 deletions(-) diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go index 93d2100d7289d..2ec655a813653 100644 --- a/plugins/outputs/dynatrace/dynatrace.go +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -5,6 +5,7 @@ import ( "crypto/tls" "fmt" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal" "github.com/influxdata/telegraf/plugins/outputs" "io/ioutil" "math" @@ -13,7 +14,6 @@ import ( "strconv" "strings" "time" - "unicode" ) const ( @@ -28,11 +28,12 @@ var ( // Dynatrace Configuration for the Dynatrace output plugin type Dynatrace struct { - EnvironmentURL string `toml:"environment_url"` - EnvironmentAPIToken string `toml:"environmentApiToken"` + URL string `toml:"url"` + APIToken string `toml:"api_token"` InsecureSkipVerify bool `toml:"insecure_skip_verify"` - Prefix string `toml:"prefix"` - Log telegraf.Logger `toml:"-"` + Prefix string `toml:"prefix"` + Log telegraf.Logger `toml:"-"` + Timeout internal.Duration `toml:"timeout"` client *http.Client } @@ -46,38 +47,25 @@ const sampleConfig = ` ## For Dynatrace OneAgent you can leave this empty or set it to "http://127.0.0.1:14499/metrics/ingest" (default) ## For Dynatrace SaaS environments the URL scheme is 
"https://{your-environment-id}.live.dynatrace.com/api/v2/metrics/ingest" ## For Dynatrace Managed environments the URL scheme is "https://{your-domain}/e/{your-environment-id}/api/v2/metrics/ingest" - environmentURL = "" + url = "" ## Your Dynatrace API token. ## Create an API token within your Dynatrace environment, by navigating to Settings > Integration > Dynatrace API ## The API token needs data ingest scope permission. When using OneAgent, no API token is required. - environmentApiToken = "" + api_token = "" ## Optional prefix for metric names (e.g.: "telegraf.") prefix = "telegraf." ## Optional flag for ignoring tls certificate check - skipCertificateCheck = false + insecure_skip_verify = false + + ## Connection timeout, defaults to "5s" if not set. + timeout = "5s" ` // Connect Connects the Dynatrace output plugin to the Telegraf stream func (d *Dynatrace) Connect() error { - if len(d.EnvironmentURL) == 0 { - d.Log.Infof("Dynatrace environmentURL is empty, defaulting to OneAgent metrics interface") - d.EnvironmentURL = oneAgentMetricsUrl - } - if d.EnvironmentURL != oneAgentMetricsUrl && len(d.EnvironmentAPIToken) == 0 { - d.Log.Errorf("Dynatrace environmentApiToken is a required field for Dynatrace output") - return fmt.Errorf("environmentApiToken is a required field for Dynatrace output") - } - - d.client = &http.Client{ - Transport: &http.Transport{ - Proxy: http.ProxyFromEnvironment, - TLSClientConfig: &tls.Config{InsecureSkipVerify: d.SkipCertificateCheck}, - }, - Timeout: 5 * time.Second, - } return nil } @@ -102,18 +90,8 @@ func (d *Dynatrace) Description() string { func (d *Dynatrace) normalize(s string, max int) (string, error) { s = reNameAllowedCharList.ReplaceAllString(s, "_") - // Strip Digits if they are at the beginning of the string - normalizedString := "" - firstChars := true - - for _, char := range s { - if firstChars && (unicode.IsDigit(char) || char == '_') { - continue - } else { - firstChars = false - } - normalizedString += string(char) - } + // Strip Digits and underscores if they are at the beginning of the string + normalizedString := strings.TrimLeft(s, "_0123456789") for strings.HasPrefix(normalizedString, "_") { normalizedString = normalizedString[1:] @@ -197,9 +175,7 @@ func (d *Dynatrace) Write(metrics []telegraf.Metric) error { } fmt.Fprintf(&buf, "%s", metricID) // add the tag string - if len(tagb.String()) > 0 { - fmt.Fprintf(&buf, "%s", tagb.String()) - } + fmt.Fprintf(&buf, "%s", tagb.String()) // write measured value fmt.Fprintf(&buf, " %v\n", value) @@ -212,15 +188,15 @@ func (d *Dynatrace) Write(metrics []telegraf.Metric) error { func (d *Dynatrace) send(msg []byte) error { var err error - req, err := http.NewRequest("POST", d.EnvironmentURL, bytes.NewBuffer(msg)) + req, err := http.NewRequest("POST", d.URL, bytes.NewBuffer(msg)) if err != nil { d.Log.Errorf("Dynatrace error: %s", err.Error()) return fmt.Errorf("Dynatrace error while creating HTTP request:, %s", err.Error()) } req.Header.Add("Content-Type", "text/plain; charset=UTF-8") - if len(d.EnvironmentAPIToken) != 0 { - req.Header.Add("Authorization", "Api-Token "+d.EnvironmentAPIToken) + if len(d.APIToken) != 0 { + req.Header.Add("Authorization", "Api-Token "+d.APIToken) } // add user-agent header to identify metric source req.Header.Add("User-Agent", "telegraf") @@ -248,8 +224,30 @@ func (d *Dynatrace) send(msg []byte) error { return nil } +func (d *Dynatrace) Init() error { + if len(d.URL) == 0 { + d.Log.Infof("Dynatrace URL is empty, defaulting to OneAgent metrics interface") + 
d.URL = oneAgentMetricsUrl + } + if d.URL != oneAgentMetricsUrl && len(d.APIToken) == 0 { + d.Log.Errorf("Dynatrace api_token is a required field for Dynatrace output") + return fmt.Errorf("api_token is a required field for Dynatrace output") + } + + d.client = &http.Client{ + Transport: &http.Transport{ + Proxy: http.ProxyFromEnvironment, + TLSClientConfig: &tls.Config{InsecureSkipVerify: d.InsecureSkipVerify}, + }, + Timeout: d.Timeout.Duration, + } + return nil +} + func init() { outputs.Add("dynatrace", func() telegraf.Output { - return &Dynatrace{} + return &Dynatrace{ + Timeout: internal.Duration{Duration: time.Second * 5}, + } }) } From de2728f243d70f1e4c79bab224fca6d6f320c613 Mon Sep 17 00:00:00 2001 From: Thomas Schuetz Date: Wed, 12 Aug 2020 07:54:28 +0200 Subject: [PATCH 20/51] changed log output --- plugins/outputs/dynatrace/dynatrace.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go index 2ec655a813653..7c6cb6d94078c 100644 --- a/plugins/outputs/dynatrace/dynatrace.go +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -158,7 +158,7 @@ func (d *Dynatrace) Write(metrics []telegraf.Metric) error { value = "0" } default: - d.Log.Infof("Dynatrace type not supported! %s", v) + d.Log.Debugf("Dynatrace type not supported! %s", v) continue } @@ -216,7 +216,7 @@ func (d *Dynatrace) send(msg []byte) error { d.Log.Errorf("Dynatrace error reading response") } bodyString := string(bodyBytes) - d.Log.Infof("Dynatrace returned: %s", bodyString) + d.Log.Debugf("Dynatrace returned: %s", bodyString) } else { return fmt.Errorf("Dynatrace request failed with response code:, %d", resp.StatusCode) } From 72b0bc03551f1d34a6e0471cc34d2c69262db6da Mon Sep 17 00:00:00 2001 From: Thomas Schuetz Date: Wed, 12 Aug 2020 08:06:51 +0200 Subject: [PATCH 21/51] changed tls configuration --- plugins/outputs/dynatrace/dynatrace.go | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go index 7c6cb6d94078c..b1d067b115dfc 100644 --- a/plugins/outputs/dynatrace/dynatrace.go +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -2,11 +2,11 @@ package dynatrace import ( "bytes" - "crypto/tls" "fmt" "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/internal" "github.com/influxdata/telegraf/plugins/outputs" + "github.com/influxdata/telegraf/plugins/common/tls" "io/ioutil" "math" "net/http" @@ -35,6 +35,8 @@ type Dynatrace struct { Log telegraf.Logger `toml:"-"` Timeout internal.Duration `toml:"timeout"` + tls.ClientConfig + client *http.Client } @@ -56,9 +58,15 @@ const sampleConfig = ` ## Optional prefix for metric names (e.g.: "telegraf.") prefix = "telegraf." - + + ## Optional TLS Config + # tls_ca = "/etc/telegraf/ca.pem" + # tls_cert = "/etc/telegraf/cert.pem" + # tls_key = "/etc/telegraf/key.pem" + ## Optional flag for ignoring tls certificate check - insecure_skip_verify = false + # insecure_skip_verify = false + ## Connection timeout, defaults to "5s" if not set. 
timeout = "5s" @@ -234,10 +242,15 @@ func (d *Dynatrace) Init() error { return fmt.Errorf("api_token is a required field for Dynatrace output") } + tlsCfg, err := d.ClientConfig.TLSConfig() + if err != nil { + return err + } + d.client = &http.Client{ Transport: &http.Transport{ Proxy: http.ProxyFromEnvironment, - TLSClientConfig: &tls.Config{InsecureSkipVerify: d.InsecureSkipVerify}, + TLSClientConfig: tlsCfg, }, Timeout: d.Timeout.Duration, } From f75f25471aeb66426ffa0a4fa4bbd4b57e4f59cd Mon Sep 17 00:00:00 2001 From: Thomas Schuetz Date: Wed, 12 Aug 2020 08:29:49 +0200 Subject: [PATCH 22/51] adapted tests --- plugins/outputs/dynatrace/dynatrace_test.go | 34 ++++++++++----------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/plugins/outputs/dynatrace/dynatrace_test.go b/plugins/outputs/dynatrace/dynatrace_test.go index 4a848efb90d08..682b37b51f8bf 100644 --- a/plugins/outputs/dynatrace/dynatrace_test.go +++ b/plugins/outputs/dynatrace/dynatrace_test.go @@ -20,8 +20,8 @@ func TestNilMetrics(t *testing.T) { defer ts.Close() d := &Dynatrace{} - d.EnvironmentURL = ts.URL - d.EnvironmentAPIToken = "123" + d.URL = ts.URL + d.APIToken = "123" d.Log = testutil.Logger{} err := d.Connect() require.NoError(t, err) @@ -38,8 +38,8 @@ func TestEmptyMetricsSlice(t *testing.T) { defer ts.Close() d := &Dynatrace{} - d.EnvironmentURL = ts.URL - d.EnvironmentAPIToken = "123" + d.URL = ts.URL + d.APIToken = "123" d.Log = testutil.Logger{} err := d.Connect() require.NoError(t, err) @@ -56,8 +56,8 @@ func TestMockURL(t *testing.T) { defer ts.Close() d := &Dynatrace{} - d.EnvironmentURL = ts.URL - d.EnvironmentAPIToken = "123" + d.URL = ts.URL + d.APIToken = "123" d.Log = testutil.Logger{} err := d.Connect() require.NoError(t, err) @@ -70,7 +70,7 @@ func TestMissingURL(t *testing.T) { d := &Dynatrace{} d.Log = testutil.Logger{} err := d.Connect() - require.Equal(t, oneAgentMetricsUrl, d.EnvironmentURL) + require.Equal(t, oneAgentMetricsUrl, d.URL) require.NoError(t, err) } @@ -78,13 +78,13 @@ func TestMissingAPITokenMissingURL(t *testing.T) { d := &Dynatrace{} d.Log = testutil.Logger{} err := d.Connect() - require.Equal(t, oneAgentMetricsUrl, d.EnvironmentURL) + require.Equal(t, oneAgentMetricsUrl, d.URL) require.NoError(t, err) } func TestMissingAPIToken(t *testing.T) { d := &Dynatrace{} - d.EnvironmentURL = "test" + d.URL = "test" d.Log = testutil.Logger{} err := d.Connect() require.Error(t, err) @@ -98,8 +98,8 @@ func TestSendMetric(t *testing.T) { defer ts.Close() d := &Dynatrace{} - d.EnvironmentURL = ts.URL - d.EnvironmentAPIToken = "123" + d.URL = ts.URL + d.APIToken = "123" d.Log = testutil.Logger{} err := d.Connect() require.NoError(t, err) @@ -134,8 +134,8 @@ func TestSendSingleMetric(t *testing.T) { defer ts.Close() d := &Dynatrace{} - d.EnvironmentURL = ts.URL - d.EnvironmentAPIToken = "123" + d.URL = ts.URL + d.APIToken = "123" d.Log = testutil.Logger{} err := d.Connect() require.NoError(t, err) @@ -163,8 +163,8 @@ func TestSendMetricWithoutTags(t *testing.T) { defer ts.Close() d := &Dynatrace{} - d.EnvironmentURL = ts.URL - d.EnvironmentAPIToken = "123" + d.URL = ts.URL + d.APIToken = "123" d.Log = testutil.Logger{} err := d.Connect() require.NoError(t, err) @@ -192,8 +192,8 @@ func TestSendBooleanMetricWithoutTags(t *testing.T) { defer ts.Close() d := &Dynatrace{} - d.EnvironmentURL = ts.URL - d.EnvironmentAPIToken = "123" + d.URL = ts.URL + d.APIToken = "123" d.Log = testutil.Logger{} err := d.Connect() require.NoError(t, err) From 72c2418710bc062bd2897fe6fab4de57a3182457 Mon 
Sep 17 00:00:00 2001 From: Thomas Schuetz Date: Wed, 12 Aug 2020 08:38:47 +0200 Subject: [PATCH 23/51] fmtted file --- plugins/outputs/dynatrace/dynatrace.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go index b1d067b115dfc..0eba4d825bf0a 100644 --- a/plugins/outputs/dynatrace/dynatrace.go +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -5,8 +5,8 @@ import ( "fmt" "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/internal" - "github.com/influxdata/telegraf/plugins/outputs" "github.com/influxdata/telegraf/plugins/common/tls" + "github.com/influxdata/telegraf/plugins/outputs" "io/ioutil" "math" "net/http" @@ -28,11 +28,11 @@ var ( // Dynatrace Configuration for the Dynatrace output plugin type Dynatrace struct { - URL string `toml:"url"` - APIToken string `toml:"api_token"` - InsecureSkipVerify bool `toml:"insecure_skip_verify"` - Prefix string `toml:"prefix"` - Log telegraf.Logger `toml:"-"` + URL string `toml:"url"` + APIToken string `toml:"api_token"` + InsecureSkipVerify bool `toml:"insecure_skip_verify"` + Prefix string `toml:"prefix"` + Log telegraf.Logger `toml:"-"` Timeout internal.Duration `toml:"timeout"` tls.ClientConfig From 869cda447270869d2a49bd803d47e5527e259d36 Mon Sep 17 00:00:00 2001 From: Thomas Schuetz Date: Wed, 12 Aug 2020 09:30:25 +0200 Subject: [PATCH 24/51] fixed tests --- plugins/outputs/dynatrace/dynatrace_test.go | 58 ++++++++++++++++----- 1 file changed, 46 insertions(+), 12 deletions(-) diff --git a/plugins/outputs/dynatrace/dynatrace_test.go b/plugins/outputs/dynatrace/dynatrace_test.go index 682b37b51f8bf..898ac4e33d072 100644 --- a/plugins/outputs/dynatrace/dynatrace_test.go +++ b/plugins/outputs/dynatrace/dynatrace_test.go @@ -3,6 +3,7 @@ package dynatrace import ( "encoding/json" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal" "github.com/influxdata/telegraf/metric" "github.com/influxdata/telegraf/testutil" "github.com/stretchr/testify/require" @@ -19,11 +20,17 @@ func TestNilMetrics(t *testing.T) { })) defer ts.Close() - d := &Dynatrace{} + d := &Dynatrace{ + Timeout: internal.Duration{Duration: time.Second * 5}, + } + d.URL = ts.URL d.APIToken = "123" d.Log = testutil.Logger{} - err := d.Connect() + err := d.Init() + require.NoError(t, err) + + err = d.Connect() require.NoError(t, err) err = d.Write(nil) @@ -38,10 +45,15 @@ func TestEmptyMetricsSlice(t *testing.T) { defer ts.Close() d := &Dynatrace{} + d.URL = ts.URL d.APIToken = "123" d.Log = testutil.Logger{} - err := d.Connect() + + err := d.Init() + require.NoError(t, err) + + err = d.Connect() require.NoError(t, err) empty := []telegraf.Metric{} err = d.Write(empty) @@ -56,37 +68,47 @@ func TestMockURL(t *testing.T) { defer ts.Close() d := &Dynatrace{} + d.URL = ts.URL d.APIToken = "123" d.Log = testutil.Logger{} - err := d.Connect() - require.NoError(t, err) + err := d.Init() + require.NoError(t, err) + err = d.Connect() + require.NoError(t, err) err = d.Write(testutil.MockMetrics()) require.NoError(t, err) } func TestMissingURL(t *testing.T) { d := &Dynatrace{} + d.Log = testutil.Logger{} - err := d.Connect() + err := d.Init() + require.Equal(t, oneAgentMetricsUrl, d.URL) + err = d.Connect() require.Equal(t, oneAgentMetricsUrl, d.URL) require.NoError(t, err) } func TestMissingAPITokenMissingURL(t *testing.T) { d := &Dynatrace{} + d.Log = testutil.Logger{} - err := d.Connect() + err := d.Init() + require.Equal(t, oneAgentMetricsUrl, d.URL) + err 
= d.Connect() require.Equal(t, oneAgentMetricsUrl, d.URL) require.NoError(t, err) } func TestMissingAPIToken(t *testing.T) { d := &Dynatrace{} + d.URL = "test" d.Log = testutil.Logger{} - err := d.Connect() + err := d.Init() require.Error(t, err) } @@ -98,10 +120,13 @@ func TestSendMetric(t *testing.T) { defer ts.Close() d := &Dynatrace{} + d.URL = ts.URL d.APIToken = "123" d.Log = testutil.Logger{} - err := d.Connect() + err := d.Init() + require.NoError(t, err) + err = d.Connect() require.NoError(t, err) // Init metrics @@ -134,10 +159,13 @@ func TestSendSingleMetric(t *testing.T) { defer ts.Close() d := &Dynatrace{} + d.URL = ts.URL d.APIToken = "123" d.Log = testutil.Logger{} - err := d.Connect() + err := d.Init() + require.NoError(t, err) + err = d.Connect() require.NoError(t, err) // Init metrics @@ -163,10 +191,13 @@ func TestSendMetricWithoutTags(t *testing.T) { defer ts.Close() d := &Dynatrace{} + d.URL = ts.URL d.APIToken = "123" d.Log = testutil.Logger{} - err := d.Connect() + err := d.Init() + require.NoError(t, err) + err = d.Connect() require.NoError(t, err) // Init metrics @@ -192,10 +223,13 @@ func TestSendBooleanMetricWithoutTags(t *testing.T) { defer ts.Close() d := &Dynatrace{} + d.URL = ts.URL d.APIToken = "123" d.Log = testutil.Logger{} - err := d.Connect() + err := d.Init() + require.NoError(t, err) + err = d.Connect() require.NoError(t, err) // Init metrics From 9f1609976a0521c8d5a682bdd15a5baf8f8d4d16 Mon Sep 17 00:00:00 2001 From: wolfgangB33r Date: Wed, 12 Aug 2020 16:27:43 +0200 Subject: [PATCH 25/51] fixed tag order and added encoding checks in all tests --- plugins/outputs/dynatrace/dynatrace.go | 14 ++- plugins/outputs/dynatrace/dynatrace_test.go | 94 +++++++++++++++++++-- 2 files changed, 100 insertions(+), 8 deletions(-) diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go index 0eba4d825bf0a..a8eee87e8620b 100644 --- a/plugins/outputs/dynatrace/dynatrace.go +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -14,6 +14,7 @@ import ( "strconv" "strings" "time" + "sort" ) const ( @@ -134,12 +135,19 @@ func (d *Dynatrace) Write(metrics []telegraf.Metric) error { // first write the tags into a buffer tagb.Reset() if len(metric.Tags()) > 0 { - for tk, tv := range metric.Tags() { - tagKey, err := d.normalize(tk, maxDimKeyLen) + keys := make([]string, 0, len(metric.Tags())) + for k := range metric.Tags() { + keys = append(keys, k) + } + // sort tag keys to expect the same order in each run + sort.Strings(keys) + + for _, k := range keys { + tagKey, err := d.normalize(k, maxDimKeyLen) if err != nil { continue } - fmt.Fprintf(&tagb, ",%s=%s", strings.ToLower(tagKey), d.escape(tv)) + fmt.Fprintf(&tagb, ",%s=%s", strings.ToLower(tagKey), d.escape(metric.Tags()[k])) } } diff --git a/plugins/outputs/dynatrace/dynatrace_test.go b/plugins/outputs/dynatrace/dynatrace_test.go index 898ac4e33d072..2af472e233bf9 100644 --- a/plugins/outputs/dynatrace/dynatrace_test.go +++ b/plugins/outputs/dynatrace/dynatrace_test.go @@ -11,6 +11,7 @@ import ( "net/http" "net/http/httptest" "testing" "time" + "io/ioutil" ) func TestNilMetrics(t *testing.T) { @@ -114,6 +115,16 @@ func TestMissingAPIToken(t *testing.T) { func TestSendMetric(t *testing.T) { ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // check the encoded result + bodyBytes, err := ioutil.ReadAll(r.Body) + if err != nil { + require.NoError(t, err) + } + bodyString := string(bodyBytes) + expected := "mymeasurement.myfield,host=\"192.168.0.1\",nix=\"nix\"
3.140000\nmymeasurement.value,host=\"192.168.0.1\" 3.140000\n" + if bodyString != expected { + t.Errorf("Metric encoding failed. expected: %s but got: %s", expected, bodyString) + } w.WriteHeader(http.StatusOK) json.NewEncoder(w).Encode(`{"linesOk":10,"linesInvalid":0,"error":null}`) })) @@ -151,10 +162,20 @@ func TestSendMetric(t *testing.T) { require.NoError(t, err) } -func TestSendSingleMetric(t *testing.T) { +func TestSendSingleMetricWithUnorderedTags(t *testing.T) { ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // check the encoded result + bodyBytes, err := ioutil.ReadAll(r.Body) + if err != nil { + require.NoError(t, err) + } + bodyString := string(bodyBytes) + expected := "mymeasurement.myfield,a=\"test\",b=\"test\",c=\"test\" 3.140000\n" + if bodyString != expected { + t.Errorf("Metric encoding failed. expected: %s but got: %s", expected, bodyString) + } w.WriteHeader(http.StatusOK) - json.NewEncoder(w).Encode(`{"linesOk":10,"linesInvalid":0,"error":null}`) + json.NewEncoder(w).Encode(`{"linesOk":1,"linesInvalid":0,"error":null}`) })) defer ts.Close() @@ -172,7 +193,7 @@ func TestSendSingleMetric(t *testing.T) { m1, _ := metric.New( "mymeasurement", - map[string]string{"host": "192.168.0.1", "nix": "nix"}, + map[string]string{"a": "test", "c": "test", "b": "test"}, map[string]interface{}{"myfield": float64(3.14)}, time.Date(2010, time.November, 10, 23, 0, 0, 0, time.UTC), ) @@ -186,7 +207,17 @@ func TestSendSingleMetric(t *testing.T) { func TestSendMetricWithoutTags(t *testing.T) { ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) - json.NewEncoder(w).Encode(`{"linesOk":10,"linesInvalid":0,"error":null}`) + // check the encoded result + bodyBytes, err := ioutil.ReadAll(r.Body) + if err != nil { + require.NoError(t, err) + } + bodyString := string(bodyBytes) + expected := "mymeasurement.myfield 3.140000\n" + if bodyString != expected { + t.Errorf("Metric encoding failed. expected: %s but got: %s", expected, bodyString) + } + json.NewEncoder(w).Encode(`{"linesOk":1,"linesInvalid":0,"error":null}`) })) defer ts.Close() @@ -215,10 +246,63 @@ func TestSendMetricWithoutTags(t *testing.T) { require.NoError(t, err) } + +func TestSendMetricWithUpperCaseTagKeys(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + // check the encoded result + bodyBytes, err := ioutil.ReadAll(r.Body) + if err != nil { + require.NoError(t, err) + } + bodyString := string(bodyBytes) + expected := "mymeasurement.myfield,aaa=\"test\",b_b=\"test\",ccc=\"test\" 3.140000\n" + if bodyString != expected { + t.Errorf("Metric encoding failed. 
expected: %s but got: %s", expected, bodyString) + } + json.NewEncoder(w).Encode(`{"linesOk":1,"linesInvalid":0,"error":null}`) + })) + defer ts.Close() + + d := &Dynatrace{} + + d.URL = ts.URL + d.APIToken = "123" + d.Log = testutil.Logger{} + err := d.Init() + require.NoError(t, err) + err = d.Connect() + require.NoError(t, err) + + // Init metrics + + m1, _ := metric.New( + "mymeasurement", + map[string]string{"AAA": "test", "CcC": "test", "B B": "test"}, + map[string]interface{}{"myfield": float64(3.14)}, + time.Date(2010, time.November, 10, 23, 0, 0, 0, time.UTC), + ) + + metrics := []telegraf.Metric{m1} + + err = d.Write(metrics) + require.NoError(t, err) +} + func TestSendBooleanMetricWithoutTags(t *testing.T) { ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) - json.NewEncoder(w).Encode(`{"linesOk":10,"linesInvalid":0,"error":null}`) + // check the encoded result + bodyBytes, err := ioutil.ReadAll(r.Body) + if err != nil { + require.NoError(t, err) + } + bodyString := string(bodyBytes) + expected := "mymeasurement.myfield 1\n" + if bodyString != expected { + t.Errorf("Metric encoding failed. expected: %s but got: %s", expected, bodyString) + } + json.NewEncoder(w).Encode(`{"linesOk":1,"linesInvalid":0,"error":null}`) })) defer ts.Close() From 2167b548c648bb9bbe6438f16fdab49ca211cb31 Mon Sep 17 00:00:00 2001 From: Wolfgang Beer Date: Wed, 12 Aug 2020 17:02:51 +0200 Subject: [PATCH 26/51] changed formatting --- plugins/outputs/dynatrace/dynatrace.go | 2 +- plugins/outputs/dynatrace/dynatrace_test.go | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go index a8eee87e8620b..4796d1d8fc444 100644 --- a/plugins/outputs/dynatrace/dynatrace.go +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -11,10 +11,10 @@ import ( "math" "net/http" "regexp" + "sort" "strconv" "strings" "time" - "sort" ) const ( diff --git a/plugins/outputs/dynatrace/dynatrace_test.go b/plugins/outputs/dynatrace/dynatrace_test.go index 2af472e233bf9..cf6549c72ff11 100644 --- a/plugins/outputs/dynatrace/dynatrace_test.go +++ b/plugins/outputs/dynatrace/dynatrace_test.go @@ -7,11 +7,11 @@ import ( "github.com/influxdata/telegraf/metric" "github.com/influxdata/telegraf/testutil" "github.com/stretchr/testify/require" + "io/ioutil" "net/http" "net/http/httptest" "testing" "time" - "io/ioutil" ) func TestNilMetrics(t *testing.T) { @@ -119,7 +119,7 @@ func TestSendMetric(t *testing.T) { bodyBytes, err := ioutil.ReadAll(r.Body) if err != nil { require.NoError(t, err) - } + } bodyString := string(bodyBytes) expected := "mymeasurement.myfield,host=\"192.168.0.1\",nix=\"nix\" 3.140000\nmymeasurement.value,host=\"192.168.0.1\" 3.140000\n" if bodyString != expected { @@ -246,7 +246,6 @@ func TestSendMetricWithoutTags(t *testing.T) { require.NoError(t, err) } - func TestSendMetricWithUpperCaseTagKeys(t *testing.T) { ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) From 09a8523fecd6da32c01008ce6ac5fdc1c236a525 Mon Sep 17 00:00:00 2001 From: Michael Kopp Date: Mon, 17 Aug 2020 08:58:42 +0200 Subject: [PATCH 27/51] Update README.md Add dynatrace output plugin --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 715a232345620..8bfb9fed6d024 100644 --- a/README.md +++ b/README.md @@ -410,6 +410,7 @@ For documentation on the latest development code see the [documentation 
index][d * [cratedb](./plugins/outputs/cratedb) * [datadog](./plugins/outputs/datadog) * [discard](./plugins/outputs/discard) +* [dynatrace](./plugins/outputs/dynatrace) * [elasticsearch](./plugins/outputs/elasticsearch) * [exec](./plugins/outputs/exec) * [execd](./plugins/outputs/execd) From fb6c59ad6eecfec4e2bc0492bef0bda65d4305a0 Mon Sep 17 00:00:00 2001 From: Michael Kopp Date: Mon, 17 Aug 2020 09:37:47 +0200 Subject: [PATCH 28/51] Update README.md --- plugins/outputs/dynatrace/README.md | 31 +++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/plugins/outputs/dynatrace/README.md b/plugins/outputs/dynatrace/README.md index 203d572679956..987c110abd381 100644 --- a/plugins/outputs/dynatrace/README.md +++ b/plugins/outputs/dynatrace/README.md @@ -1,19 +1,31 @@ # Dynatrace Output Plugin -This plugin writes telegraf metrics to a Dynatrace environment. +This plugin is sending telegraf metrics to [Dynatrace](www.dynatrace.com). It has two operational modes. -An API token is necessary, which can be obtained in your Dynatrace environment. Navigate to **Dynatrace > Settings > Integration > Dynatrace API** and create a new token with -'Data ingest' access scope enabled. +## Running alongside Dynatrace OneAgent -Telegraf measurements which can't be converted to a float64 are skipped. +if you run the Telegraf agent on a host or VM that is monitored by the Dynatrace OneAgent then you only need to enable the plugin but need no further configuration. The Dynatrace telegraf output plugin will send all metrics to the OneAgent which will use its secure and load balanced connection to send the metrics to your Dynatrace SaaS or Managed environment. + +## Running standlone + +If you run the Telegraf agent on a host or VM without a OneAgent you will need to configure the environment API endpoint to send the metrics to and a an API token for security. + +The endpoint for the Dynatrace Metrics API is -Metrics fields are added to the measurement name by using '.' in the metric name. +* Managed https://{your-domain}/e/{your-environment-id}/api/v2/metrics/ingest +* SaaS https://{your-environment-id}.live.dynatrace.com/api/v2/metrics/ingest -### Configuration +You can learn more about how to use the Dynatrace API [here](https://www.dynatrace.com/support/help/dynatrace-api/) + +You will also need to configure an API token for secure access. Find out how to create a token [here](https://www.dynatrace.com/support/help/dynatrace-api/environment-api/tokens/) or simply navigate to **Settings > Integration > Dynatrace API** in your Dynatrace environment and create a token with Dynatrace API** and create a new token with +'Data ingest' access scope enabled. + +## Configuration ```toml [[outputs.dynatrace]] - ## Dynatrace environment URL (e.g.: https://YOUR_DOMAIN/api/v2/metrics/ingest) or use the local ingest endpoint of your OneAgent monitored host (e.g.: http://127.0.0.1:14499/metrics/ingest). + ## Leave empty or use the local ingest endpoint of your OneAgent monitored host (e.g.: http://127.0.0.1:14499/metrics/ingest). + ## Set Dynatrace environment URL (e.g.: https://YOUR_DOMAIN/api/v2/metrics/ingest) if you do not use a OneAgent environmentURL = "" environmentApiToken = "" ## Optional prefix for metric names (e.g.: "telegraf.") @@ -21,4 +33,7 @@ Metrics fields are added to the measurement name by using '.' 
in the metric name ## Flag for skipping the tls certificate check, just for testing purposes, should be false by default skipCertificateCheck = false -``` \ No newline at end of file +``` + +## Limitations +Telegraf measurements which can't be converted to a float64 are skipped. From 1f9fe1670c2b3df0ade7667f9eb203f7524989ec Mon Sep 17 00:00:00 2001 From: Michael Kopp Date: Mon, 17 Aug 2020 09:45:28 +0200 Subject: [PATCH 29/51] Update README.md add version requirement. --- plugins/outputs/dynatrace/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/plugins/outputs/dynatrace/README.md b/plugins/outputs/dynatrace/README.md index 987c110abd381..2b3607826da75 100644 --- a/plugins/outputs/dynatrace/README.md +++ b/plugins/outputs/dynatrace/README.md @@ -35,5 +35,9 @@ You will also need to configure an API token for secure access. Find out how to ``` +## Requirements + +This plugin requires Dynatrace version 1.201 or higher. You will either need a Dynatrace OneAgent installed on the same host or vm as Telegraf with version 1.201 or higher; or a Dynatrace environment with version 1.201 or higher + ## Limitations Telegraf measurements which can't be converted to a float64 are skipped. From 42afe19b174e36ca42ac692685133e43c13c0749 Mon Sep 17 00:00:00 2001 From: Thomas Schuetz <38893055+thschue@users.noreply.github.com> Date: Mon, 17 Aug 2020 09:56:14 +0200 Subject: [PATCH 30/51] Update README.md --- plugins/outputs/dynatrace/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/outputs/dynatrace/README.md b/plugins/outputs/dynatrace/README.md index 2b3607826da75..646fb4bb7e2aa 100644 --- a/plugins/outputs/dynatrace/README.md +++ b/plugins/outputs/dynatrace/README.md @@ -6,7 +6,7 @@ This plugin is sending telegraf metrics to [Dynatrace](www.dynatrace.com). It ha if you run the Telegraf agent on a host or VM that is monitored by the Dynatrace OneAgent then you only need to enable the plugin but need no further configuration. The Dynatrace telegraf output plugin will send all metrics to the OneAgent which will use its secure and load balanced connection to send the metrics to your Dynatrace SaaS or Managed environment. -## Running standlone +## Running standalone If you run the Telegraf agent on a host or VM without a OneAgent you will need to configure the environment API endpoint to send the metrics to and a an API token for security. From c2134772b6bd516d395466375bf347ecc3ba0849 Mon Sep 17 00:00:00 2001 From: Thomas Schuetz <38893055+thschue@users.noreply.github.com> Date: Mon, 17 Aug 2020 09:56:39 +0200 Subject: [PATCH 31/51] Update README.md --- plugins/outputs/dynatrace/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/outputs/dynatrace/README.md b/plugins/outputs/dynatrace/README.md index 646fb4bb7e2aa..b48300ebd2c3d 100644 --- a/plugins/outputs/dynatrace/README.md +++ b/plugins/outputs/dynatrace/README.md @@ -8,7 +8,7 @@ if you run the Telegraf agent on a host or VM that is monitored by the Dynatrace ## Running standalone -If you run the Telegraf agent on a host or VM without a OneAgent you will need to configure the environment API endpoint to send the metrics to and a an API token for security. +If you run the Telegraf agent on a host or VM without a OneAgent you will need to configure the environment API endpoint to send the metrics to and an API token for security. 
The endpoint for the Dynatrace Metrics API is From ca3297f9620fd36183ffccab4e63456f0e8b4919 Mon Sep 17 00:00:00 2001 From: Thomas Schuetz <38893055+thschue@users.noreply.github.com> Date: Mon, 17 Aug 2020 09:59:22 +0200 Subject: [PATCH 32/51] Update README.md --- plugins/outputs/dynatrace/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/plugins/outputs/dynatrace/README.md b/plugins/outputs/dynatrace/README.md index b48300ebd2c3d..ae1d45292c63e 100644 --- a/plugins/outputs/dynatrace/README.md +++ b/plugins/outputs/dynatrace/README.md @@ -26,12 +26,12 @@ You will also need to configure an API token for secure access. Find out how to [[outputs.dynatrace]] ## Leave empty or use the local ingest endpoint of your OneAgent monitored host (e.g.: http://127.0.0.1:14499/metrics/ingest). ## Set Dynatrace environment URL (e.g.: https://YOUR_DOMAIN/api/v2/metrics/ingest) if you do not use a OneAgent - environmentURL = "" - environmentApiToken = "" + url = "" + api_token = "" ## Optional prefix for metric names (e.g.: "telegraf.") prefix = "telegraf." ## Flag for skipping the tls certificate check, just for testing purposes, should be false by default - skipCertificateCheck = false + insecure_skip_verify = false ``` From 43dafeaaa9a0c6b2f606e3da43a2876d475d44ed Mon Sep 17 00:00:00 2001 From: Thomas Schuetz <38893055+thschue@users.noreply.github.com> Date: Mon, 17 Aug 2020 10:49:06 +0200 Subject: [PATCH 33/51] Update README.md --- plugins/outputs/dynatrace/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/outputs/dynatrace/README.md b/plugins/outputs/dynatrace/README.md index ae1d45292c63e..c663c701cee07 100644 --- a/plugins/outputs/dynatrace/README.md +++ b/plugins/outputs/dynatrace/README.md @@ -17,7 +17,7 @@ The endpoint for the Dynatrace Metrics API is You can learn more about how to use the Dynatrace API [here](https://www.dynatrace.com/support/help/dynatrace-api/) -You will also need to configure an API token for secure access. Find out how to create a token [here](https://www.dynatrace.com/support/help/dynatrace-api/environment-api/tokens/) or simply navigate to **Settings > Integration > Dynatrace API** in your Dynatrace environment and create a token with Dynatrace API** and create a new token with +You will also need to configure an API token for secure access. Find out how to create a token [here](https://www.dynatrace.com/support/help/dynatrace-api/environment-api/tokens/) or simply navigate to **Settings > Integration > Dynatrace API** in your Dynatrace environment and create a token with Dynatrace API and create a new token with 'Data ingest' access scope enabled. ## Configuration From 6edda7e72b56af465c300bdae601985a97c3535d Mon Sep 17 00:00:00 2001 From: Thomas Schuetz <38893055+thschue@users.noreply.github.com> Date: Mon, 17 Aug 2020 12:52:54 +0200 Subject: [PATCH 34/51] Update README.md --- plugins/outputs/dynatrace/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/outputs/dynatrace/README.md b/plugins/outputs/dynatrace/README.md index c663c701cee07..7685081b3ef40 100644 --- a/plugins/outputs/dynatrace/README.md +++ b/plugins/outputs/dynatrace/README.md @@ -37,7 +37,7 @@ You will also need to configure an API token for secure access. Find out how to ## Requirements -This plugin requires Dynatrace version 1.201 or higher. 
You will either need a Dynatrace OneAgent installed on the same host or vm as Telegraf with version 1.201 or higher; or a Dynatrace environment with version 1.201 or higher +This plugin requires Dynatrace version 1.202 or higher. You will either need a Dynatrace OneAgent installed on the same host or vm as Telegraf with version 1.201 or higher; or a Dynatrace environment with version 1.202 or higher ## Limitations Telegraf measurements which can't be converted to a float64 are skipped. From 0faf5c42fcf60f61560c4ac1f66402ea547f2094 Mon Sep 17 00:00:00 2001 From: Thomas Schuetz <38893055+thschue@users.noreply.github.com> Date: Mon, 17 Aug 2020 12:55:27 +0200 Subject: [PATCH 35/51] Update README.md --- plugins/outputs/dynatrace/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/outputs/dynatrace/README.md b/plugins/outputs/dynatrace/README.md index 7685081b3ef40..4e676d21fcf3f 100644 --- a/plugins/outputs/dynatrace/README.md +++ b/plugins/outputs/dynatrace/README.md @@ -37,7 +37,7 @@ You will also need to configure an API token for secure access. Find out how to ## Requirements -This plugin requires Dynatrace version 1.202 or higher. You will either need a Dynatrace OneAgent installed on the same host or vm as Telegraf with version 1.201 or higher; or a Dynatrace environment with version 1.202 or higher +This plugin requires Dynatrace version 1.202 or higher. You will either need a Dynatrace OneAgent installed on the same host or vm as Telegraf with version 1.202 or higher; or a Dynatrace environment with version 1.202 or higher ## Limitations Telegraf measurements which can't be converted to a float64 are skipped. From bf35e34900981907a14be4445ec3c537a8f0bc76 Mon Sep 17 00:00:00 2001 From: Thomas Schuetz <38893055+thschue@users.noreply.github.com> Date: Mon, 17 Aug 2020 13:06:30 +0200 Subject: [PATCH 36/51] Update README.md --- plugins/outputs/dynatrace/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/outputs/dynatrace/README.md b/plugins/outputs/dynatrace/README.md index 4e676d21fcf3f..44b5b8b7ec2f5 100644 --- a/plugins/outputs/dynatrace/README.md +++ b/plugins/outputs/dynatrace/README.md @@ -37,7 +37,7 @@ You will also need to configure an API token for secure access. Find out how to ## Requirements -This plugin requires Dynatrace version 1.202 or higher. You will either need a Dynatrace OneAgent installed on the same host or vm as Telegraf with version 1.202 or higher; or a Dynatrace environment with version 1.202 or higher +This plugin requires Dynatrace version 1.202 or higher. You will either need a Dynatrace OneAgent (version 1.202 or higher) installed on the same host as Telegraf; or a Dynatrace environment with version 1.202 or higher ## Limitations Telegraf measurements which can't be converted to a float64 are skipped. From 231878a3cf15227ca0deca8920549c34d1f84327 Mon Sep 17 00:00:00 2001 From: Michael Kopp Date: Mon, 17 Aug 2020 15:27:48 +0200 Subject: [PATCH 37/51] Update README.md --- plugins/outputs/dynatrace/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/outputs/dynatrace/README.md b/plugins/outputs/dynatrace/README.md index 44b5b8b7ec2f5..6863af0b94125 100644 --- a/plugins/outputs/dynatrace/README.md +++ b/plugins/outputs/dynatrace/README.md @@ -37,7 +37,7 @@ You will also need to configure an API token for secure access. Find out how to ## Requirements -This plugin requires Dynatrace version 1.202 or higher. 
You will either need a Dynatrace OneAgent (version 1.202 or higher) installed on the same host as Telegraf; or a Dynatrace environment with version 1.202 or higher +This plugin requires Dynatrace version 1.202 or higher. You will either need a Dynatrace OneAgent (version 1.201 or higher) installed on the same host as Telegraf; or a Dynatrace environment with version 1.202 or higher ## Limitations Telegraf measurements which can't be converted to a float64 are skipped. From ceb42213a7e9cc405d0ead9870f1fd73b2d87658 Mon Sep 17 00:00:00 2001 From: Thomas Schuetz <38893055+thschue@users.noreply.github.com> Date: Mon, 17 Aug 2020 16:21:24 +0200 Subject: [PATCH 38/51] Update README.md --- plugins/outputs/dynatrace/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/outputs/dynatrace/README.md b/plugins/outputs/dynatrace/README.md index 6863af0b94125..b2fa08c0e751d 100644 --- a/plugins/outputs/dynatrace/README.md +++ b/plugins/outputs/dynatrace/README.md @@ -18,7 +18,7 @@ The endpoint for the Dynatrace Metrics API is You can learn more about how to use the Dynatrace API [here](https://www.dynatrace.com/support/help/dynatrace-api/) You will also need to configure an API token for secure access. Find out how to create a token [here](https://www.dynatrace.com/support/help/dynatrace-api/environment-api/tokens/) or simply navigate to **Settings > Integration > Dynatrace API** in your Dynatrace environment and create a token with Dynatrace API and create a new token with -'Data ingest' access scope enabled. +'Ingest metrics data points' access scope enabled. ## Configuration From eeb6bede8cd7cc73e9c84b4fe459e2e4e8010668 Mon Sep 17 00:00:00 2001 From: Thomas Schuetz Date: Thu, 20 Aug 2020 16:07:24 +0200 Subject: [PATCH 39/51] removed InsecureSkipVerify from struct as it is part of github.com/influxdata/telegraf/plugins/common/tls --- plugins/outputs/dynatrace/dynatrace.go | 1 - 1 file changed, 1 deletion(-) diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go index 4796d1d8fc444..02d714e3fc706 100644 --- a/plugins/outputs/dynatrace/dynatrace.go +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -31,7 +31,6 @@ var ( type Dynatrace struct { URL string `toml:"url"` APIToken string `toml:"api_token"` - InsecureSkipVerify bool `toml:"insecure_skip_verify"` Prefix string `toml:"prefix"` Log telegraf.Logger `toml:"-"` Timeout internal.Duration `toml:"timeout"` From 99b39059acc9c1d9bcacc7ebeb7b886147533a25 Mon Sep 17 00:00:00 2001 From: Thomas Schuetz Date: Thu, 20 Aug 2020 16:12:00 +0200 Subject: [PATCH 40/51] Small Doc change --- plugins/outputs/dynatrace/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/outputs/dynatrace/README.md b/plugins/outputs/dynatrace/README.md index b2fa08c0e751d..522d8e4a85024 100644 --- a/plugins/outputs/dynatrace/README.md +++ b/plugins/outputs/dynatrace/README.md @@ -37,7 +37,7 @@ You will also need to configure an API token for secure access. Find out how to ## Requirements -This plugin requires Dynatrace version 1.202 or higher. You will either need a Dynatrace OneAgent (version 1.201 or higher) installed on the same host as Telegraf; or a Dynatrace environment with version 1.202 or higher +You will either need a Dynatrace OneAgent (version 1.201 or higher) installed on the same host as Telegraf; or a Dynatrace environment with version 1.202 or higher ## Limitations Telegraf measurements which can't be converted to a float64 are skipped. 
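The Limitations wording that the last few patches keep touching is backed by the value handling in `Write`: string fields are skipped outright, NaN and ±Inf floats are dropped, integers are formatted as-is, and booleans are sent as 1/0. As a hedged illustration, here is a standalone sketch that mirrors the type switch in dynatrace.go — `formatValue` is an invented helper name for this example, not part of the plugin's actual API:

```go
package main

import (
	"fmt"
	"math"
	"strconv"
)

// formatValue sketches the conversion rule behind the README's
// "Limitations" note: it returns the string sent to Dynatrace and
// ok=false for values the plugin would skip.
func formatValue(v interface{}) (value string, ok bool) {
	switch v := v.(type) {
	case float64:
		// NaN and +/-Inf cannot be ingested, so they are dropped.
		if math.IsNaN(v) || math.IsInf(v, 0) {
			return "", false
		}
		return fmt.Sprintf("%f", v), true
	case uint64:
		return strconv.FormatUint(v, 10), true
	case int64:
		return strconv.FormatInt(v, 10), true
	case bool:
		// Booleans are mapped to 1/0 rather than skipped.
		if v {
			return "1", true
		}
		return "0", true
	default:
		// Strings and all other field types are skipped.
		return "", false
	}
}

func main() {
	for _, v := range []interface{}{3.14, math.NaN(), int64(-7), uint64(42), true, "text"} {
		if s, ok := formatValue(v); ok {
			fmt.Printf("%v -> %s\n", v, s)
		} else {
			fmt.Printf("%v -> skipped\n", v)
		}
	}
}
```

Running the sample values through this sketch makes the limitation concrete: only the string field and the non-finite float are lost; everything else reaches Dynatrace as a number.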
From 43b35f2e7729fc4ae9b9847b2448309fe4ef0d2d Mon Sep 17 00:00:00 2001 From: Thomas Schuetz Date: Thu, 20 Aug 2020 17:04:22 +0200 Subject: [PATCH 41/51] go fmtted file --- plugins/outputs/dynatrace/dynatrace.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go index 02d714e3fc706..cbab667464dac 100644 --- a/plugins/outputs/dynatrace/dynatrace.go +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -29,11 +29,11 @@ var ( // Dynatrace Configuration for the Dynatrace output plugin type Dynatrace struct { - URL string `toml:"url"` - APIToken string `toml:"api_token"` - Prefix string `toml:"prefix"` - Log telegraf.Logger `toml:"-"` - Timeout internal.Duration `toml:"timeout"` + URL string `toml:"url"` + APIToken string `toml:"api_token"` + Prefix string `toml:"prefix"` + Log telegraf.Logger `toml:"-"` + Timeout internal.Duration `toml:"timeout"` tls.ClientConfig From 6fdc2819db49ef46148c439cfabe346299fb0b5f Mon Sep 17 00:00:00 2001 From: Thomas Schuetz Date: Mon, 7 Sep 2020 12:07:31 +0200 Subject: [PATCH 42/51] Added dash to AllowedCharList --- plugins/outputs/dynatrace/dynatrace.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go index cbab667464dac..39f0e5ff92c88 100644 --- a/plugins/outputs/dynatrace/dynatrace.go +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -22,7 +22,7 @@ const ( ) var ( - reNameAllowedCharList = regexp.MustCompile("[^A-Za-z0-9.]+") + reNameAllowedCharList = regexp.MustCompile("[^A-Za-z0-9.-]+") maxDimKeyLen = 100 maxMetricKeyLen = 250 ) From 1067ad9ccd66212db3861ecdeefd63be6bd057ae Mon Sep 17 00:00:00 2001 From: Thomas Schuetz Date: Mon, 21 Sep 2020 15:24:58 +0200 Subject: [PATCH 43/51] Changed Handling of Monotonic Counters --- plugins/outputs/dynatrace/README.md | 4 ++-- plugins/outputs/dynatrace/dynatrace.go | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/plugins/outputs/dynatrace/README.md b/plugins/outputs/dynatrace/README.md index 522d8e4a85024..7c4c430016da6 100644 --- a/plugins/outputs/dynatrace/README.md +++ b/plugins/outputs/dynatrace/README.md @@ -37,7 +37,7 @@ You will also need to configure an API token for secure access. Find out how to ## Requirements -You will either need a Dynatrace OneAgent (version 1.201 or higher) installed on the same host as Telegraf; or a Dynatrace environment with version 1.202 or higher +You will either need a Dynatrace OneAgent (version 1.201 or higher) installed on the same host as Telegraf; or a Dynatrace environment with version 1.202 or higher. Monotonic counters (e.g. diskio.reads, system.uptime) require release 208 or later. ## Limitations -Telegraf measurements which can't be converted to a float64 are skipped. +Telegraf measurements which can't be converted to a float64 are skipped. 
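The README note above pairs with the code diff that follows, which starts flagging `telegraf.Counter` metrics in the ingest payload. As a rough sketch of the two line shapes involved — pieced together from the formats visible in this series' diffs and test expectations; `buildLine` is an illustrative helper, not plugin code:

```go
package main

import "fmt"

// buildLine sketches the two payload shapes Write produces for the
// Dynatrace metrics ingest: a plain gauge line versus a monotonic
// counter flagged as a count. metricID is the prefixed, normalized
// name; dims is the pre-built `,key="value"` dimension string.
func buildLine(metricID, dims, value string, isCounter bool) string {
	if isCounter {
		// telegraf.Counter metrics are marked as counts; patch 45 later
		// refines this to "count,delta=<difference>" between runs.
		return fmt.Sprintf("%s%s count,%s\n", metricID, dims, value)
	}
	return fmt.Sprintf("%s%s %s\n", metricID, dims, value)
}

func main() {
	// Matches the gauge expectation asserted in the tests earlier in
	// this series.
	fmt.Print(buildLine("mymeasurement.myfield", `,host="192.168.0.1"`, "3.140000", false))
	// A monotonic counter such as diskio.reads, per the README note above.
	fmt.Print(buildLine("diskio.reads", `,host="192.168.0.1"`, "1024", true))
}
```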
diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go index 39f0e5ff92c88..4e14849badb3d 100644 --- a/plugins/outputs/dynatrace/dynatrace.go +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -193,11 +193,15 @@ func (d *Dynatrace) Write(metrics []telegraf.Metric) error { fmt.Fprintf(&buf, "%s", tagb.String()) // write measured value - fmt.Fprintf(&buf, " %v\n", value) + switch metric.Type() { + case telegraf.Counter: + fmt.Fprintf(&buf, " count,%v\n", value) + default: + fmt.Fprintf(&buf, " %v\n", value) + } } } } - return d.send(buf.Bytes()) } From f6b3fb35581f97dccab8747220ab3d5a1062b08f Mon Sep 17 00:00:00 2001 From: Thomas Schuetz <38893055+thschue@users.noreply.github.com> Date: Mon, 21 Sep 2020 17:56:25 +0200 Subject: [PATCH 44/51] Update plugins/outputs/dynatrace/README.md Co-authored-by: Steven Soroka --- plugins/outputs/dynatrace/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/outputs/dynatrace/README.md b/plugins/outputs/dynatrace/README.md index 5bf8f1d32cbae..be8bf7408c5e0 100644 --- a/plugins/outputs/dynatrace/README.md +++ b/plugins/outputs/dynatrace/README.md @@ -41,4 +41,4 @@ You will either need a Dynatrace OneAgent (version 1.201 or higher) installed on You will either need a Dynatrace OneAgent (version 1.201 or higher) installed on the same host as Telegraf; or a Dynatrace environment with version 1.202 or higher ## Limitations -Telegraf measurements which can't be converted to a float64 are skipped. +Telegraf measurements which can't be converted to a float64 are skipped. From a105de55127f3ccc4bbf4129bacc95d6e4d915e9 Mon Sep 17 00:00:00 2001 From: Wolfgang Beer Date: Thu, 24 Sep 2020 13:10:19 +0200 Subject: [PATCH 45/51] added delta value support for monotonic count metric types --- out.txt | 0 plugins/outputs/dynatrace/dynatrace.go | 32 +++++++++++++++++--------- 2 files changed, 21 insertions(+), 11 deletions(-) create mode 100644 out.txt diff --git a/out.txt b/out.txt new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go index 4e14849badb3d..f5b403824d025 100644 --- a/plugins/outputs/dynatrace/dynatrace.go +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -3,10 +3,6 @@ package dynatrace import ( "bytes" "fmt" - "github.com/influxdata/telegraf" - "github.com/influxdata/telegraf/internal" - "github.com/influxdata/telegraf/plugins/common/tls" - "github.com/influxdata/telegraf/plugins/outputs" "io/ioutil" "math" "net/http" @@ -15,6 +11,11 @@ import ( "strconv" "strings" "time" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal" + "github.com/influxdata/telegraf/plugins/common/tls" + "github.com/influxdata/telegraf/plugins/outputs" ) const ( @@ -27,6 +28,8 @@ var ( maxMetricKeyLen = 250 ) +var counts map[string]string + // Dynatrace Configuration for the Dynatrace output plugin type Dynatrace struct { URL string `toml:"url"` @@ -188,16 +191,22 @@ func (d *Dynatrace) Write(metrics []telegraf.Metric) error { if err != nil { continue } - fmt.Fprintf(&buf, "%s", metricID) - // add the tag string - fmt.Fprintf(&buf, "%s", tagb.String()) - - // write measured value + // write metric id,tags and value switch metric.Type() { case telegraf.Counter: - fmt.Fprintf(&buf, " count,%v\n", value) + if lastvalue, ok := counts[metricID+tagb.String()]; ok { + // only send a counter if a lastvalue is found in the map + // if last value is found we can calc and send the delta value + if v, err := 
strconv.ParseFloat(lastvalue, 32); err == nil { + if v2, err := strconv.ParseFloat(value, 32); err == nil { + fmt.Fprintf(&buf, "%s%s count,delta=%f\n", metricID, tagb.String(), v2-v) + } + } + } + // put the current value into the map as last value + counts[metricID+tagb.String()] = value default: - fmt.Fprintf(&buf, " %v\n", value) + fmt.Fprintf(&buf, "%s%s %v\n", metricID, tagb.String(), value) } } } @@ -244,6 +253,7 @@ func (d *Dynatrace) send(msg []byte) error { } func (d *Dynatrace) Init() error { + counts = make(map[string]string) if len(d.URL) == 0 { d.Log.Infof("Dynatrace URL is empty, defaulting to OneAgent metrics interface") d.URL = oneAgentMetricsUrl From 30f6cacf8eaabd46d7adad68fefc2e327d3d5e98 Mon Sep 17 00:00:00 2001 From: Wolfgang Beer Date: Thu, 24 Sep 2020 13:11:56 +0200 Subject: [PATCH 46/51] deleted out.txt file --- out.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 out.txt diff --git a/out.txt b/out.txt deleted file mode 100644 index e69de29bb2d1d..0000000000000 From 90a71f71b6e8ddcd750b6b13758d69d89bd90e39 Mon Sep 17 00:00:00 2001 From: Wolfgang Beer Date: Fri, 25 Sep 2020 16:18:06 +0200 Subject: [PATCH 47/51] added a global state flush interval to prevent any long running memory leak issues --- plugins/outputs/dynatrace/dynatrace.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go index f5b403824d025..8c8fa984d82a8 100644 --- a/plugins/outputs/dynatrace/dynatrace.go +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -29,6 +29,7 @@ var ( ) var counts map[string]string +var sent = 0 // Dynatrace Configuration for the Dynatrace output plugin type Dynatrace struct { @@ -211,6 +212,11 @@ func (d *Dynatrace) Write(metrics []telegraf.Metric) error { } } } + sent++ + // in typical interval of 10s, we will clean the counter state once in 24h which is 8640 iterations + if sent%8640 == 0 { + counts = make(map[string]string) + } return d.send(buf.Bytes()) } From 8964e3c8297a28886039f17e54fd56642bf68e8b Mon Sep 17 00:00:00 2001 From: Thomas Schuetz <38893055+thschue@users.noreply.github.com> Date: Fri, 9 Oct 2020 11:23:06 +0200 Subject: [PATCH 48/51] Fix delta reset (#6) --- .circleci/config.yml | 60 +- CHANGELOG.md | 106 +- Makefile | 18 +- README.md | 9 +- docs/CONFIGURATION.md | 12 +- docs/DATA_FORMATS_OUTPUT.md | 1 + docs/LICENSE_OF_DEPENDENCIES.md | 2 + etc/telegraf.conf | 326 ++- go.mod | 23 +- go.sum | 48 +- models/log.go | 5 +- models/running_aggregator.go | 2 +- models/running_input.go | 2 +- models/running_output.go | 2 +- models/running_processor.go | 2 +- models/running_processor_test.go | 37 +- plugins/common/encoding/decoder_reader.go | 3 +- plugins/inputs/all/all.go | 3 + plugins/inputs/bcache/bcache.go | 7 +- plugins/inputs/cloudwatch/README.md | 9 +- plugins/inputs/cloudwatch/cloudwatch.go | 43 +- plugins/inputs/consul/README.md | 26 +- plugins/inputs/consul/consul.go | 33 +- plugins/inputs/consul/consul_test.go | 59 + plugins/inputs/docker/docker_test.go | 12 +- plugins/inputs/docker/docker_testdata.go | 4 +- plugins/inputs/github/github.go | 4 +- plugins/inputs/github/github_test.go | 2 +- plugins/inputs/graylog/graylog.go | 7 +- plugins/inputs/haproxy/haproxy.go | 13 +- plugins/inputs/influxdb_v2_listener/README.md | 5 +- .../influxdb_v2_listener.go | 7 +- plugins/inputs/intel_rdt/README.md | 108 + plugins/inputs/intel_rdt/intel_rdt.go | 552 ++++++ plugins/inputs/intel_rdt/intel_rdt_test.go | 277 +++ plugins/inputs/intel_rdt/intel_rdt_windows.go | 3 
+ plugins/inputs/intel_rdt/processes.go | 40 + plugins/inputs/intel_rdt/publisher.go | 171 ++ plugins/inputs/intel_rdt/publisher_test.go | 444 +++++ plugins/inputs/ipmi_sensor/connection.go | 3 +- plugins/inputs/jenkins/jenkins.go | 4 +- plugins/inputs/jenkins/jenkins_test.go | 53 + plugins/inputs/jolokia/jolokia.go | 4 +- plugins/inputs/jolokia2/client.go | 4 + plugins/inputs/kibana/kibana.go | 4 +- plugins/inputs/mcrouter/mcrouter.go | 4 +- plugins/inputs/net_response/net_response.go | 5 + plugins/inputs/openldap/openldap.go | 4 + plugins/inputs/phpfpm/phpfpm.go | 19 +- plugins/inputs/phpfpm/phpfpm_test.go | 2 +- .../inputs/processes/processes_notwindows.go | 3 + plugins/inputs/procstat/native_finder.go | 2 +- plugins/inputs/procstat/pgrep.go | 8 +- plugins/inputs/procstat/procstat.go | 4 +- plugins/inputs/prometheus/parser.go | 5 +- plugins/inputs/prometheus/prometheus.go | 9 +- plugins/inputs/proxmox/README.md | 2 + plugins/inputs/proxmox/proxmox.go | 3 + plugins/inputs/ras/README.md | 58 + plugins/inputs/ras/ras.go | 294 +++ plugins/inputs/ras/ras_test.go | 254 +++ plugins/inputs/ras/ras_windows.go | 3 + plugins/inputs/redis/README.md | 5 + plugins/inputs/redis/redis.go | 46 + plugins/inputs/redis/redis_test.go | 43 + plugins/inputs/rethinkdb/rethinkdb_server.go | 4 + plugins/inputs/smart/README.md | 159 +- plugins/inputs/smart/smart.go | 608 +++++- plugins/inputs/smart/smart_test.go | 1215 +++++++++--- plugins/inputs/snmp/README.md | 3 + plugins/inputs/snmp/snmp.go | 14 +- plugins/inputs/snmp/snmp_test.go | 7 +- plugins/inputs/snmp_legacy/snmp_legacy.go | 3 + plugins/inputs/snmp_trap/README.md | 5 +- plugins/inputs/snmp_trap/snmp_trap.go | 4 + plugins/inputs/snmp_trap/snmp_trap_test.go | 13 +- plugins/inputs/sqlserver/README.md | 4 +- plugins/inputs/sqlserver/azuresqlqueries.go | 1763 +++++++++-------- plugins/inputs/sqlserver/sqlserver.go | 2 +- plugins/inputs/tail/README.md | 17 + plugins/inputs/tail/multiline.go | 135 ++ plugins/inputs/tail/multiline_test.go | 235 +++ plugins/inputs/tail/tail.go | 126 +- plugins/inputs/tail/tail_test.go | 175 ++ plugins/inputs/tail/testdata/test-patterns | 3 + .../inputs/tail/testdata/test_multiline.log | 7 + plugins/inputs/win_eventlog/README.md | 212 ++ plugins/inputs/win_eventlog/event.go | 70 + .../inputs/win_eventlog/syscall_windows.go | 39 + plugins/inputs/win_eventlog/util.go | 155 ++ plugins/inputs/win_eventlog/util_test.go | 200 ++ plugins/inputs/win_eventlog/win_eventlog.go | 518 +++++ .../win_eventlog/win_eventlog_notwindows.go | 5 + .../inputs/win_eventlog/win_eventlog_test.go | 136 ++ .../inputs/win_eventlog/zsyscall_windows.go | 151 ++ plugins/outputs/dynatrace/README.md | 3 + plugins/outputs/dynatrace/dynatrace.go | 24 +- plugins/outputs/influxdb/http.go | 6 +- plugins/outputs/influxdb_v2/README.md | 2 +- plugins/outputs/influxdb_v2/influxdb.go | 4 +- plugins/outputs/influxdb_v2/influxdb_test.go | 10 +- plugins/outputs/kafka/kafka.go | 7 +- plugins/outputs/sumologic/README.md | 2 + plugins/outputs/sumologic/sumologic.go | 10 +- plugins/outputs/sumologic/sumologic_test.go | 87 +- plugins/outputs/warp10/warp10.go | 7 +- plugins/parsers/influx/parser.go | 4 +- plugins/parsers/influx/parser_test.go | 15 + plugins/processors/starlark/README.md | 13 +- plugins/processors/starlark/starlark.go | 13 + .../processors/starlark/testdata/json.star | 18 + plugins/processors/streamingprocessor.go | 3 + plugins/serializers/carbon2/carbon2.go | 87 +- plugins/serializers/carbon2/carbon2_test.go | 91 +- scripts/alpine.docker | 2 +- 
scripts/buster.docker | 15 + scripts/ci-1.14.docker | 2 +- scripts/{ci-1.13.docker => ci-1.15.docker} | 7 +- scripts/stretch.docker | 2 +- 119 files changed, 8191 insertions(+), 1563 deletions(-) create mode 100644 plugins/inputs/intel_rdt/README.md create mode 100644 plugins/inputs/intel_rdt/intel_rdt.go create mode 100644 plugins/inputs/intel_rdt/intel_rdt_test.go create mode 100644 plugins/inputs/intel_rdt/intel_rdt_windows.go create mode 100644 plugins/inputs/intel_rdt/processes.go create mode 100644 plugins/inputs/intel_rdt/publisher.go create mode 100644 plugins/inputs/intel_rdt/publisher_test.go create mode 100644 plugins/inputs/ras/README.md create mode 100644 plugins/inputs/ras/ras.go create mode 100644 plugins/inputs/ras/ras_test.go create mode 100644 plugins/inputs/ras/ras_windows.go create mode 100644 plugins/inputs/tail/multiline.go create mode 100644 plugins/inputs/tail/multiline_test.go create mode 100644 plugins/inputs/tail/testdata/test-patterns create mode 100644 plugins/inputs/tail/testdata/test_multiline.log create mode 100644 plugins/inputs/win_eventlog/README.md create mode 100644 plugins/inputs/win_eventlog/event.go create mode 100644 plugins/inputs/win_eventlog/syscall_windows.go create mode 100644 plugins/inputs/win_eventlog/util.go create mode 100644 plugins/inputs/win_eventlog/util_test.go create mode 100644 plugins/inputs/win_eventlog/win_eventlog.go create mode 100644 plugins/inputs/win_eventlog/win_eventlog_notwindows.go create mode 100644 plugins/inputs/win_eventlog/win_eventlog_test.go create mode 100644 plugins/inputs/win_eventlog/zsyscall_windows.go create mode 100644 plugins/processors/starlark/testdata/json.star create mode 100644 scripts/buster.docker rename scripts/{ci-1.13.docker => ci-1.15.docker} (66%) diff --git a/.circleci/config.yml b/.circleci/config.yml index dad9a5e0d1a38..2d3c152fedc8b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -4,12 +4,12 @@ defaults: working_directory: '/go/src/github.com/influxdata/telegraf' environment: GOFLAGS: -p=8 - go-1_13: &go-1_13 - docker: - - image: 'quay.io/influxdb/telegraf-ci:1.13.13' go-1_14: &go-1_14 docker: - - image: 'quay.io/influxdb/telegraf-ci:1.14.5' + - image: 'quay.io/influxdb/telegraf-ci:1.14.9' + go-1_15: &go-1_15 + docker: + - image: 'quay.io/influxdb/telegraf-ci:1.15.2' mac: &mac macos: xcode: 11.3.1 @@ -21,7 +21,7 @@ defaults: version: 2 jobs: deps: - <<: [ *defaults, *go-1_14 ] + <<: [ *defaults, *go-1_15 ] steps: - checkout - restore_cache: @@ -62,22 +62,22 @@ jobs: - 'usr/local/bin/gofmt' - 'Users/distiller/go' - test-go-1.13: - <<: [ *defaults, *go-1_13 ] + test-go-1.14: + <<: [ *defaults, *go-1_14 ] steps: - attach_workspace: at: '/go' - run: 'make' - run: 'make test' - test-go-1.13-386: - <<: [ *defaults, *go-1_13 ] + test-go-1.14-386: + <<: [ *defaults, *go-1_14 ] steps: - attach_workspace: at: '/go' - run: 'GOARCH=386 make' - run: 'GOARCH=386 make test' - test-go-1.14: - <<: [ *defaults, *go-1_14 ] + test-go-1.15: + <<: [ *defaults, *go-1_15 ] steps: - attach_workspace: at: '/go' @@ -85,8 +85,8 @@ jobs: - run: 'make check' - run: 'make check-deps' - run: 'make test' - test-go-1.14-386: - <<: [ *defaults, *go-1_14 ] + test-go-1.15-386: + <<: [ *defaults, *go-1_15 ] steps: - attach_workspace: at: '/go' @@ -103,7 +103,7 @@ jobs: - run: 'make test' package: - <<: [ *defaults, *go-1_14 ] + <<: [ *defaults, *go-1_15 ] steps: - attach_workspace: at: '/go' @@ -113,7 +113,7 @@ jobs: destination: 'build/dist' release: - <<: [ *defaults, *go-1_14 ] + <<: [ *defaults, *go-1_15 ] 
steps: - attach_workspace: at: '/go' @@ -122,7 +122,7 @@ jobs: path: './build/dist' destination: 'build/dist' nightly: - <<: [ *defaults, *go-1_14 ] + <<: [ *defaults, *go-1_15 ] steps: - attach_workspace: at: '/go' @@ -144,25 +144,25 @@ workflows: filters: tags: only: /.*/ - - 'test-go-1.13': + - 'test-go-1.14': requires: - 'deps' filters: tags: only: /.*/ - - 'test-go-1.13-386': + - 'test-go-1.14-386': requires: - 'deps' filters: tags: only: /.*/ - - 'test-go-1.14': + - 'test-go-1.15': requires: - 'deps' filters: tags: only: /.*/ - - 'test-go-1.14-386': + - 'test-go-1.15-386': requires: - 'deps' filters: @@ -177,17 +177,17 @@ workflows: - 'package': requires: - 'test-go-darwin' - - 'test-go-1.13' - - 'test-go-1.13-386' - 'test-go-1.14' - 'test-go-1.14-386' + - 'test-go-1.15' + - 'test-go-1.15-386' - 'release': requires: - 'test-go-darwin' - - 'test-go-1.13' - - 'test-go-1.13-386' - 'test-go-1.14' - 'test-go-1.14-386' + - 'test-go-1.15' + - 'test-go-1.15-386' filters: tags: only: /.*/ @@ -197,16 +197,16 @@ workflows: jobs: - 'deps' - 'macdeps' - - 'test-go-1.13': + - 'test-go-1.14': requires: - 'deps' - - 'test-go-1.13-386': + - 'test-go-1.14-386': requires: - 'deps' - - 'test-go-1.14': + - 'test-go-1.15': requires: - 'deps' - - 'test-go-1.14-386': + - 'test-go-1.15-386': requires: - 'deps' - 'test-go-darwin': @@ -215,10 +215,10 @@ workflows: - 'nightly': requires: - 'test-go-darwin' - - 'test-go-1.13' - - 'test-go-1.13-386' - 'test-go-1.14' - 'test-go-1.14-386' + - 'test-go-1.15' + - 'test-go-1.15-386' triggers: - schedule: cron: "0 7 * * *" diff --git a/CHANGELOG.md b/CHANGELOG.md index ec9540d30d56a..a5b3dfc4e85cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,48 +3,114 @@ #### Release Notes - Many documentation updates - - New [code examples](https://github.com/influxdata/telegraf/tree/master/plugins/processors/starlark/testdata) for the [Starlark processor](https://github.com/influxdata/telegraf/blob/master/plugins/processors/starlark/README.md) - - [#7837](https://github.com/influxdata/telegraf/pull/7837) `build` update Go versions: 1.14.5, 1.13.13 + - New [code examples](/plugins/processors/starlark/testdata) for the [Starlark processor](/plugins/processors/starlark/README.md) + - [#8220](https://github.com/influxdata/telegraf/pull/8220) `build` update to go 1.15 + - [#7864](https://github.com/influxdata/telegraf/pull/7864) `processors.starlark` Add logic starlark example + - [#7865](https://github.com/influxdata/telegraf/pull/7865) `common.shim` shim logger improvements - [#7920](https://github.com/influxdata/telegraf/pull/7920) `inputs.rabbitmq` remove deprecated healthcheck + - [#7932](https://github.com/influxdata/telegraf/pull/7932) Support for AWS Cloudwatch Alarms #7931 + - [#7953](https://github.com/influxdata/telegraf/pull/7953) Add details to connect to InfluxDB OSS 2 and Cloud 2 + - [#7980](https://github.com/influxdata/telegraf/pull/7980) `processors.starlark` add example input/outputs to starlark examples + - [#8054](https://github.com/influxdata/telegraf/pull/8054) add guidelines run to external plugins with execd + - [#8198](https://github.com/influxdata/telegraf/pull/8198) `inputs.influxdb_v2_listener` change default influxdb port from 9999 to 8086 to match OSS 2.0 release #### Features - [#7814](https://github.com/influxdata/telegraf/pull/7814) `agent` Send metrics in FIFO order - [#7869](https://github.com/influxdata/telegraf/pull/7869) `inputs.modbus` extend support of fixed point values on input - [#7870](https://github.com/influxdata/telegraf/pull/7870) 
`inputs.mongodb` Added new metric "pages written from cache" - - [#7808](https://github.com/influxdata/telegraf/pull/7808) `inputs.sqlserver` added new counter - Lock Timeouts (timeout > 0)/sec + - [#7875](https://github.com/influxdata/telegraf/pull/7875) `inputs.consul` input consul - added metric_version flag + - [#7894](https://github.com/influxdata/telegraf/pull/7894) `inputs.cloudwatch` Implement AWS CloudWatch Input Plugin ListMetrics API calls to use Active Metric Filter - [#7904](https://github.com/influxdata/telegraf/pull/7904) `inputs.clickhouse` add additional metrics to clickhouse input plugin - - [#7986](https://github.com/influxdata/telegraf/pull/7986) `inputs.http_listener_v2` make http header tags case insensitive + - [#7934](https://github.com/influxdata/telegraf/pull/7934) `inputs.sqlserver` Database_type config to Split up sql queries by engine type + - [#8018](https://github.com/influxdata/telegraf/pull/8018) `processors.ifname` Add addTag debugging in ifname plugin + - [#8019](https://github.com/influxdata/telegraf/pull/8019) `outputs.elasticsearch` added force_document_id option to ES output enable resend data and avoiding duplicated ES documents + - [#8025](https://github.com/influxdata/telegraf/pull/8025) `inputs.aerospike` Add set, and histogram reporting to aerospike telegraf plugin + - [#8082](https://github.com/influxdata/telegraf/pull/8082) `inputs.snmp` Add agent host tag configuration option + - [#8113](https://github.com/influxdata/telegraf/pull/8113) `inputs.smart` Add more missing NVMe attributes to smart plugin + - [#8120](https://github.com/influxdata/telegraf/pull/8120) `inputs.sqlserver` Added more performance counters to SqlServer input plugin + - [#8127](https://github.com/influxdata/telegraf/pull/8127) `agent` Sort plugin name lists for output + - [#8132](https://github.com/influxdata/telegraf/pull/8132) `outputs.sumologic` Sumo Logic output plugin: carbon2 default to include field in metric + - [#8133](https://github.com/influxdata/telegraf/pull/8133) `inputs.influxdb_v2_listener` influxdb_v2_listener - add /ready route + - [#8168](https://github.com/influxdata/telegraf/pull/8168) `processors.starlark` add json parsing support to starlark + - [#8186](https://github.com/influxdata/telegraf/pull/8186) `inputs.sqlserver` New sql server queries (Azure) + - [#8189](https://github.com/influxdata/telegraf/pull/8189) `inputs.snmp_trap` If the community string is available, add it as a tag + - [#8190](https://github.com/influxdata/telegraf/pull/8190) `inputs.tail` Semigroupoid multiline (#8167) + - [#8196](https://github.com/influxdata/telegraf/pull/8196) `inputs.redis` add functionality to get values from redis commands + +#### Bugfixes -#### Bug Fixes - - - [#7819](https://github.com/influxdata/telegraf/pull/7819) `inputs.tail` Close file to ensure it has been flushed - - [#7818](https://github.com/influxdata/telegraf/pull/7818) `build` Fix darwin package build flags - [#7816](https://github.com/influxdata/telegraf/pull/7816) `shim` fix bug with loading plugins in shim with no config - - [#7867](https://github.com/influxdata/telegraf/pull/7867) `agent` fix issue with execd restart_delay being ignored - - [#7908](https://github.com/influxdata/telegraf/pull/7908) `outputs.opentsdb` now skips NaN and Inf JSON values + - [#7818](https://github.com/influxdata/telegraf/pull/7818) `build` Fix darwin package build flags + - [#7819](https://github.com/influxdata/telegraf/pull/7819) `inputs.tail` Close file to ensure it has been flushed + - 
[#7853](https://github.com/influxdata/telegraf/pull/7853) Initialize aggregation processors
+ - [#7867](https://github.com/influxdata/telegraf/pull/7867) `inputs.execd` fix issue with execd restart_delay being ignored
+ - [#7872](https://github.com/influxdata/telegraf/pull/7872) `inputs.gnmi` Recv next message after send returns EOF
+ - [#7877](https://github.com/influxdata/telegraf/pull/7877) Fix arch name in deb/rpm builds
+ - [#7909](https://github.com/influxdata/telegraf/pull/7909) fixes issue with rpm /var/log/telegraf permissions
+ - [#7918](https://github.com/influxdata/telegraf/pull/7918) `inputs.net` fix broken link to proc.c
+ - [#7927](https://github.com/influxdata/telegraf/pull/7927) `inputs.tail` Fix tail following on EOF
+ - [#8005](https://github.com/influxdata/telegraf/pull/8005) Fix docker-image make target
+ - [#8039](https://github.com/influxdata/telegraf/pull/8039) `serializers.splunkmetric` Remove Event field as it is causing issues with pre-trained source types
+ - [#8048](https://github.com/influxdata/telegraf/pull/8048) `inputs.jenkins` Multiple escaping occurs on Jenkins URLs at certain folder depth
+ - [#8071](https://github.com/influxdata/telegraf/pull/8071) `inputs.kubernetes` add missing error check for HTTP req failure
+ - [#8145](https://github.com/influxdata/telegraf/pull/8145) `processors.execd` Increased the maximum serialized metric size in line protocol
+ - [#8159](https://github.com/influxdata/telegraf/pull/8159) `outputs.dynatrace` Dynatrace Output: change handling of monotonic counters
+ - [#8176](https://github.com/influxdata/telegraf/pull/8176) fix panic on streaming processors using logging
+ - [#8177](https://github.com/influxdata/telegraf/pull/8177) `parsers.influx` fix: plugins/parsers/influx: avoid ParseError.Error panic
+ - [#8199](https://github.com/influxdata/telegraf/pull/8199) `inputs.docker` Fix vulnerabilities found in BDBA scan
+ - [#8200](https://github.com/influxdata/telegraf/pull/8200) `inputs.sqlserver` Fixed Query mapping
+ - [#8201](https://github.com/influxdata/telegraf/pull/8201) `outputs.sumologic` Fix carbon2 serializer not falling through to field separate when carbon2_format field is unset
+ - [#8210](https://github.com/influxdata/telegraf/pull/8210) update gopsutil: fix procstat performance regression
+ - [#8162](https://github.com/influxdata/telegraf/pull/8162) Fix bool serialization when using carbon2
+ - [#8240](https://github.com/influxdata/telegraf/pull/8240) Fix bugs found by LGTM analysis platform
#### New Input Plugins
-- [proxmox](https://github.com/influxdata/telegraf/blob/master/plugins/inputs/proxmox/README.md) - Contributed by @effitient
-- [opcua](https://github.com/influxdata/telegraf/blob/master/plugins/inputs/opcua/README.md) - Contributed by @influxdata
+ - [influxdb_v2_listener](/plugins/inputs/influxdb_v2_listener/README.md) Influxdb v2 listener - Contributed by @magichair
+ - [intel_rdt](/plugins/inputs/intel_rdt/README.md) New input plugin for Intel RDT (Intel Resource Director Technology) - Contributed by @p-zak
+ - [nsd](/plugins/inputs/nsd/README.md) add nsd input plugin - Contributed by @gearnode
+ - [opcua](/plugins/inputs/opcua/README.md) Add OPC UA input plugin - Contributed by InfluxData
+ - [proxmox](/plugins/inputs/proxmox/README.md) Proxmox plugin - Contributed by @effitient
+ - [ras](/plugins/inputs/ras/README.md) New input plugin for RAS (Reliability, Availability and Serviceability) - Contributed by @p-zak
+ - [win_eventlog](/plugins/inputs/win_eventlog/README.md) Windows eventlog input 
plugin - Contributed by @simnv #### New Output Plugins -- [dynatrace](https://github.com/influxdata/telegraf/blob/master/plugins/outputs/dynatrace/README.md) - Contributed by @thschue -- [sumologic](https://github.com/influxdata/telegraf/blob/master/plugins/outputs/dynatrace/README.md) - Contributed by @pmalek-sumo - - + - [dynatrace](/plugins/outputs/dynatrace/README.md) Dynatrace output plugin - Contributed by @thschue + - [sumologic](/plugins/outputs/sumologic/README.md) Sumo Logic output plugin - Contributed by @pmalek-sumo #### New External Plugins - See [EXTERNAL_PLUGINS.md](https://github.com/influxdata/telegraf/blob/master/EXTERNAL_PLUGINS.md) for a full list of external plugins + See [EXTERNAL_PLUGINS.md](/EXTERNAL_PLUGINS.md) for a full list of external plugins - [awsalarms](https://github.com/vipinvkmenon/awsalarms) - Simple plugin to gather/monitor alarms generated in AWS. - [youtube-telegraf-plugin](https://github.com/inabagumi/youtube-telegraf-plugin) - Gather view and subscriber stats from your youtube videos + - [octoprint](https://github.com/BattleBas/octoprint-telegraf-plugin) - Gather 3d print information from the octoprint API. + +## v1.15.3 [2020-09-11] + +#### Release Notes + + - Many documentation updates + - New [code examples](https://github.com/influxdata/telegraf/tree/master/plugins/processors/starlark/testdata) for the [Starlark processor](https://github.com/influxdata/telegraf/blob/master/plugins/processors/starlark/README.md) + +#### Bugfixes + + - [#7999](https://github.com/influxdata/telegraf/pull/7999) `agent` fix minor agent error message race condition + - [#8051](https://github.com/influxdata/telegraf/pull/8051) `build` fix docker build. update dockerfiles to Go 1.14 + - [#8052](https://github.com/influxdata/telegraf/pull/8052) `shim` fix bug in shim logger affecting AddError + - [#7996](https://github.com/influxdata/telegraf/pull/7996) `shim` fix issue with shim use of config.Duration + - [#8006](https://github.com/influxdata/telegraf/pull/8006) `inputs.eventhub_consumer` Fix string to int conversion in eventhub consumer + - [#7986](https://github.com/influxdata/telegraf/pull/7986) `inputs.http_listener_v2` make http header tags case insensitive + - [#7869](https://github.com/influxdata/telegraf/pull/7869) `inputs.modbus` extend support of fixed point values on input + - [#7861](https://github.com/influxdata/telegraf/pull/7861) `inputs.ping` Fix Ping Input plugin for FreeBSD's ping6 + - [#7808](https://github.com/influxdata/telegraf/pull/7808) `inputs.sqlserver` added new counter - Lock Timeouts (timeout > 0)/sec + - [#8026](https://github.com/influxdata/telegraf/pull/8026) `inputs.vsphere` vSphere Fixed missing clustername issue 7878 + - [#8020](https://github.com/influxdata/telegraf/pull/8020) `processors.starlark` improve the quality of starlark docs by executing them as tests + - [#7976](https://github.com/influxdata/telegraf/pull/7976) `processors.starlark` add pivot example for starlark processor + - [#7134](https://github.com/influxdata/telegraf/pull/7134) `outputs.application_insights` Added the ability to set the endpoint url + - [#7908](https://github.com/influxdata/telegraf/pull/7908) `outputs.opentsdb` fix JSON handling of values NaN and Inf ## v1.15.2 [2020-07-31] diff --git a/Makefile b/Makefile index 61b5b77315b93..4dd2754ec0910 100644 --- a/Makefile +++ b/Makefile @@ -91,7 +91,7 @@ telegraf: # Used by dockerfile builds .PHONY: go-install go-install: - go install -ldflags "-w -s $(LDFLAGS)" ./cmd/telegraf + go install -mod=mod -ldflags 
"-w -s $(LDFLAGS)" ./cmd/telegraf .PHONY: test test: @@ -159,7 +159,7 @@ clean: .PHONY: docker-image docker-image: - docker build -f scripts/stretch.docker -t "telegraf:$(commit)" . + docker build -f scripts/buster.docker -t "telegraf:$(commit)" . plugins/parsers/influx/machine.go: plugins/parsers/influx/machine.go.rl ragel -Z -G2 $^ -o $@ @@ -169,15 +169,15 @@ plugin-%: @echo "Starting dev environment for $${$(@)} input plugin..." @docker-compose -f plugins/inputs/$${$(@)}/dev/docker-compose.yml up +.PHONY: ci-1.15 +ci-1.15: + docker build -t quay.io/influxdb/telegraf-ci:1.15.2 - < scripts/ci-1.15.docker + docker push quay.io/influxdb/telegraf-ci:1.15.2 + .PHONY: ci-1.14 ci-1.14: - docker build -t quay.io/influxdb/telegraf-ci:1.14.5 - < scripts/ci-1.14.docker - docker push quay.io/influxdb/telegraf-ci:1.14.5 - -.PHONY: ci-1.13 -ci-1.13: - docker build -t quay.io/influxdb/telegraf-ci:1.13.13 - < scripts/ci-1.13.docker - docker push quay.io/influxdb/telegraf-ci:1.13.13 + docker build -t quay.io/influxdb/telegraf-ci:1.14.9 - < scripts/ci-1.14.docker + docker push quay.io/influxdb/telegraf-ci:1.14.9 .PHONY: install install: $(buildbin) diff --git a/README.md b/README.md index c9a6b70f7e49e..168db50fd6a24 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ Ansible role: https://github.com/rossmcdonald/telegraf Telegraf requires Go version 1.13 or newer, the Makefile requires GNU make. -1. [Install Go](https://golang.org/doc/install) >=1.13 (1.14 recommended) +1. [Install Go](https://golang.org/doc/install) >=1.13 (1.15 recommended) 2. Clone the Telegraf repository: ``` cd ~/src @@ -211,6 +211,8 @@ For documentation on the latest development code see the [documentation index][d * [infiniband](./plugins/inputs/infiniband) * [influxdb](./plugins/inputs/influxdb) * [influxdb_listener](./plugins/inputs/influxdb_listener) +* [influxdb_v2_listener](./plugins/inputs/influxdb_v2_listener) +* [intel_rdt](./plugins/inputs/intel_rdt) * [internal](./plugins/inputs/internal) * [interrupts](./plugins/inputs/interrupts) * [ipmi_sensor](./plugins/inputs/ipmi_sensor) @@ -260,11 +262,13 @@ For documentation on the latest development code see the [documentation index][d * [nginx_sts](./plugins/inputs/nginx_sts) * [nginx_upstream_check](./plugins/inputs/nginx_upstream_check) * [nginx_vts](./plugins/inputs/nginx_vts) +* [nsd](./plugins/inputs/nsd) * [nsq_consumer](./plugins/inputs/nsq_consumer) * [nsq](./plugins/inputs/nsq) * [nstat](./plugins/inputs/nstat) * [ntpq](./plugins/inputs/ntpq) * [nvidia_smi](./plugins/inputs/nvidia_smi) +* [opcua](./plugins/inputs/opcua) * [openldap](./plugins/inputs/openldap) * [openntpd](./plugins/inputs/openntpd) * [opensmtpd](./plugins/inputs/opensmtpd) @@ -282,9 +286,11 @@ For documentation on the latest development code see the [documentation index][d * [processes](./plugins/inputs/processes) * [procstat](./plugins/inputs/procstat) * [prometheus](./plugins/inputs/prometheus) (can be used for [Caddy server](./plugins/inputs/prometheus/README.md#usage-for-caddy-http-server)) +* [proxmox](./plugins/inputs/proxmox) * [puppetagent](./plugins/inputs/puppetagent) * [rabbitmq](./plugins/inputs/rabbitmq) * [raindrops](./plugins/inputs/raindrops) +* [ras](./plugins/inputs/ras) * [redfish](./plugins/inputs/redfish) * [redis](./plugins/inputs/redis) * [rethinkdb](./plugins/inputs/rethinkdb) @@ -327,6 +333,7 @@ For documentation on the latest development code see the [documentation index][d * [papertrail](./plugins/inputs/webhooks/papertrail) * 
[particle](./plugins/inputs/webhooks/particle) * [rollbar](./plugins/inputs/webhooks/rollbar) +* [win_eventlog](./plugins/inputs/win_eventlog) * [win_perf_counters](./plugins/inputs/win_perf_counters) (windows performance counters) * [win_services](./plugins/inputs/win_services) * [wireguard](./plugins/inputs/wireguard) diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 341accefd4eea..9b8b07263b700 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -55,7 +55,7 @@ INFLUX_PASSWORD="monkey123" ``` For InfluxDB OSS 2: ``` -INFLUX_HOST="http://localhost:9999" +INFLUX_HOST="http://localhost:8086" # used to be 9999 INFLUX_TOKEN="replace_with_your_token" INFLUX_ORG="your_username" INFLUX_BUCKET="replace_with_your_bucket_name" @@ -83,7 +83,7 @@ INFLUX_BUCKET="replace_with_your_bucket_name" urls = ["${INFLUX_URL}"] skip_database_creation = ${INFLUX_SKIP_DATABASE_CREATION} password = "${INFLUX_PASSWORD}" - + # For InfluxDB OSS 2: [[outputs.influxdb_v2]] urls = ["${INFLUX_HOST}"] @@ -112,10 +112,10 @@ parsed: urls = "http://localhost:8086" skip_database_creation = true password = "monkey123" - + # For InfluxDB OSS 2: [[outputs.influxdb_v2]] - urls = ["http://127.0.0.1:9999"] + urls = ["http://127.0.0.1:8086"] # double check the port. could be 9999 if using OSS Beta token = "replace_with_your_token" org = "your_username" bucket = "replace_with_your_bucket_name" @@ -499,8 +499,8 @@ patterns is emitted. The inverse of `tagpass`. If a match is found the metric is discarded. This is tested on metrics after they have passed the `tagpass` test. -> NOTE: Due to the way TOML is parsed, `tagpass` and `tagdrop` parameters must be -defined at the *_end_* of the plugin definition, otherwise subsequent plugin config +> NOTE: Due to the way TOML is parsed, `tagpass` and `tagdrop` parameters must be +defined at the *_end_* of the plugin definition, otherwise subsequent plugin config options will be interpreted as part of the tagpass/tagdrop tables. #### Modifiers diff --git a/docs/DATA_FORMATS_OUTPUT.md b/docs/DATA_FORMATS_OUTPUT.md index a8650b250f3fd..2b3e953601218 100644 --- a/docs/DATA_FORMATS_OUTPUT.md +++ b/docs/DATA_FORMATS_OUTPUT.md @@ -11,6 +11,7 @@ plugins. 1. [Prometheus](/plugins/serializers/prometheus) 1. [SplunkMetric](/plugins/serializers/splunkmetric) 1. [Wavefront](/plugins/serializers/wavefront) +1. [ServiceNow Metrics](/plugins/serializers/nowmetric) You will be able to identify the plugins with support by the presence of a `data_format` config option, for example, in the `file` output plugin: diff --git a/docs/LICENSE_OF_DEPENDENCIES.md b/docs/LICENSE_OF_DEPENDENCIES.md index 6b800ee1743b9..d8a942e63e1ad 100644 --- a/docs/LICENSE_OF_DEPENDENCIES.md +++ b/docs/LICENSE_OF_DEPENDENCIES.md @@ -31,6 +31,7 @@ following works: - github.com/cenkalti/backoff [MIT License](https://github.com/cenkalti/backoff/blob/master/LICENSE) - github.com/cespare/xxhash [MIT License](https://github.com/cespare/xxhash/blob/master/LICENSE.txt) - github.com/cisco-ie/nx-telemetry-proto [Apache License 2.0](https://github.com/cisco-ie/nx-telemetry-proto/blob/master/LICENSE) +- github.com/containerd/containerd [Apache License 2.0](https://github.com/containerd/containerd/blob/master/LICENSE) - github.com/couchbase/go-couchbase [MIT License](https://github.com/couchbase/go-couchbase/blob/master/LICENSE) - github.com/couchbase/gomemcached [MIT License](https://github.com/couchbase/gomemcached/blob/master/LICENSE) - github.com/couchbase/goutils [COUCHBASE INC. 
COMMUNITY EDITION LICENSE](https://github.com/couchbase/goutils/blob/master/LICENSE.md) @@ -93,6 +94,7 @@ following works: - github.com/kubernetes/apimachinery [Apache License 2.0](https://github.com/kubernetes/apimachinery/blob/master/LICENSE) - github.com/leodido/ragel-machinery [MIT License](https://github.com/leodido/ragel-machinery/blob/develop/LICENSE) - github.com/mailru/easyjson [MIT License](https://github.com/mailru/easyjson/blob/master/LICENSE) +- github.com/mattn/go-sqlite3 [MIT License](https://github.com/mattn/go-sqlite3/blob/master/LICENSE) - github.com/matttproud/golang_protobuf_extensions [Apache License 2.0](https://github.com/matttproud/golang_protobuf_extensions/blob/master/LICENSE) - github.com/mdlayher/apcupsd [MIT License](https://github.com/mdlayher/apcupsd/blob/master/LICENSE.md) - github.com/mdlayher/genetlink [MIT License](https://github.com/mdlayher/genetlink/blob/master/LICENSE.md) diff --git a/etc/telegraf.conf b/etc/telegraf.conf index b44f41addcf83..486ec5941101d 100644 --- a/etc/telegraf.conf +++ b/etc/telegraf.conf @@ -294,6 +294,9 @@ # ## Instrumentation key of the Application Insights resource. # instrumentation_key = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxx" # +# ## Regions that require endpoint modification https://docs.microsoft.com/en-us/azure/azure-monitor/app/custom-endpoints +# # endpoint_url = "https://dc.services.visualstudio.com/v2/track" +# # ## Timeout for closing (default: 5s). # # timeout = "5s" # @@ -462,6 +465,39 @@ # # no configuration +# # Send telegraf metrics to a Dynatrace environment +# [[outputs.dynatrace]] +# ## For usage with the Dynatrace OneAgent you can omit any configuration, +# ## the only requirement is that the OneAgent is running on the same host. +# ## Only setup environment url and token if you want to monitor a Host without the OneAgent present. +# ## +# ## Your Dynatrace environment URL. +# ## For Dynatrace OneAgent you can leave this empty or set it to "http://127.0.0.1:14499/metrics/ingest" (default) +# ## For Dynatrace SaaS environments the URL scheme is "https://{your-environment-id}.live.dynatrace.com/api/v2/metrics/ingest" +# ## For Dynatrace Managed environments the URL scheme is "https://{your-domain}/e/{your-environment-id}/api/v2/metrics/ingest" +# url = "" +# +# ## Your Dynatrace API token. +# ## Create an API token within your Dynatrace environment, by navigating to Settings > Integration > Dynatrace API +# ## The API token needs data ingest scope permission. When using OneAgent, no API token is required. +# api_token = "" +# +# ## Optional prefix for metric names (e.g.: "telegraf.") +# prefix = "telegraf." +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# +# ## Optional flag for ignoring tls certificate check +# # insecure_skip_verify = false +# +# +# ## Connection timeout, defaults to "5s" if not set. +# timeout = "5s" + + # # Configuration for Elasticsearch to send metrics to. # [[outputs.elasticsearch]] # ## The full HTTP endpoint URL for your Elasticsearch instance @@ -516,6 +552,7 @@ # ## it will enable data resend and update metric points avoiding duplicated metrics with diferent id's # force_document_id = false + # # Send metrics to command as input over stdin # [[outputs.exec]] # ## Command to ingest metrics via stdin. @@ -716,7 +753,7 @@ # ## Multiple URLs can be specified for a single cluster, only ONE of the # ## urls will be written to each interval. 
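As a quick reference for the Dynatrace output options documented in the sample above, a minimal non-OneAgent configuration might look like this sketch; the environment id and token value are placeholders for illustration, not values taken from this patch:

```toml
[[outputs.dynatrace]]
  ## Hypothetical SaaS ingest endpoint; replace abc12345 with your environment id.
  url = "https://abc12345.live.dynatrace.com/api/v2/metrics/ingest"
  ## Placeholder token; create one with the data ingest scope as described above.
  api_token = "dt.example.replace-me"
  prefix = "telegraf."
  timeout = "5s"
```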
# ## ex: urls = ["https://us-west-2-1.aws.cloud2.influxdata.com"] -# urls = ["http://127.0.0.1:9999"] +# urls = ["http://127.0.0.1:8086"] # # ## Token for authentication. # token = "" @@ -2229,6 +2266,27 @@ # # tls_key = "/etc/telegraf/key.pem" # ## If false, skip chain & host verification # # insecure_skip_verify = true +# +# # Feature Options +# # Add namespace variable to limit the namespaces executed on +# # Leave blank to do all +# # disable_query_namespaces = true # default false +# # namespaces = ["namespace1", "namespace2"] +# +# # Enable set level telmetry +# # query_sets = true # default: false +# # Add namespace set combinations to limit sets executed on +# # Leave blank to do all sets +# # sets = ["namespace1/set1", "namespace1/set2", "namespace3"] +# +# # Histograms +# # enable_ttl_histogram = true # default: false +# # enable_object_size_linear_histogram = true # default: false +# +# # by default, aerospike produces a 100 bucket histogram +# # this is not great for most graphing tools, this will allow +# # the ability to squash this to a smaller number of buckets +# # num_histogram_buckets = 100 # default: 10 # # Read Apache status information (mod_status) @@ -2430,9 +2488,9 @@ # ## want to monitor if you have a large number of cgroups, to avoid # ## any cardinality issues. # # paths = [ -# # "/cgroup/memory", -# # "/cgroup/memory/child1", -# # "/cgroup/memory/child2/*", +# # "/sys/fs/cgroup/memory", +# # "/sys/fs/cgroup/memory/child1", +# # "/sys/fs/cgroup/memory/child2/*", # # ] # ## cgroup stat fields, as file names, globs are supported. # ## these file names are appended to each path from above. @@ -2488,6 +2546,13 @@ # ## gaps or overlap in pulled data # interval = "5m" # +# ## Recommended if "delay" and "period" are both within 3 hours of request time. Invalid values will be ignored. +# ## Recently Active feature will only poll for CloudWatch ListMetrics values that occurred within the last 3 Hours. +# ## If enabled, it will reduce total API usage of the CloudWatch ListMetrics API and require less memory to retain. +# ## Do not enable if "period" or "delay" is longer than 3 hours, as it will not return data more than 3 hours old. +# ## See https://docs.aws.amazon.com/AmazonCloudWatch/latest/APIReference/API_ListMetrics.html +# #recently_active = "PT3H" +# # ## Configure the TTL for the internal cache of metrics. # # cache_ttl = "1h" # @@ -2550,6 +2615,13 @@ # ## URI scheme for the Consul server, one of "http", "https" # # scheme = "http" # +# ## Metric version controls the mapping from Consul metrics into +# ## Telegraf metrics. 
+# ##
+# ## example: metric_version = 1; deprecated in 1.15
+# ## metric_version = 2; recommended version
+# # metric_version = 1
+#
# ## ACL token used in every request
# # token = ""
@@ -3182,7 +3254,7 @@
# ## If the response body size exceeds this limit a "body_read_error" will be raised
# # response_body_max_size = "32MiB"
#
-# ## Optional substring or regex match in body of the response
+# ## Optional substring or regex match in body of the response (case sensitive)
# # response_string_match = "\"service_status\": \"up\""
# # response_string_match = "ok"
# # response_string_match = "\".*_status\".?:.?\"up\""
@@ -3916,17 +3988,18 @@
# ## |---BA, DCBA - Little Endian
# ## |---BADC - Mid-Big Endian
# ## |---CDAB - Mid-Little Endian
-# ## data_type - INT16, UINT16, INT32, UINT32, INT64, UINT64, FLOAT32, FLOAT32-IEEE (the IEEE 754 binary representation)
+# ## data_type - INT16, UINT16, INT32, UINT32, INT64, UINT64, FLOAT32-IEEE (the IEEE 754 binary representation)
+# ## FLOAT32, FIXED, UFIXED (fixed-point representation on input)
# ## scale - the final numeric variable representation
# ## address - variable address
#
# holding_registers = [
-# { name = "power_factor", byte_order = "AB", data_type = "FLOAT32", scale=0.01, address = [8]},
-# { name = "voltage", byte_order = "AB", data_type = "FLOAT32", scale=0.1, address = [0]},
-# { name = "energy", byte_order = "ABCD", data_type = "FLOAT32", scale=0.001, address = [5,6]},
-# { name = "current", byte_order = "ABCD", data_type = "FLOAT32", scale=0.001, address = [1,2]},
-# { name = "frequency", byte_order = "AB", data_type = "FLOAT32", scale=0.1, address = [7]},
-# { name = "power", byte_order = "ABCD", data_type = "FLOAT32", scale=0.1, address = [3,4]},
+# { name = "power_factor", byte_order = "AB", data_type = "FIXED", scale=0.01, address = [8]},
+# { name = "voltage", byte_order = "AB", data_type = "FIXED", scale=0.1, address = [0]},
+# { name = "energy", byte_order = "ABCD", data_type = "FIXED", scale=0.001, address = [5,6]},
+# { name = "current", byte_order = "ABCD", data_type = "FIXED", scale=0.001, address = [1,2]},
+# { name = "frequency", byte_order = "AB", data_type = "UFIXED", scale=0.1, address = [7]},
+# { name = "power", byte_order = "ABCD", data_type = "UFIXED", scale=0.1, address = [3,4]},
# ]
# input_registers = [
# { name = "tank_level", byte_order = "AB", data_type = "INT16", scale=1.0, address = [0]},
@@ -4283,6 +4356,25 @@
# # insecure_skip_verify = false
+
+# # A plugin to collect stats from the NSD authoritative DNS name server
+# [[inputs.nsd]]
+# ## Address of server to connect to, optionally ':port'. Defaults to the
+# ## address in the nsd config file.
+# server = "127.0.0.1:8953"
+#
+# ## If running as a restricted user you can prepend sudo for additional access:
+# # use_sudo = false
+#
+# ## The default location of the nsd-control binary can be overridden with:
+# # binary = "/usr/sbin/nsd-control"
+#
+# ## The default location of the nsd config file can be overridden with:
+# # config_file = "/etc/nsd/nsd.conf"
+#
+# ## The default timeout of 1s can be overridden with:
+# # timeout = "1s"
+
+
# # Read NSQ topic and channel statistics.
# [[inputs.nsq]]
# ## An array of NSQD HTTP API endpoints
@@ -4323,6 +4415,61 @@
# # timeout = "5s"
+
+# # Retrieve data from OPCUA devices
+# [[inputs.opcua]]
+# ## Device name
+# # name = "localhost"
+# #
+# ## OPC UA Endpoint URL
+# # endpoint = "opc.tcp://localhost:4840"
+# #
+# ## Maximum time allowed to establish a connection to the endpoint.
+# # connect_timeout = "10s"
+# #
+# ## Maximum time allowed for a request over the established connection.
+# # request_timeout = "5s"
+# #
+# ## Security policy, one of "None", "Basic128Rsa15", "Basic256",
+# ## "Basic256Sha256", or "auto"
+# # security_policy = "auto"
+# #
+# ## Security mode, one of "None", "Sign", "SignAndEncrypt", or "auto"
+# # security_mode = "auto"
+# #
+# ## Path to cert.pem. Required when security mode or policy isn't "None".
+# ## If cert path is not supplied, self-signed cert and key will be generated.
+# # certificate = "/etc/telegraf/cert.pem"
+# #
+# ## Path to private key.pem. Required when security mode or policy isn't "None".
+# ## If key path is not supplied, self-signed cert and key will be generated.
+# # private_key = "/etc/telegraf/key.pem"
+# #
+# ## Authentication Method, one of "Certificate", "UserName", or "Anonymous". To
+# ## authenticate using a specific ID, select 'Certificate' or 'UserName'
+# # auth_method = "Anonymous"
+# #
+# ## Username. Required for auth_method = "UserName"
+# # username = ""
+# #
+# ## Password. Required for auth_method = "UserName"
+# # password = ""
+# #
+# ## Node ID configuration
+# ## name - the variable name
+# ## namespace - integer value 0 through 3
+# ## identifier_type - s=string, i=numeric, g=guid, b=opaque
+# ## identifier - tag as shown in opcua browser
+# ## data_type - boolean, byte, short, int, uint, uint16, int16,
+# ## uint32, int32, float, double, string, datetime, number
+# ## Example:
+# ## {name="ProductUri", namespace="0", identifier_type="i", identifier="2262", data_type="string", description="http://open62541.org"}
+# nodes = [
+# {name="", namespace="", identifier_type="", identifier="", data_type="", description=""},
+# {name="", namespace="", identifier_type="", identifier="", data_type="", description=""},
+# ]
+
+
# # OpenLDAP cn=Monitor plugin
# [[inputs.openldap]]
# host = "localhost"
@@ -4575,6 +4722,23 @@
# # pid_finder = "pgrep"
+
+# # Provides metrics from Proxmox nodes (Proxmox Virtual Environment > 6.2).
+# [[inputs.proxmox]]
+# ## API connection configuration. The API token was introduced in Proxmox v6.2. Required permissions for user and token: PVEAuditor role on /.
+# base_url = "https://localhost:8006/api2/json"
+# api_token = "USER@REALM!TOKENID=UUID"
+#
+# ## Optional TLS Config
+# # tls_ca = "/etc/telegraf/ca.pem"
+# # tls_cert = "/etc/telegraf/cert.pem"
+# # tls_key = "/etc/telegraf/key.pem"
+# ## Use TLS but skip chain & host verification
+# insecure_skip_verify = false
+#
+# # HTTP response timeout (default: 5s)
+# response_timeout = "5s"
+
+
# # Reads last_run_summary.yaml file and converts to measurements
# [[inputs.puppetagent]]
# ## Location of puppet last run summary file
@@ -4641,6 +4805,13 @@
# urls = ["http://localhost:8080/_raindrops"]
+
+# # RAS plugin exposes counter metrics for Machine Check Errors provided by RASDaemon (sqlite3 output is required).
+# [[inputs.ras]]
+# ## Optional path to RASDaemon sqlite3 database.
+# ## Default: /var/lib/rasdaemon/ras-mc_event.db
+# # db_path = ""
+
+
# # Read CPU, Fans, Powersupply and Voltage metrics of hardware server through redfish APIs
# [[inputs.redfish]]
# ## Server url
@@ -4677,6 +4848,12 @@
# ## If no port is specified, 6379 is used
# servers = ["tcp://localhost:6379"]
#
+# ## Optional. Specify redis commands to retrieve values
+# # [[inputs.redis.commands]]
+# # command = ["get", "sample-key"]
+# # field = "sample-key-value"
+# # type = "string"
+#
# ## specify server password
# # password = "s#cr@t%"
#
@@ -4745,16 +4922,24 @@
# # Read metrics from storage devices supporting S.M.A.R.T.
# [[inputs.smart]]
# ## Optionally specify the path to the smartctl executable
-# # path = "/usr/bin/smartctl"
+# # path_smartctl = "/usr/bin/smartctl"
#
-# ## On most platforms smartctl requires root access.
-# ## Setting 'use_sudo' to true will make use of sudo to run smartctl.
-# ## Sudo must be configured to to allow the telegraf user to run smartctl
+# ## Optionally specify the path to the nvme-cli executable
+# # path_nvme = "/usr/bin/nvme"
+#
+# ## Optionally specify if vendor specific attributes should be propagated for NVMe disk case
+# ## ["auto-on"] - automatically find and enable additional vendor specific disk info
+# ## ["vendor1", "vendor2", ...] - e.g. "Intel" enable additional Intel specific disk info
+# # enable_extensions = ["auto-on"]
+#
+# ## On most platforms the CLI utilities used require root access.
+# ## Setting 'use_sudo' to true will make use of sudo to run smartctl or nvme-cli.
+# ## Sudo must be configured to allow the telegraf user to run smartctl or nvme-cli
# ## without a password.
# # use_sudo = false
#
# ## Skip checking disks in this power mode. Defaults to
-# ## "standby" to not wake up disks that have stoped rotating.
+# ## "standby" to not wake up disks that have stopped rotating.
# ## See --nocheck in the man pages for smartctl.
# ## smartctl version 5.41 and 5.42 have faulty detection of
# ## power mode and might require changing this value to
@@ -4765,16 +4950,15 @@
# ## information from each drive into the 'smart_attribute' measurement.
# # attributes = false
#
-# ## Optionally specify devices to exclude from reporting.
+# ## Optionally specify devices to exclude from reporting if disks auto-discovery is performed.
# # excludes = [ "/dev/pass6" ]
#
# ## Optionally specify devices and device type, if unset
-# ## a scan (smartctl --scan) for S.M.A.R.T. devices will
-# ## done and all found will be included except for the
-# ## excluded in excludes.
-# # devices = [ "/dev/ada0 -d atacam" ]
+# ## a scan (smartctl --scan and smartctl --scan -d nvme) for S.M.A.R.T. devices will be done
+# ## and all found will be included except for the excluded in excludes.
+# # devices = [ "/dev/ada0 -d atacam", "/dev/nvme0"]
#
-# ## Timeout for the smartctl command to complete.
+# ## Timeout for the cli command to complete.
# # timeout = "30s"
@@ -4791,6 +4975,9 @@
# ## SNMP version; can be 1, 2, or 3.
# # version = 2
#
+# ## Agent host tag; the tag used to reference the source host
+# # agent_host_tag = "agent_host"
+#
# ## SNMP community string.
# # community = "public"
#
@@ -4952,6 +5139,15 @@
# # "Server=192.168.1.10;Port=1433;User Id=;Password=;app name=telegraf;log=1;",
# # ]
#
+# ## This enables a specific set of queries depending on the database type. If specified, it replaces azuredb = true/false and query_version = 2
+# ## In the config file, the sql server plugin section should be repeated each with a set of servers for a specific database_type.
+# ## Possible values for database_type are
+# ## "AzureSQLDB"
+# ## "SQLServer"
+# ## "AzureSQLManagedInstance"
+# # database_type = "AzureSQLDB"
+#
+#
# ## Optional parameter, setting this to 2 will use a new version
# ## of the collection queries that break compatibility with the original
# ## dashboards.
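A hedged example of the new database_type selector described above, reusing the connection-string format from the sample; the server address and credentials are placeholders:

```toml
[[inputs.sqlserver]]
  ## Placeholder DSN modeled on the sample above; substitute real credentials.
  servers = ["Server=192.168.1.10;Port=1433;User Id=telegraf;Password=secret;app name=telegraf;log=1;"]
  ## One of "AzureSQLDB", "SQLServer", "AzureSQLManagedInstance".
  database_type = "AzureSQLDB"
```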
@@ -4972,6 +5168,7 @@ # ## - SqlRequests # ## - VolumeSpace # ## - Cpu +# # ## Version 1: # ## - PerformanceCounters # ## - WaitStatsCategorized @@ -4984,6 +5181,11 @@ # ## - VolumeSpace # ## - PerformanceMetrics # +# +# ## Queries enabled by default for specific Database Type +# ## database_type = AzureSQLDB +# ## AzureDBWaitStats, AzureDBResourceStats, AzureDBResourceGovernance, sqlAzureDBDatabaseIO +# # ## A list of queries to include. If not specified, all the above listed queries are used. # # include_query = [] # @@ -5346,6 +5548,32 @@ ############################################################################### +# # Intel Resource Director Technology plugin +# [[inputs.IntelRDT]] +# ## Optionally set sampling interval to Nx100ms. +# ## This value is propagated to pqos tool. Interval format is defined by pqos itself. +# ## If not provided or provided 0, will be set to 10 = 10x100ms = 1s. +# # sampling_interval = "10" +# +# ## Optionally specify the path to pqos executable. +# ## If not provided, auto discovery will be performed. +# # pqos_path = "/usr/local/bin/pqos" +# +# ## Optionally specify if IPC and LLC_Misses metrics shouldn't be propagated. +# ## If not provided, default value is false. +# # shortened_metrics = false +# +# ## Specify the list of groups of CPU core(s) to be provided as pqos input. +# ## Mandatory if processes aren't set and forbidden if processes are specified. +# ## e.g. ["0-3", "4,5,6"] or ["1-3,4"] +# # cores = ["0-3"] +# +# ## Specify the list of processes for which Metrics will be collected. +# ## Mandatory if cores aren't set and forbidden if cores are specified. +# ## e.g. ["qemu", "pmd"] +# # processes = ["process"] + + # # AMQP consumer plugin # [[inputs.amqp_consumer]] # ## Broker to consume from. @@ -5476,7 +5704,7 @@ # # Read metrics from one or many ClickHouse servers # [[inputs.clickhouse]] # ## Username for authorization on ClickHouse server -# ## example: user = "default"" +# ## example: username = "default"" # username = "default" # # ## Password for authorization on ClickHouse server @@ -5993,6 +6221,35 @@ # # basic_password = "barfoo" +# # Accept metrics over InfluxDB 2.x HTTP API +# [[inputs.influxdb_v2_listener]] +# ## Address and port to host InfluxDB listener on +# ## (Double check the port. Could be 9999 if using OSS Beta) +# service_address = ":8086" +# +# ## Maximum allowed HTTP request body size in bytes. +# ## 0 means to use the default of 32MiB. +# # max_body_size = "32MiB" +# +# ## Optional tag to determine the bucket. +# ## If the write has a bucket in the query string then it will be kept in this tag name. +# ## This tag can be used in downstream outputs. +# ## The default value of nothing means it will be off and the database will not be recorded. +# # bucket_tag = "" +# +# ## Set one or more allowed client CA certificate file names to +# ## enable mutually authenticated TLS connections +# # tls_allowed_cacerts = ["/etc/telegraf/clientca.pem"] +# +# ## Add service certificate and key +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# +# ## Optional token to accept for HTTP authentication. +# ## You probably want to make sure you have TLS configured above for this. 
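For the influxdb_v2_listener options above, a minimal locked-down sketch; the certificate paths and token value are placeholders, not values from this patch:

```toml
[[inputs.influxdb_v2_listener]]
  service_address = ":8086"   # 9999 only if still on the OSS 2.0 beta
  bucket_tag = "bucket"
  ## Hypothetical TLS material; the token option assumes TLS is configured.
  tls_cert = "/etc/telegraf/cert.pem"
  tls_key = "/etc/telegraf/key.pem"
  token = "some-long-shared-secret-token"
```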
+# # token = "some-long-shared-secret-token" + + # # Read JTI OpenConfig Telemetry from listed sensors # [[inputs.jti_openconfig_telemetry]] # ## List of device addresses to collect telemetry from @@ -6848,6 +7105,27 @@ # ## more about them here: # ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md # data_format = "influx" +# +# ## multiline parser/codec +# ## https://www.elastic.co/guide/en/logstash/2.4/plugins-filters-multiline.html +# #[inputs.tail.multiline] +# ## The pattern should be a regexp which matches what you believe to be an +# ## indicator that the field is part of an event consisting of multiple lines of log data. +# #pattern = "^\s" +# +# ## This field must be either "previous" or "next". +# ## If a line matches the pattern, "previous" indicates that it belongs to the previous line, +# ## whereas "next" indicates that the line belongs to the next one. +# #match_which_line = "previous" +# +# ## The invert_match field can be true or false (defaults to false). +# ## If true, a message not matching the pattern will constitute a match of the multiline +# ## filter and the what will be applied. (vice-versa is also true) +# #invert_match = false +# +# ## After the specified timeout, this plugin sends a multiline event even if no new pattern +# ## is found to start a new event. The default timeout is 5s. +# #timeout = 5s # # Generic TCP listener diff --git a/go.mod b/go.mod index b91c39b98c650..2cae7859a7d66 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/influxdata/telegraf -go 1.13 +go 1.15 require ( cloud.google.com/go v0.53.0 @@ -10,6 +10,7 @@ require ( collectd.org v0.3.0 github.com/Azure/azure-event-hubs-go/v3 v3.2.0 github.com/Azure/azure-storage-queue-go v0.0.0-20181215014128-6ed74e755687 + github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 // indirect github.com/Azure/go-autorest/autorest v0.9.3 github.com/Azure/go-autorest/autorest/azure/auth v0.4.2 github.com/BurntSushi/toml v0.3.1 @@ -25,7 +26,7 @@ require ( github.com/aristanetworks/glog v0.0.0-20191112221043-67e8567f59f3 // indirect github.com/aristanetworks/goarista v0.0.0-20190325233358-a123909ec740 github.com/armon/go-metrics v0.3.0 // indirect - github.com/aws/aws-sdk-go v1.30.9 + github.com/aws/aws-sdk-go v1.33.12 github.com/benbjohnson/clock v1.0.3 github.com/bitly/go-hostpool v0.1.0 // indirect github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 @@ -33,6 +34,7 @@ require ( github.com/cenkalti/backoff v2.0.0+incompatible // indirect github.com/cisco-ie/nx-telemetry-proto v0.0.0-20190531143454-82441e232cf6 github.com/cockroachdb/apd v1.1.0 // indirect + github.com/containerd/containerd v1.4.1 // indirect github.com/couchbase/go-couchbase v0.0.0-20180501122049-16db1f1fe037 github.com/couchbase/gomemcached v0.0.0-20180502221210-0da75df14530 // indirect github.com/couchbase/goutils v0.0.0-20180530154633-e865a1461c8a // indirect @@ -40,7 +42,7 @@ require ( github.com/dgrijalva/jwt-go v3.2.0+incompatible github.com/dimchansky/utfbom v1.1.0 github.com/docker/distribution v2.6.0-rc.1.0.20170726174610-edc3ab29cdff+incompatible // indirect - github.com/docker/docker v1.4.2-0.20180327123150-ed7b6428c133 + github.com/docker/docker v17.12.0-ce-rc1.0.20200916142827-bd33bbf0497b+incompatible github.com/docker/go-connections v0.3.0 // indirect github.com/docker/go-units v0.3.3 // indirect github.com/docker/libnetwork v0.8.0-dev.2.0.20181012153825-d7b61745d166 @@ -50,7 +52,7 @@ require ( github.com/glinton/ping v0.1.4-0.20200311211934-5ac87da8cd96 
github.com/go-logfmt/logfmt v0.4.0 github.com/go-ole/go-ole v1.2.1 // indirect - github.com/go-redis/redis v6.12.0+incompatible + github.com/go-redis/redis v6.15.9+incompatible github.com/go-sql-driver/mysql v1.5.0 github.com/goburrow/modbus v0.1.0 github.com/goburrow/serial v0.1.0 // indirect @@ -60,11 +62,9 @@ require ( github.com/golang/geo v0.0.0-20190916061304-5b978397cfec github.com/golang/protobuf v1.3.5 github.com/google/go-cmp v0.4.0 - github.com/google/go-github v17.0.0+incompatible - github.com/google/go-querystring v1.0.0 // indirect + github.com/google/go-github/v32 v32.1.0 github.com/gopcua/opcua v0.1.12 github.com/gorilla/mux v1.6.2 - github.com/gotestyourself/gotestyourself v2.2.0+incompatible // indirect github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed // indirect github.com/harlow/kinesis-consumer v0.3.1-0.20181230152818-2f58b136fee0 github.com/hashicorp/consul v1.2.1 @@ -88,10 +88,12 @@ require ( github.com/leesper/go_rng v0.0.0-20190531154944-a612b043e353 // indirect github.com/lib/pq v1.3.0 // indirect github.com/mailru/easyjson v0.0.0-20180717111219-efc7eb8984d6 // indirect + github.com/mattn/go-sqlite3 v1.14.0 github.com/matttproud/golang_protobuf_extensions v1.0.1 github.com/mdlayher/apcupsd v0.0.0-20190314144147-eb3dd99a75fe github.com/miekg/dns v1.0.14 github.com/mitchellh/go-testing-interface v1.0.0 // indirect + github.com/morikuni/aec v1.0.0 // indirect github.com/multiplay/go-ts3 v1.0.0 github.com/naoina/go-stringutil v0.1.0 // indirect github.com/nats-io/nats-server/v2 v2.1.4 @@ -108,10 +110,11 @@ require ( github.com/prometheus/client_golang v1.5.1 github.com/prometheus/client_model v0.2.0 github.com/prometheus/common v0.9.1 + github.com/prometheus/procfs v0.0.8 github.com/safchain/ethtool v0.0.0-20200218184317-f459e2d13664 github.com/samuel/go-zookeeper v0.0.0-20180130194729-c4fab1ac1bec // indirect github.com/satori/go.uuid v1.2.1-0.20181028125025-b2ce2384e17b // indirect - github.com/shirou/gopsutil v2.20.7+incompatible + github.com/shirou/gopsutil v2.20.9+incompatible github.com/shopspring/decimal v0.0.0-20200105231215-408a2507e114 // indirect github.com/sirupsen/logrus v1.4.2 github.com/soniah/gosnmp v1.25.0 @@ -128,10 +131,10 @@ require ( github.com/wvanbergen/kafka v0.0.0-20171203153745-e2edea948ddf github.com/wvanbergen/kazoo-go v0.0.0-20180202103751-f72d8611297a // indirect github.com/yuin/gopher-lua v0.0.0-20180630135845-46796da1b0b4 // indirect - go.starlark.net v0.0.0-20191227232015-caa3e9aa5008 + go.starlark.net v0.0.0-20200901195727-6e684ef5eeee golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6 // indirect golang.org/x/lint v0.0.0-20200302205851-738671d3881b // indirect - golang.org/x/net v0.0.0-20200301022130-244492dfa37a + golang.org/x/net v0.0.0-20200707034311-ab3426394381 golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a golang.org/x/sys v0.0.0-20200826173525-f9321e4c35a6 diff --git a/go.sum b/go.sum index ed84b5f2556b4..5973e475a3d35 100644 --- a/go.sum +++ b/go.sum @@ -44,6 +44,8 @@ github.com/Azure/azure-storage-queue-go v0.0.0-20181215014128-6ed74e755687 h1:7M github.com/Azure/azure-storage-queue-go v0.0.0-20181215014128-6ed74e755687/go.mod h1:K6am8mT+5iFXgingS9LUc7TmbsW6XBw3nxaRyaMyWc8= github.com/Azure/go-amqp v0.12.6 h1:34yItuwhA/nusvq2sPSNPQxZLCf/CtaogYH8n578mnY= github.com/Azure/go-amqp v0.12.6/go.mod h1:qApuH6OFTSKZFmCOxccvAv5rLizBQf4v8pRmG138DPo= +github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 
h1:w+iIsaOQNcT7OZ575w+acHgRric5iCyQh+xv+KJ4HB8= +github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8= github.com/Azure/go-autorest/autorest v0.9.0/go.mod h1:xyHB1BMZT0cuDHU7I0+g046+BFDTQ8rEZB0s4Yfa6bI= github.com/Azure/go-autorest/autorest v0.9.3 h1:OZEIaBbMdUE/Js+BQKlpO81XlISgipr6yDJ+PSwsgi4= github.com/Azure/go-autorest/autorest v0.9.3/go.mod h1:GsRuLYvwzLjjjRoWEIyMUaYq8GNUx2nRB378IPt/1p0= @@ -81,6 +83,7 @@ github.com/Microsoft/ApplicationInsights-Go v0.4.2/go.mod h1:CukZ/G66zxXtI+h/VcV github.com/Microsoft/go-winio v0.4.9 h1:3RbgqgGVqmcpbOiwrjbVtDHLlJBGF6aE+yHmNtBNsFQ= github.com/Microsoft/go-winio v0.4.9/go.mod h1:VhR8bwka0BXejwEJY73c50VrPtXAaKcyvVC4A4RozmA= github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ= +github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo= @@ -101,6 +104,7 @@ github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4 h1:Hs82Z41s6SdL1C github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/amir/raidman v0.0.0-20170415203553-1ccc43bfb9c9 h1:FXrPTd8Rdlc94dKccl7KPmdmIbVh/OjelJ8/vgMRzcQ= github.com/amir/raidman v0.0.0-20170415203553-1ccc43bfb9c9/go.mod h1:eliMa/PW+RDr2QLWRmLH1R1ZA4RInpmvOzDDXtaIZkc= +github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= github.com/apache/thrift v0.12.0 h1:pODnxUFNcjP9UTLZGTdeh+j16A8lJbRvD3rOtrk/7bs= github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= github.com/aristanetworks/glog v0.0.0-20191112221043-67e8567f59f3 h1:Bmjk+DjIi3tTAU0wxGaFbfjGUqlxxSXARq9A96Kgoos= @@ -110,8 +114,8 @@ github.com/aristanetworks/goarista v0.0.0-20190325233358-a123909ec740/go.mod h1: github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= github.com/armon/go-metrics v0.3.0 h1:B7AQgHi8QSEi4uHu7Sbsga+IJDU+CENgjxoo81vDUqU= github.com/armon/go-metrics v0.3.0/go.mod h1:zXjbSimjXTd7vOpY8B0/2LpvNvDoXBuplAD+gJD3GYs= -github.com/aws/aws-sdk-go v1.30.9 h1:DntpBUKkchINPDbhEzDRin1eEn1TG9TZFlzWPf0i8to= -github.com/aws/aws-sdk-go v1.30.9/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0= +github.com/aws/aws-sdk-go v1.33.12 h1:eydMoSwfrSTD9PWKUJOiDL7+/UwDW8AjInUGVE5Llh4= +github.com/aws/aws-sdk-go v1.33.12/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0= github.com/benbjohnson/clock v1.0.3 h1:vkLuvpK4fmtSCuo60+yC63p7y0BmQ8gm5ZXGuBCJyXg= github.com/benbjohnson/clock v1.0.3/go.mod h1:bGMdMPoPVvcYyt1gHDf4J2KE153Yf9BuiUKYMaxlTDM= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973 h1:xJ4a3vCFaGF/jqvzLMYoU8P317H5OQ+Via4RmuPwCS0= @@ -141,6 +145,8 @@ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDk github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cockroachdb/apd v1.1.0 h1:3LFP3629v+1aKXU5Q37mxmRxX/pIu1nijXydLShEq5I= github.com/cockroachdb/apd v1.1.0/go.mod h1:8Sl8LxpKi29FqWXR16WEFZRNSz3SoPzUzeMeY4+DwBQ= +github.com/containerd/containerd v1.4.1 
h1:pASeJT3R3YyVn+94qEPk0SnU1OQ20Jd/T+SPKy9xehY= +github.com/containerd/containerd v1.4.1/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA= github.com/couchbase/go-couchbase v0.0.0-20180501122049-16db1f1fe037 h1:Dbz60fpCq04vRxVVVJLbQuL0G7pRt0Gyo2BkozFc4SQ= github.com/couchbase/go-couchbase v0.0.0-20180501122049-16db1f1fe037/go.mod h1:TWI8EKQMs5u5jLKW/tsb9VwauIrMIxQG1r5fMsswK5U= github.com/couchbase/gomemcached v0.0.0-20180502221210-0da75df14530 h1:F8nmbiuX+gCz9xvWMi6Ak8HQntB4ATFXP46gaxifbp4= @@ -161,8 +167,8 @@ github.com/dimchansky/utfbom v1.1.0 h1:FcM3g+nofKgUteL8dm/UpdRXNC9KmADgTpLKsu0TR github.com/dimchansky/utfbom v1.1.0/go.mod h1:rO41eb7gLfo8SF1jd9F8HplJm1Fewwi4mQvIirEdv+8= github.com/docker/distribution v2.6.0-rc.1.0.20170726174610-edc3ab29cdff+incompatible h1:357nGVUC8gSpeSc2Axup8HfrfTLLUfWfCsCUhiQSKIg= github.com/docker/distribution v2.6.0-rc.1.0.20170726174610-edc3ab29cdff+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= -github.com/docker/docker v1.4.2-0.20180327123150-ed7b6428c133 h1:Kus8nU6ctI/u/l86ljUJl6GpUtmO7gtD/krn4u5dr0M= -github.com/docker/docker v1.4.2-0.20180327123150-ed7b6428c133/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= +github.com/docker/docker v17.12.0-ce-rc1.0.20200916142827-bd33bbf0497b+incompatible h1:SiUATuP//KecDjpOK2tvZJgeScYAklvyjfK8JZlU6fo= +github.com/docker/docker v17.12.0-ce-rc1.0.20200916142827-bd33bbf0497b+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/go-connections v0.3.0 h1:3lOnM9cSzgGwx8VfK/NGOW5fLQ0GjIlCkaktF+n1M6o= github.com/docker/go-connections v0.3.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= github.com/docker/go-units v0.3.3 h1:Xk8S3Xj5sLGlG5g67hJmYMmUgXv5N4PhkjJHHqrwnTk= @@ -214,8 +220,8 @@ github.com/go-openapi/jsonpointer v0.0.0-20160704185906-46af16f9f7b1/go.mod h1:+ github.com/go-openapi/jsonreference v0.0.0-20160704190145-13c6e3589ad9/go.mod h1:W3Z9FmVs9qj+KR4zFKmDPGiLdk1D9Rlm7cyMvf57TTg= github.com/go-openapi/spec v0.0.0-20160808142527-6aced65f8501/go.mod h1:J8+jY1nAiCcj+friV/PDoE1/3eeccG9LYBs0tYvLOWc= github.com/go-openapi/swag v0.0.0-20160704191624-1d0bd113de87/go.mod h1:DXUve3Dpr1UfpPtxFw+EFuQ41HhCWZfha5jSVRG7C7I= -github.com/go-redis/redis v6.12.0+incompatible h1:s+64XI+z/RXqGHz2fQSgRJOEwqqSXeX3dliF7iVkMbE= -github.com/go-redis/redis v6.12.0+incompatible/go.mod h1:NAIEuMOZ/fxfXJIrKDQDz8wamY7mA7PouImQ2Jvg6kA= +github.com/go-redis/redis v6.15.9+incompatible h1:K0pv1D7EQUjfyoMql+r/jZqCLizCGKFlFgcHWWmHQjg= +github.com/go-redis/redis v6.15.9+incompatible/go.mod h1:NAIEuMOZ/fxfXJIrKDQDz8wamY7mA7PouImQ2Jvg6kA= github.com/go-sql-driver/mysql v1.5.0 h1:ozyZYNQW3x3HtqT1jira07DN2PArx2v7/mN66gGcHOs= github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= @@ -271,8 +277,8 @@ github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4= github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-github v17.0.0+incompatible h1:N0LgJ1j65A7kfXrZnUDaYCs/Sf4rEjNlfyDHW9dolSY= -github.com/google/go-github v17.0.0+incompatible/go.mod h1:zLgOLi98H3fifZn+44m+umXrS52loVEgC2AApnigrVQ= +github.com/google/go-github/v32 v32.1.0 h1:GWkQOdXqviCPx7Q7Fj+KyPoGm4SwHRh8rheoPhd27II= +github.com/google/go-github/v32 v32.1.0/go.mod 
h1:rIEpZD9CTDQwDK9GDrtMTycQNA4JU3qBsCizh3q2WCI= github.com/google/go-querystring v1.0.0 h1:Xkwi/a1rcvNg1PPYe5vI8GbeBY/jrVuDX5ASuANWTrk= github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= github.com/google/gofuzz v0.0.0-20161122191042-44d81051d367/go.mod h1:HP5RmnzzSNb993RKQDq4+1A4ia9nllfqcQFTQJedwGI= @@ -298,8 +304,6 @@ github.com/gorilla/context v1.1.1 h1:AWwleXJkX/nhcU9bZSnZoi3h/qGYqQAGhq6zZe/aQW8 github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg= github.com/gorilla/mux v1.6.2 h1:Pgr17XVTNXAk3q/r4CpKzC5xBM/qW1uVLV+IhRZpIIk= github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= -github.com/gotestyourself/gotestyourself v2.2.0+incompatible h1:AQwinXlbQR2HvPjQZOmDhRqsv5mZf+Jb1RnSLxcqZcI= -github.com/gotestyourself/gotestyourself v2.2.0+incompatible/go.mod h1:zZKM6oeNM8k+FRljX1mnzVYeS8wiGgQyvST1/GafPbY= github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed h1:5upAirOpQc1Q53c0bnx2ufif5kANL7bfZWcc6VJWJd8= github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed/go.mod h1:tMWxXQ9wFIaZeTI9F+hmhFiGpFmhOHzyShyFUhRm0H4= github.com/harlow/kinesis-consumer v0.3.1-0.20181230152818-2f58b136fee0 h1:U0KvGD9CJIl1nbgu9yLsfWxMT6WqL8fG0IBB7RvOZZQ= @@ -401,6 +405,8 @@ github.com/lib/pq v1.3.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/mailru/easyjson v0.0.0-20160728113105-d5b7844b561a/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.0.0-20180717111219-efc7eb8984d6 h1:8/+Y8SKf0xCZ8cCTfnrMdY7HNzlEjPAt3bPjalNb6CA= github.com/mailru/easyjson v0.0.0-20180717111219-efc7eb8984d6/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/mattn/go-sqlite3 v1.14.0 h1:mLyGNKR8+Vv9CAU7PphKa2hkEqxxhn8i32J6FPj1/QA= +github.com/mattn/go-sqlite3 v1.14.0/go.mod h1:JIl7NbARA7phWnGvh0LKTyg7S9BA+6gx71ShQilpsus= github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/mdlayher/apcupsd v0.0.0-20190314144147-eb3dd99a75fe h1:yMrL+YorbzaBpj/h3BbLMP+qeslPZYMbzcpHFBNy1Yk= @@ -426,6 +432,8 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJ github.com/modern-go/reflect2 v0.0.0-20180320133207-05fbef0ca5da/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= +github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= github.com/multiplay/go-ts3 v1.0.0 h1:loxtEFqvYtpoGh1jOqEt6aDzctYuQsi3vb3dMpvWiWw= github.com/multiplay/go-ts3 v1.0.0/go.mod h1:14S6cS3fLNT3xOytrA/DkRyAFNuQLMLEqOYAsf87IbQ= github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= @@ -522,8 +530,8 @@ github.com/samuel/go-zookeeper v0.0.0-20180130194729-c4fab1ac1bec h1:6ncX5ko6B9L github.com/samuel/go-zookeeper v0.0.0-20180130194729-c4fab1ac1bec/go.mod h1:gi+0XIa01GRL2eRQVjQkKGqKF3SF9vZR/HnPullcV2E= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod 
h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= -github.com/shirou/gopsutil v2.20.7+incompatible h1:Ymv4OD12d6zm+2yONe39VSmp2XooJe8za7ngOLW/o/w= -github.com/shirou/gopsutil v2.20.7+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= +github.com/shirou/gopsutil v2.20.9+incompatible h1:msXs2frUV+O/JLva9EDLpuJ84PrFsdCTCQex8PUdtkQ= +github.com/shirou/gopsutil v2.20.9+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= github.com/shopspring/decimal v0.0.0-20200105231215-408a2507e114 h1:Pm6R878vxWWWR+Sa3ppsLce/Zq+JNTs6aVvRu13jv9A= github.com/shopspring/decimal v0.0.0-20200105231215-408a2507e114/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= github.com/sirupsen/logrus v1.2.0 h1:juTguoYk5qI21pwyTXY3B3Y5cOTH3ZUyZCg1v/mihuo= @@ -583,8 +591,8 @@ go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.3 h1:8sGtKOrtQqkN1bp2AtX+misvLIlOmsEsNd+9NIcPEm8= go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= -go.starlark.net v0.0.0-20191227232015-caa3e9aa5008 h1:PUpdYMZifLwPlUnFfT/2Hkqr7p0SSpOR7xrDiPaD52k= -go.starlark.net v0.0.0-20191227232015-caa3e9aa5008/go.mod h1:nmDLcffg48OtT/PSW0Hg7FvpRQsQh5OSqIylirxKC7o= +go.starlark.net v0.0.0-20200901195727-6e684ef5eeee h1:N4eRtIIYHZE5Mw/Km/orb+naLdwAe+lv2HCxRR5rEBw= +go.starlark.net v0.0.0-20200901195727-6e684ef5eeee/go.mod h1:f0znQkUKRrkk36XxWbGjMqQM8wGv/xHBVE2qc3B5oFU= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= @@ -639,6 +647,7 @@ golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzB golang.org/x/mod v0.2.0 h1:KU7oHjnv3XNWfa5COkzUifxZmxp1TyI7ImMXqFxLwvQ= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20170114055629-f2499483f923/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -667,8 +676,10 @@ golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI= golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200301022130-244492dfa37a h1:GuSPYbZzB5/dcLNCwLQLsg3obCJtX9IJhpXkvY7kzk0= -golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e h1:3G+cUijn7XD+S4eJFddp53Pv7+slrESplyjG25HgL+k= +golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200707034311-ab3426394381 
h1:VXak5I6aEWmAXeQjA+QSZzlgNrpq9mjcfDemuexIKsU= +golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421 h1:Wo7BWFiOk0QRFMLYMqJGFMd9CgUAcGx7V+qEg/h5IBI= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -708,8 +719,6 @@ golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456 h1:ng0gs1AKnRRuEMZoTLLlbOd+C17zUDepwGQBb/n+JVg= golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191002063906-3421d5a6bb1c h1:Vco5b+cuG5NNfORVxZy6bYZQ7rsigisU1WQFkvQ0L5E= -golang.org/x/sys v0.0.0-20191002063906-3421d5a6bb1c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191003212358-c178f38b412c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -719,7 +728,10 @@ golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4 h1:sfkvUWPNGwSV+8/fNqctR5lS2AqCSqYwXdrjCxp/dXo= golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd h1:xhmwyvizuTgC2qz7ZlMluP20uW+C3Rm0FD/WLDX8884= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200826173525-f9321e4c35a6 h1:DvY3Zkh7KabQE/kfzMvYvKirSiguP9Q/veMtkYyf0o8= golang.org/x/sys v0.0.0-20200826173525-f9321e4c35a6/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.0.0-20160726164857-2910a502d2bf/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/models/log.go b/models/log.go index 2e42a516c2171..c0b52a812d924 100644 --- a/models/log.go +++ b/models/log.go @@ -79,7 +79,7 @@ func logName(pluginType, name, alias string) string { return pluginType + "." + name + "::" + alias } -func setLoggerOnPlugin(i interface{}, log telegraf.Logger) { +func SetLoggerOnPlugin(i interface{}, log telegraf.Logger) { valI := reflect.ValueOf(i) if valI.Type().Kind() != reflect.Ptr { @@ -96,6 +96,9 @@ func setLoggerOnPlugin(i interface{}, log telegraf.Logger) { if field.CanSet() { field.Set(reflect.ValueOf(log)) } + default: + log.Debugf("Plugin %q defines a 'Log' field on its struct of an unexpected type %q. 
Expected telegraf.Logger", + valI.Type().Name(), field.Type().String()) } return diff --git a/models/running_aggregator.go b/models/running_aggregator.go index ad054be76f6c1..cbfb9889b87e5 100644 --- a/models/running_aggregator.go +++ b/models/running_aggregator.go @@ -35,7 +35,7 @@ func NewRunningAggregator(aggregator telegraf.Aggregator, config *AggregatorConf aggErrorsRegister.Incr(1) }) - setLoggerOnPlugin(aggregator, logger) + SetLoggerOnPlugin(aggregator, logger) return &RunningAggregator{ Aggregator: aggregator, diff --git a/models/running_input.go b/models/running_input.go index 52f95cb522871..70a4c2ee3a70f 100644 --- a/models/running_input.go +++ b/models/running_input.go @@ -35,7 +35,7 @@ func NewRunningInput(input telegraf.Input, config *InputConfig) *RunningInput { inputErrorsRegister.Incr(1) GlobalGatherErrors.Incr(1) }) - setLoggerOnPlugin(input, logger) + SetLoggerOnPlugin(input, logger) return &RunningInput{ Input: input, diff --git a/models/running_output.go b/models/running_output.go index 0d2954c4aa4fa..894ae011c986d 100644 --- a/models/running_output.go +++ b/models/running_output.go @@ -72,7 +72,7 @@ func NewRunningOutput( logger.OnErr(func() { writeErrorsRegister.Incr(1) }) - setLoggerOnPlugin(output, logger) + SetLoggerOnPlugin(output, logger) if config.MetricBufferLimit > 0 { bufferLimit = config.MetricBufferLimit diff --git a/models/running_processor.go b/models/running_processor.go index c487f48219ef3..1bd2d0f6ed0c7 100644 --- a/models/running_processor.go +++ b/models/running_processor.go @@ -39,7 +39,7 @@ func NewRunningProcessor(processor telegraf.StreamingProcessor, config *Processo logger.OnErr(func() { processErrorsRegister.Incr(1) }) - setLoggerOnPlugin(processor, logger) + SetLoggerOnPlugin(processor, logger) return &RunningProcessor{ Processor: processor, diff --git a/models/running_processor_test.go b/models/running_processor_test.go index 1c431bde1e9ba..14df03253bd38 100644 --- a/models/running_processor_test.go +++ b/models/running_processor_test.go @@ -1,4 +1,4 @@ -package models +package models_test import ( "sort" @@ -6,6 +6,7 @@ import ( "time" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/models" "github.com/influxdata/telegraf/plugins/processors" "github.com/influxdata/telegraf/testutil" "github.com/stretchr/testify/require" @@ -52,7 +53,7 @@ func (p *MockProcessorToInit) Init() error { func TestRunningProcessor_Init(t *testing.T) { mock := MockProcessorToInit{} - rp := &RunningProcessor{ + rp := &models.RunningProcessor{ Processor: processors.NewStreamingProcessorFromProcessor(&mock), } rp.Init() @@ -75,7 +76,7 @@ func TagProcessor(key, value string) *MockProcessor { func TestRunningProcessor_Apply(t *testing.T) { type args struct { Processor telegraf.StreamingProcessor - Config *ProcessorConfig + Config *models.ProcessorConfig } tests := []struct { @@ -88,8 +89,8 @@ func TestRunningProcessor_Apply(t *testing.T) { name: "inactive filter applies metrics", args: args{ Processor: processors.NewStreamingProcessorFromProcessor(TagProcessor("apply", "true")), - Config: &ProcessorConfig{ - Filter: Filter{}, + Config: &models.ProcessorConfig{ + Filter: models.Filter{}, }, }, input: []telegraf.Metric{ @@ -119,8 +120,8 @@ func TestRunningProcessor_Apply(t *testing.T) { name: "filter applies", args: args{ Processor: processors.NewStreamingProcessorFromProcessor(TagProcessor("apply", "true")), - Config: &ProcessorConfig{ - Filter: Filter{ + Config: &models.ProcessorConfig{ + Filter: models.Filter{ NamePass: []string{"cpu"}, }, }, @@ 
-152,8 +153,8 @@ func TestRunningProcessor_Apply(t *testing.T) { name: "filter doesn't apply", args: args{ Processor: processors.NewStreamingProcessorFromProcessor(TagProcessor("apply", "true")), - Config: &ProcessorConfig{ - Filter: Filter{ + Config: &models.ProcessorConfig{ + Filter: models.Filter{ NameDrop: []string{"cpu"}, }, }, @@ -183,7 +184,7 @@ func TestRunningProcessor_Apply(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - rp := &RunningProcessor{ + rp := &models.RunningProcessor{ Processor: tt.args.Processor, Config: tt.args.Config, } @@ -204,25 +205,25 @@ func TestRunningProcessor_Apply(t *testing.T) { } func TestRunningProcessor_Order(t *testing.T) { - rp1 := &RunningProcessor{ - Config: &ProcessorConfig{ + rp1 := &models.RunningProcessor{ + Config: &models.ProcessorConfig{ Order: 1, }, } - rp2 := &RunningProcessor{ - Config: &ProcessorConfig{ + rp2 := &models.RunningProcessor{ + Config: &models.ProcessorConfig{ Order: 2, }, } - rp3 := &RunningProcessor{ - Config: &ProcessorConfig{ + rp3 := &models.RunningProcessor{ + Config: &models.ProcessorConfig{ Order: 3, }, } - procs := RunningProcessors{rp2, rp3, rp1} + procs := models.RunningProcessors{rp2, rp3, rp1} sort.Sort(procs) require.Equal(t, - RunningProcessors{rp1, rp2, rp3}, + models.RunningProcessors{rp1, rp2, rp3}, procs) } diff --git a/plugins/common/encoding/decoder_reader.go b/plugins/common/encoding/decoder_reader.go index 7324c8e72e883..79bf11ed5a94b 100644 --- a/plugins/common/encoding/decoder_reader.go +++ b/plugins/common/encoding/decoder_reader.go @@ -112,7 +112,7 @@ func (r *Reader) Read(p []byte) (int, error) { r.err = nil r.transformComplete = false - n, err := 0, error(nil) + n := 0 for { // Copy out any transformed bytes and return the final error if we are done. if r.dst0 != r.dst1 { @@ -131,6 +131,7 @@ func (r *Reader) Read(p []byte) (int, error) { // As the io.Reader documentation says, "process the n > 0 bytes returned // before considering the error". 
if r.src0 != r.src1 || r.err != nil { + var err error r.dst0 = 0 r.dst1, n, err = r.t.Transform(r.dst, r.src[r.src0:r.src1], r.err == io.EOF) r.src0 += n diff --git a/plugins/inputs/all/all.go b/plugins/inputs/all/all.go index 986c501ed7aa9..1d1b8eb58b463 100644 --- a/plugins/inputs/all/all.go +++ b/plugins/inputs/all/all.go @@ -63,6 +63,7 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/influxdb" _ "github.com/influxdata/telegraf/plugins/inputs/influxdb_listener" _ "github.com/influxdata/telegraf/plugins/inputs/influxdb_v2_listener" + _ "github.com/influxdata/telegraf/plugins/inputs/intel_rdt" _ "github.com/influxdata/telegraf/plugins/inputs/internal" _ "github.com/influxdata/telegraf/plugins/inputs/interrupts" _ "github.com/influxdata/telegraf/plugins/inputs/ipmi_sensor" @@ -140,6 +141,7 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/puppetagent" _ "github.com/influxdata/telegraf/plugins/inputs/rabbitmq" _ "github.com/influxdata/telegraf/plugins/inputs/raindrops" + _ "github.com/influxdata/telegraf/plugins/inputs/ras" _ "github.com/influxdata/telegraf/plugins/inputs/redfish" _ "github.com/influxdata/telegraf/plugins/inputs/redis" _ "github.com/influxdata/telegraf/plugins/inputs/rethinkdb" @@ -177,6 +179,7 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/varnish" _ "github.com/influxdata/telegraf/plugins/inputs/vsphere" _ "github.com/influxdata/telegraf/plugins/inputs/webhooks" + _ "github.com/influxdata/telegraf/plugins/inputs/win_eventlog" _ "github.com/influxdata/telegraf/plugins/inputs/win_perf_counters" _ "github.com/influxdata/telegraf/plugins/inputs/win_services" _ "github.com/influxdata/telegraf/plugins/inputs/wireguard" diff --git a/plugins/inputs/bcache/bcache.go b/plugins/inputs/bcache/bcache.go index 8d20e362394de..849e6dd37de0d 100644 --- a/plugins/inputs/bcache/bcache.go +++ b/plugins/inputs/bcache/bcache.go @@ -73,8 +73,11 @@ func prettyToBytes(v string) uint64 { func (b *Bcache) gatherBcache(bdev string, acc telegraf.Accumulator) error { tags := getTags(bdev) metrics, err := filepath.Glob(bdev + "/stats_total/*") - if len(metrics) < 0 { - return errors.New("Can't read any stats file") + if err != nil { + return err + } + if len(metrics) == 0 { + return errors.New("can't read any stats file") } file, err := ioutil.ReadFile(bdev + "/dirty_data") if err != nil { diff --git a/plugins/inputs/cloudwatch/README.md b/plugins/inputs/cloudwatch/README.md index 674dd0ac4363a..bc7b9b50c5d80 100644 --- a/plugins/inputs/cloudwatch/README.md +++ b/plugins/inputs/cloudwatch/README.md @@ -58,6 +58,13 @@ API endpoint. In the following order the plugin will attempt to authenticate. ## gaps or overlap in pulled data interval = "5m" + ## Recommended if "delay" and "period" are both within 3 hours of request time. Invalid values will be ignored. + ## Recently Active feature will only poll for CloudWatch ListMetrics values that occurred within the last 3 Hours. + ## If enabled, it will reduce total API usage of the CloudWatch ListMetrics API and require less memory to retain. + ## Do not enable if "period" or "delay" is longer than 3 hours, as it will not return data more than 3 hours old. + ## See https://docs.aws.amazon.com/AmazonCloudWatch/latest/APIReference/API_ListMetrics.html + #recently_active = "PT3H" + ## Configure the TTL for the internal cache of metrics. 
# cache_ttl = "1h" @@ -150,7 +157,7 @@ To maximize efficiency and savings, consider making fewer requests by increasing ### Measurements & Fields: -Each CloudWatch Namespace monitored records a measurement with fields for each available Metric Statistic +Each CloudWatch Namespace monitored records a measurement with fields for each available Metric Statistic. Namespace and Metrics are represented in [snake case](https://en.wikipedia.org/wiki/Snake_case) - cloudwatch_{namespace} diff --git a/plugins/inputs/cloudwatch/cloudwatch.go b/plugins/inputs/cloudwatch/cloudwatch.go index 042660a50ff3c..d1f5661a03eba 100644 --- a/plugins/inputs/cloudwatch/cloudwatch.go +++ b/plugins/inputs/cloudwatch/cloudwatch.go @@ -35,12 +35,13 @@ type CloudWatch struct { StatisticInclude []string `toml:"statistic_include"` Timeout config.Duration `toml:"timeout"` - Period config.Duration `toml:"period"` - Delay config.Duration `toml:"delay"` - Namespace string `toml:"namespace"` - Metrics []*Metric `toml:"metrics"` - CacheTTL config.Duration `toml:"cache_ttl"` - RateLimit int `toml:"ratelimit"` + Period config.Duration `toml:"period"` + Delay config.Duration `toml:"delay"` + Namespace string `toml:"namespace"` + Metrics []*Metric `toml:"metrics"` + CacheTTL config.Duration `toml:"cache_ttl"` + RateLimit int `toml:"ratelimit"` + RecentlyActive string `toml:"recently_active"` Log telegraf.Logger `toml:"-"` @@ -123,6 +124,13 @@ func (c *CloudWatch) SampleConfig() string { ## gaps or overlap in pulled data interval = "5m" + ## Recommended if "delay" and "period" are both within 3 hours of request time. Invalid values will be ignored. + ## Recently Active feature will only poll for CloudWatch ListMetrics values that occurred within the last 3 Hours. + ## If enabled, it will reduce total API usage of the CloudWatch ListMetrics API and require less memory to retain. + ## Do not enable if "period" or "delay" is longer than 3 hours, as it will not return data more than 3 hours old. + ## See https://docs.aws.amazon.com/AmazonCloudWatch/latest/APIReference/API_ListMetrics.html + #recently_active = "PT3H" + ## Configure the TTL for the internal cache of metrics. # cache_ttl = "1h" @@ -210,7 +218,7 @@ func (c *CloudWatch) Gather(acc telegraf.Accumulator) error { results := []*cloudwatch.MetricDataResult{} - // 100 is the maximum number of metric data queries a `GetMetricData` request can contain. + // 500 is the maximum number of metric data queries a `GetMetricData` request can contain. 
batchSize := 500 var batches [][]*cloudwatch.MetricDataQuery @@ -369,13 +377,22 @@ func (c *CloudWatch) fetchNamespaceMetrics() ([]*cloudwatch.Metric, error) { metrics := []*cloudwatch.Metric{} var token *string - params := &cloudwatch.ListMetricsInput{ - Namespace: aws.String(c.Namespace), - Dimensions: []*cloudwatch.DimensionFilter{}, - NextToken: token, - MetricName: nil, + var params *cloudwatch.ListMetricsInput + var recentlyActive *string = nil + + switch c.RecentlyActive { + case "PT3H": + recentlyActive = &c.RecentlyActive + default: + recentlyActive = nil + } + params = &cloudwatch.ListMetricsInput{ + Namespace: aws.String(c.Namespace), + Dimensions: []*cloudwatch.DimensionFilter{}, + NextToken: token, + MetricName: nil, + RecentlyActive: recentlyActive, } - for { resp, err := c.client.ListMetrics(params) if err != nil { diff --git a/plugins/inputs/consul/README.md b/plugins/inputs/consul/README.md index 8e1ecc094c3a8..71d7d26a8f5eb 100644 --- a/plugins/inputs/consul/README.md +++ b/plugins/inputs/consul/README.md @@ -17,6 +17,14 @@ report those stats already using StatsD protocol if needed. ## URI scheme for the Consul server, one of "http", "https" # scheme = "http" + ## Metric version controls the mapping from Consul metrics into + ## Telegraf metrics. Version 2 moved all fields with string values + ## to tags. + ## + ## example: metric_version = 1; deprecated in 1.16 + ## metric_version = 2; recommended version + # metric_version = 1 + ## ACL token used in every request # token = "" @@ -41,7 +49,7 @@ report those stats already using StatsD protocol if needed. ``` ### Metrics: - +##### metric_version = 1: - consul_health_checks - tags: - node (node that check/service is registered on) @@ -55,9 +63,23 @@ report those stats already using StatsD protocol if needed. - critical (integer) - warning (integer) +##### metric_version = 2: +- consul_health_checks + - tags: + - node (node that check/service is registered on) + - service_name + - check_id + - check_name + - service_id + - status + - fields: + - passing (integer) + - critical (integer) + - warning (integer) + `passing`, `critical`, and `warning` are integer representations of the health check state. A value of `1` represents that the status was the state of the -the health check at this sample. +the health check at this sample. `status` is string representation of the same state. ## Example output diff --git a/plugins/inputs/consul/consul.go b/plugins/inputs/consul/consul.go index 19ee7a17cd5dd..665373b5c928f 100644 --- a/plugins/inputs/consul/consul.go +++ b/plugins/inputs/consul/consul.go @@ -19,7 +19,9 @@ type Consul struct { Datacentre string // deprecated in 1.10; use Datacenter Datacenter string tls.ClientConfig - TagDelimiter string + TagDelimiter string + MetricVersion int + Log telegraf.Logger // client used to connect to Consul agnet client *api.Client @@ -32,6 +34,13 @@ var sampleConfig = ` ## URI scheme for the Consul server, one of "http", "https" # scheme = "http" + ## Metric version controls the mapping from Consul metrics into + ## Telegraf metrics. 
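+  ## Version 2 moved all fields with string values
+  ## to tags.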
+ ## + ## example: metric_version = 1; deprecated in 1.15 + ## metric_version = 2; recommended version + # metric_version = 1 + ## ACL token used in every request # token = "" @@ -55,6 +64,14 @@ var sampleConfig = ` # tag_delimiter = ":" ` +func (c *Consul) Init() error { + if c.MetricVersion != 2 { + c.Log.Warnf("Use of deprecated configuration: 'metric_version = 1'; please update to 'metric_version = 2'") + } + + return nil +} + func (c *Consul) Description() string { return "Gather health check statuses from services registered in Consul" } @@ -110,15 +127,21 @@ func (c *Consul) GatherHealthCheck(acc telegraf.Accumulator, checks []*api.Healt record := make(map[string]interface{}) tags := make(map[string]string) - record["check_name"] = check.Name - record["service_id"] = check.ServiceID - - record["status"] = check.Status record["passing"] = 0 record["critical"] = 0 record["warning"] = 0 record[check.Status] = 1 + if c.MetricVersion == 2 { + tags["check_name"] = check.Name + tags["service_id"] = check.ServiceID + tags["status"] = check.Status + } else { + record["check_name"] = check.Name + record["service_id"] = check.ServiceID + record["status"] = check.Status + } + tags["node"] = check.Node tags["service_name"] = check.ServiceName tags["check_id"] = check.CheckID diff --git a/plugins/inputs/consul/consul_test.go b/plugins/inputs/consul/consul_test.go index da345ce89db8d..f7301b5fb37fb 100644 --- a/plugins/inputs/consul/consul_test.go +++ b/plugins/inputs/consul/consul_test.go @@ -76,3 +76,62 @@ func TestGatherHealthCheckWithDelimitedTags(t *testing.T) { acc.AssertContainsTaggedFields(t, "consul_health_checks", expectedFields, expectedTags) } + +func TestGatherHealthCheckV2(t *testing.T) { + expectedFields := map[string]interface{}{ + "passing": 1, + "critical": 0, + "warning": 0, + } + + expectedTags := map[string]string{ + "node": "localhost", + "check_id": "foo.health123", + "check_name": "foo.health", + "status": "passing", + "service_id": "foo.123", + "service_name": "foo", + "bar": "bar", + "env:sandbox": "env:sandbox", + "tagkey:value:stillvalue": "tagkey:value:stillvalue", + } + + var acc testutil.Accumulator + + consul := &Consul{ + MetricVersion: 2, + } + consul.GatherHealthCheck(&acc, sampleChecks) + + acc.AssertContainsTaggedFields(t, "consul_health_checks", expectedFields, expectedTags) +} + +func TestGatherHealthCheckWithDelimitedTagsV2(t *testing.T) { + expectedFields := map[string]interface{}{ + "passing": 1, + "critical": 0, + "warning": 0, + } + + expectedTags := map[string]string{ + "node": "localhost", + "check_id": "foo.health123", + "check_name": "foo.health", + "status": "passing", + "service_id": "foo.123", + "service_name": "foo", + "bar": "bar", + "env": "sandbox", + "tagkey": "value:stillvalue", + } + + var acc testutil.Accumulator + + consul := &Consul{ + MetricVersion: 2, + TagDelimiter: ":", + } + consul.GatherHealthCheck(&acc, sampleChecks) + + acc.AssertContainsTaggedFields(t, "consul_health_checks", expectedFields, expectedTags) +} diff --git a/plugins/inputs/docker/docker_test.go b/plugins/inputs/docker/docker_test.go index a331479d10ea1..d8700217c307d 100644 --- a/plugins/inputs/docker/docker_test.go +++ b/plugins/inputs/docker/docker_test.go @@ -571,7 +571,7 @@ func TestContainerStatus(t *testing.T) { map[string]string{ "container_name": "etcd", "container_image": "quay.io/coreos/etcd", - "container_version": "v2.2.2", + "container_version": "v3.3.25", "engine_host": "absol", "label1": "test_value_1", "label2": "test_value_2", @@ -607,7 +607,7 @@ 
func TestContainerStatus(t *testing.T) { map[string]string{ "container_name": "etcd", "container_image": "quay.io/coreos/etcd", - "container_version": "v2.2.2", + "container_version": "v3.3.25", "engine_host": "absol", "label1": "test_value_1", "label2": "test_value_2", @@ -645,7 +645,7 @@ func TestContainerStatus(t *testing.T) { map[string]string{ "container_name": "etcd", "container_image": "quay.io/coreos/etcd", - "container_version": "v2.2.2", + "container_version": "v3.3.25", "engine_host": "absol", "label1": "test_value_1", "label2": "test_value_2", @@ -681,7 +681,7 @@ func TestContainerStatus(t *testing.T) { map[string]string{ "container_name": "etcd", "container_image": "quay.io/coreos/etcd", - "container_version": "v2.2.2", + "container_version": "v3.3.25", "engine_host": "absol", "label1": "test_value_1", "label2": "test_value_2", @@ -856,7 +856,7 @@ func TestDockerGatherInfo(t *testing.T) { "container_name": "etcd2", "container_image": "quay.io:4443/coreos/etcd", "cpu": "cpu3", - "container_version": "v2.2.2", + "container_version": "v3.3.25", "engine_host": "absol", "ENVVAR1": "loremipsum", "ENVVAR2": "dolorsitamet", @@ -881,7 +881,7 @@ func TestDockerGatherInfo(t *testing.T) { "engine_host": "absol", "container_name": "etcd2", "container_image": "quay.io:4443/coreos/etcd", - "container_version": "v2.2.2", + "container_version": "v3.3.25", "ENVVAR1": "loremipsum", "ENVVAR2": "dolorsitamet", "ENVVAR3": "=ubuntu:10.04", diff --git a/plugins/inputs/docker/docker_testdata.go b/plugins/inputs/docker/docker_testdata.go index d50b80b9a1d1d..bde0bd312c788 100644 --- a/plugins/inputs/docker/docker_testdata.go +++ b/plugins/inputs/docker/docker_testdata.go @@ -63,7 +63,7 @@ var containerList = []types.Container{ { ID: "e2173b9478a6ae55e237d4d74f8bbb753f0817192b5081334dc78476296b7dfb", Names: []string{"/etcd"}, - Image: "quay.io/coreos/etcd:v2.2.2", + Image: "quay.io/coreos/etcd:v3.3.25", Command: "/etcd -name etcd0 -advertise-client-urls http://localhost:2379 -listen-client-urls http://0.0.0.0:2379", Created: 1455941930, Status: "Up 4 hours", @@ -100,7 +100,7 @@ var containerList = []types.Container{ { ID: "b7dfbb9478a6ae55e237d4d74f8bbb753f0817192b5081334dc78476296e2173", Names: []string{"/etcd2"}, - Image: "quay.io:4443/coreos/etcd:v2.2.2", + Image: "quay.io:4443/coreos/etcd:v3.3.25", Command: "/etcd -name etcd2 -advertise-client-urls http://localhost:2379 -listen-client-urls http://0.0.0.0:2379", Created: 1455941933, Status: "Up 4 hours", diff --git a/plugins/inputs/github/github.go b/plugins/inputs/github/github.go index 3e5597707ffe4..a26923f3f305c 100644 --- a/plugins/inputs/github/github.go +++ b/plugins/inputs/github/github.go @@ -8,7 +8,7 @@ import ( "sync" "time" - "github.com/google/go-github/github" + "github.com/google/go-github/v32/github" "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/internal" "github.com/influxdata/telegraf/plugins/inputs" @@ -74,7 +74,7 @@ func (g *GitHub) createGitHubClient(ctx context.Context) (*github.Client, error) &oauth2.Token{AccessToken: g.AccessToken}, ) oauthClient := oauth2.NewClient(ctx, tokenSource) - ctx = context.WithValue(ctx, oauth2.HTTPClient, oauthClient) + _ = context.WithValue(ctx, oauth2.HTTPClient, oauthClient) g.obfuscatedToken = g.AccessToken[0:4] + "..." 
+ g.AccessToken[len(g.AccessToken)-3:] diff --git a/plugins/inputs/github/github_test.go b/plugins/inputs/github/github_test.go index 23fda6675cc44..7803558fbab30 100644 --- a/plugins/inputs/github/github_test.go +++ b/plugins/inputs/github/github_test.go @@ -5,7 +5,7 @@ import ( "reflect" "testing" - gh "github.com/google/go-github/github" + gh "github.com/google/go-github/v32/github" "github.com/stretchr/testify/require" ) diff --git a/plugins/inputs/graylog/graylog.go b/plugins/inputs/graylog/graylog.go index 09a7f173b3ba9..be5f8fc60aaa4 100644 --- a/plugins/inputs/graylog/graylog.go +++ b/plugins/inputs/graylog/graylog.go @@ -169,11 +169,12 @@ func (h *GrayLog) gatherServer( return err } requestURL, err := url.Parse(serverURL) - host, port, _ := net.SplitHostPort(requestURL.Host) - var dat ResponseMetrics if err != nil { - return err + return fmt.Errorf("unable to parse address '%s': %s", serverURL, err) } + + host, port, _ := net.SplitHostPort(requestURL.Host) + var dat ResponseMetrics if err := json.Unmarshal([]byte(resp), &dat); err != nil { return err } diff --git a/plugins/inputs/haproxy/haproxy.go b/plugins/inputs/haproxy/haproxy.go index 5a3de352822be..9ec9512ea170c 100644 --- a/plugins/inputs/haproxy/haproxy.go +++ b/plugins/inputs/haproxy/haproxy.go @@ -162,10 +162,13 @@ func (g *haproxy) gatherServer(addr string, acc telegraf.Accumulator) error { u, err := url.Parse(addr) if err != nil { - return fmt.Errorf("Unable parse server address '%s': %s", addr, err) + return fmt.Errorf("unable parse server address '%s': %s", addr, err) } req, err := http.NewRequest("GET", addr, nil) + if err != nil { + return fmt.Errorf("unable to create new request '%s': %s", addr, err) + } if u.User != nil { p, _ := u.User.Password() req.SetBasicAuth(u.User.Username(), p) @@ -179,16 +182,16 @@ func (g *haproxy) gatherServer(addr string, acc telegraf.Accumulator) error { res, err := g.client.Do(req) if err != nil { - return fmt.Errorf("Unable to connect to haproxy server '%s': %s", addr, err) + return fmt.Errorf("unable to connect to haproxy server '%s': %s", addr, err) } defer res.Body.Close() if res.StatusCode != 200 { - return fmt.Errorf("Unable to get valid stat result from '%s', http response code : %d", addr, res.StatusCode) + return fmt.Errorf("unable to get valid stat result from '%s', http response code : %d", addr, res.StatusCode) } if err := g.importCsvResult(res.Body, acc, u.Host); err != nil { - return fmt.Errorf("Unable to parse stat result from '%s': %s", addr, err) + return fmt.Errorf("unable to parse stat result from '%s': %s", addr, err) } return nil @@ -271,7 +274,7 @@ func (g *haproxy) importCsvResult(r io.Reader, acc telegraf.Accumulator, host st if err != nil { return fmt.Errorf("unable to parse type value '%s'", v) } - if int(vi) >= len(typeNames) { + if vi >= int64(len(typeNames)) { return fmt.Errorf("received unknown type value: %d", vi) } tags[fieldName] = typeNames[vi] diff --git a/plugins/inputs/influxdb_v2_listener/README.md b/plugins/inputs/influxdb_v2_listener/README.md index cef11e9ae380a..4258e021d85fd 100644 --- a/plugins/inputs/influxdb_v2_listener/README.md +++ b/plugins/inputs/influxdb_v2_listener/README.md @@ -9,12 +9,15 @@ The `/api/v2/write` endpoint supports the `precision` query parameter and can be to one of `ns`, `us`, `ms`, `s`. All other parameters are ignored and defer to the output plugins configuration. 
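+
+As a minimal sketch of a client write from Go (assuming the default `service_address`
+configured below and second-precision timestamps; the measurement line itself is
+arbitrary example data), the endpoint accepts line protocol in the request body:
+
+```go
+package main
+
+import (
+	"fmt"
+	"log"
+	"net/http"
+	"strings"
+)
+
+func main() {
+	// One line of line protocol; precision=s must match the timestamp resolution.
+	body := strings.NewReader("cpu,host=server01 usage_idle=90 1585849443\n")
+	resp, err := http.Post("http://localhost:8086/api/v2/write?precision=s",
+		"text/plain; charset=utf-8", body)
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer resp.Body.Close()
+	fmt.Println(resp.Status) // expect a 2xx status when the write is accepted
+}
+```
+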
+Telegraf minimum version: Telegraf 1.16.0
+
 ### Configuration:
 
 ```toml
 [[inputs.influxdb_v2_listener]]
   ## Address and port to host InfluxDB listener on
-  service_address = ":9999"
+  ## (Double check the port. Could be 9999 if using OSS Beta)
+  service_address = ":8086"
 
   ## Maximum allowed HTTP request body size in bytes.
   ## 0 means to use the default of 32MiB.
diff --git a/plugins/inputs/influxdb_v2_listener/influxdb_v2_listener.go b/plugins/inputs/influxdb_v2_listener/influxdb_v2_listener.go
index 01d47b201f502..30c449f7dd910 100644
--- a/plugins/inputs/influxdb_v2_listener/influxdb_v2_listener.go
+++ b/plugins/inputs/influxdb_v2_listener/influxdb_v2_listener.go
@@ -67,13 +67,14 @@ type InfluxDBV2Listener struct {
 const sampleConfig = `
   ## Address and port to host InfluxDB listener on
-  service_address = ":9999"
+  ## (Double check the port. Could be 9999 if using OSS Beta)
+  service_address = ":8086"
 
   ## Maximum allowed HTTP request body size in bytes.
   ## 0 means to use the default of 32MiB.
   # max_body_size = "32MiB"
 
-  ## Optional tag to determine the bucket. 
+  ## Optional tag to determine the bucket.
   ## If the write has a bucket in the query string then it will be kept in this tag name.
   ## This tag can be used in downstream outputs.
   ## The default value of nothing means it will be off and the database will not be recorded.
@@ -336,7 +337,7 @@ func getPrecisionMultiplier(precision string) time.Duration {
 func init() {
 	inputs.Add("influxdb_v2_listener", func() telegraf.Input {
 		return &InfluxDBV2Listener{
-			ServiceAddress: ":9999",
+			ServiceAddress: ":8086",
 			timeFunc:       time.Now,
 		}
 	})
diff --git a/plugins/inputs/intel_rdt/README.md b/plugins/inputs/intel_rdt/README.md
new file mode 100644
index 0000000000000..1a6e55f6a7fb9
--- /dev/null
+++ b/plugins/inputs/intel_rdt/README.md
@@ -0,0 +1,108 @@
+# Intel RDT Input Plugin
+The intel_rdt plugin collects information provided by the monitoring features of
+Intel Resource Director Technology (Intel(R) RDT). Cache Monitoring Technology (CMT),
+Memory Bandwidth Monitoring (MBM), Cache Allocation Technology (CAT) and Code
+and Data Prioritization (CDP) provide the hardware framework to monitor
+and control the utilization of shared resources such as the last level cache and memory bandwidth.
+Together these technologies comprise Intel’s Resource Director Technology (RDT).
+As multithreaded and multicore platform architectures emerge, with workloads running
+single-threaded, multithreaded, or in complex virtual machine environments,
+the last level cache and memory bandwidth become key resources to manage. Intel introduced CMT,
+MBM, CAT and CDP to manage these various workloads across shared resources.
+
+To gather Intel RDT metrics the plugin uses the _pqos_ CLI tool, which is part of the [Intel(R) RDT Software Package](https://github.com/intel/intel-cmt-cat).
+Before using this plugin, make sure _pqos_ is properly installed and configured, since the plugin
+runs _pqos_ in `OS Interface` mode. This plugin supports _pqos_ version 4.0.0 and above.
+Be aware that the pqos tool needs root privileges to work properly.
+
+Metrics will be constantly reported from the following `pqos` commands within the given interval:
+
+#### In case of cores monitoring:
+```
+pqos -r --iface-os --mon-file-type=csv --mon-interval=INTERVAL --mon-core=all:[CORES]\;mbt:[CORES]
+```
+where `CORES` is a group of cores provided in the config. Many groups can be provided; see the expanded example below.
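+
+For example, with `cores = ["0-3"]` and the default sampling interval, the argument construction
+in this plugin expands the range and produces an invocation equivalent to this sketch:
+```
+pqos -r --iface-os --mon-file-type=csv --mon-interval=10 --mon-core=all:[0,1,2,3]\;mbt:[0,1,2,3]\;
+```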
+
+#### In case of process monitoring:
+```
+pqos -r --iface-os --mon-file-type=csv --mon-interval=INTERVAL --mon-pid=all:[PIDS]\;mbt:[PIDS]
+```
+where `PIDS` is a group of process IDs whose names match one of the process names provided in the config.
+Many process names can be provided; each one results in its own process group.
+
+In both cases `INTERVAL` is equal to `sampling_interval` from the config.
+
+Because the PID association of processes on the system can change at any moment, the Intel RDT plugin
+checks on every interval whether the monitored processes have changed their PID association.
+If a change is detected, the plugin restarts the _pqos_ tool with new arguments. If a configured
+process name does not match any available process, it is omitted and the plugin keeps checking
+for its availability.
+
+### Useful links
+Pqos installation process: https://github.com/intel/intel-cmt-cat/blob/master/INSTALL
+Enabling OS interface: https://github.com/intel/intel-cmt-cat/wiki, https://github.com/intel/intel-cmt-cat/wiki/resctrl
+More about Intel RDT: https://www.intel.com/content/www/us/en/architecture-and-technology/resource-director-technology.html
+
+### Configuration
+```toml
+# Read Intel RDT metrics
+[[inputs.IntelRDT]]
+  ## Optionally set sampling interval to Nx100ms.
+  ## This value is propagated to pqos tool. Interval format is defined by pqos itself.
+  ## If not provided or provided 0, will be set to 10 = 10x100ms = 1s.
+  # sampling_interval = "10"
+
+  ## Optionally specify the path to pqos executable.
+  ## If not provided, auto discovery will be performed.
+  # pqos_path = "/usr/local/bin/pqos"
+
+  ## Optionally specify if IPC and LLC_Misses metrics shouldn't be propagated.
+  ## If not provided, default value is false.
+  # shortened_metrics = false
+
+  ## Specify the list of groups of CPU core(s) to be provided as pqos input.
+  ## Mandatory if processes aren't set and forbidden if processes are specified.
+  ## e.g. ["0-3", "4,5,6"] or ["1-3,4"]
+  # cores = ["0-3"]
+
+  ## Specify the list of processes for which Metrics will be collected.
+  ## Mandatory if cores aren't set and forbidden if cores are specified.
+  ## e.g. ["qemu", "pmd"]
+  # processes = ["process"]
+```
+
+### Exposed metrics
+| Name | Full name | Description |
+|---------------|-----------------------------------------------|-------------|
+| MBL | Memory Bandwidth on Local NUMA Node | Memory bandwidth utilization by the relevant CPU core/process on the local NUMA memory channel |
+| MBR | Memory Bandwidth on Remote NUMA Node | Memory bandwidth utilization by the relevant CPU core/process on the remote NUMA memory channel |
+| MBT | Total Memory Bandwidth | Total memory bandwidth utilized by a CPU core/process on local and remote NUMA memory channels |
+| LLC | L3 Cache Occupancy | Total Last Level Cache occupancy by a CPU core/process |
+| *LLC_Misses | L3 Cache Misses | Total Last Level Cache misses by a CPU core/process |
+| *IPC | Instructions Per Cycle | Total instructions per cycle executed by a CPU core/process |
+
+*optional
+
+### Troubleshooting
+Pointing to a non-existent core makes _pqos_ throw an error and the plugin will not work properly.
+Be sure to check that the provided core numbers exist on the system in question.
+
+Be aware that Intel RDT metrics cannot be read by _pqos_ simultaneously on the same resource,
+so make sure no other _pqos_ instance is monitoring the same cores or PIDs on the system.
+Also there is no possibility to monitor same cores or PIDs on different groups. + +Pids association for the given process could be manually checked by `pidof` command. E.g: +``` +pidof PROCESS +``` +where `PROCESS` is process name. + +### Example Output +``` +> rdt_metric,cores=12\,19,host=r2-compute-20,name=IPC,process=top value=0 1598962030000000000 +> rdt_metric,cores=12\,19,host=r2-compute-20,name=LLC_Misses,process=top value=0 1598962030000000000 +> rdt_metric,cores=12\,19,host=r2-compute-20,name=LLC,process=top value=0 1598962030000000000 +> rdt_metric,cores=12\,19,host=r2-compute-20,name=MBL,process=top value=0 1598962030000000000 +> rdt_metric,cores=12\,19,host=r2-compute-20,name=MBR,process=top value=0 1598962030000000000 +> rdt_metric,cores=12\,19,host=r2-compute-20,name=MBT,process=top value=0 1598962030000000000 +``` \ No newline at end of file diff --git a/plugins/inputs/intel_rdt/intel_rdt.go b/plugins/inputs/intel_rdt/intel_rdt.go new file mode 100644 index 0000000000000..e61266c0a4f6b --- /dev/null +++ b/plugins/inputs/intel_rdt/intel_rdt.go @@ -0,0 +1,552 @@ +// +build !windows + +package intel_rdt + +import ( + "bufio" + "context" + "fmt" + "io" + "os" + "os/exec" + "regexp" + "strconv" + "strings" + "sync" + "time" + + "github.com/google/go-cmp/cmp" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal/choice" + "github.com/influxdata/telegraf/plugins/inputs" +) + +const ( + timestampFormat = "2006-01-02 15:04:05" + defaultSamplingInterval = 10 + pqosInitOutputLinesNumber = 4 + numberOfMetrics = 6 + secondsDenominator = 10 +) + +var pqosMetricOrder = map[int]string{ + 0: "IPC", // Instructions Per Cycle + 1: "LLC_Misses", // Cache Misses + 2: "LLC", // L3 Cache Occupancy + 3: "MBL", // Memory Bandwidth on Local NUMA Node + 4: "MBR", // Memory Bandwidth on Remote NUMA Node + 5: "MBT", // Total Memory Bandwidth +} + +type IntelRDT struct { + PqosPath string `toml:"pqos_path"` + Cores []string `toml:"cores"` + Processes []string `toml:"processes"` + SamplingInterval int32 `toml:"sampling_interval"` + ShortenedMetrics bool `toml:"shortened_metrics"` + + Log telegraf.Logger `toml:"-"` + Publisher Publisher `toml:"-"` + Processor ProcessesHandler `toml:"-"` + stopPQOSChan chan bool + quitChan chan struct{} + errorChan chan error + parsedCores []string + processesPIDsMap map[string]string + cancel context.CancelFunc + wg sync.WaitGroup +} + +type processMeasurement struct { + name string + measurement string +} + +// All gathering is done in the Start function +func (r *IntelRDT) Gather(_ telegraf.Accumulator) error { + return nil +} + +func (r *IntelRDT) Description() string { + return "Intel Resource Director Technology plugin" +} + +func (r *IntelRDT) SampleConfig() string { + return ` + ## Optionally set sampling interval to Nx100ms. + ## This value is propagated to pqos tool. Interval format is defined by pqos itself. + ## If not provided or provided 0, will be set to 10 = 10x100ms = 1s. + # sampling_interval = "10" + + ## Optionally specify the path to pqos executable. + ## If not provided, auto discovery will be performed. + # pqos_path = "/usr/local/bin/pqos" + + ## Optionally specify if IPC and LLC_Misses metrics shouldn't be propagated. + ## If not provided, default value is false. + # shortened_metrics = false + + ## Specify the list of groups of CPU core(s) to be provided as pqos input. + ## Mandatory if processes aren't set and forbidden if processes are specified. + ## e.g. 
["0-3", "4,5,6"] or ["1-3,4"] + # cores = ["0-3"] + + ## Specify the list of processes for which Metrics will be collected. + ## Mandatory if cores aren't set and forbidden if cores are specified. + ## e.g. ["qemu", "pmd"] + # processes = ["process"] +` +} + +func (r *IntelRDT) Start(acc telegraf.Accumulator) error { + ctx, cancel := context.WithCancel(context.Background()) + r.cancel = cancel + + r.Processor = NewProcessor() + r.Publisher = NewPublisher(acc, r.Log, r.ShortenedMetrics) + + err := r.Initialize() + if err != nil { + return err + } + + r.Publisher.publish(ctx) + go r.errorHandler(ctx) + go r.scheduler(ctx) + + return nil +} + +func (r *IntelRDT) Initialize() error { + r.stopPQOSChan = make(chan bool) + r.quitChan = make(chan struct{}) + r.errorChan = make(chan error) + + err := validatePqosPath(r.PqosPath) + if err != nil { + return err + } + if len(r.Cores) != 0 && len(r.Processes) != 0 { + return fmt.Errorf("monitoring start error, process and core tracking can not be done simultaneously") + } + if len(r.Cores) == 0 && len(r.Processes) == 0 { + return fmt.Errorf("monitoring start error, at least one of cores or processes must be provided in config") + } + if r.SamplingInterval == 0 { + r.SamplingInterval = defaultSamplingInterval + } + if err = validateInterval(r.SamplingInterval); err != nil { + return err + } + r.parsedCores, err = parseCoresConfig(r.Cores) + if err != nil { + return err + } + r.processesPIDsMap, err = r.associateProcessesWithPIDs(r.Processes) + if err != nil { + return err + } + return nil +} + +func (r *IntelRDT) errorHandler(ctx context.Context) { + r.wg.Add(1) + defer r.wg.Done() + for { + select { + case err := <-r.errorChan: + if err != nil { + r.Log.Error(fmt.Sprintf("Error: %v", err)) + r.quitChan <- struct{}{} + } + case <-ctx.Done(): + return + } + } +} + +func (r *IntelRDT) scheduler(ctx context.Context) { + r.wg.Add(1) + defer r.wg.Done() + interval := time.Duration(r.SamplingInterval) + ticker := time.NewTicker(interval * time.Second / secondsDenominator) + + r.createArgsAndStartPQOS(ctx) + + for { + select { + case <-ticker.C: + if len(r.Processes) != 0 { + err := r.checkPIDsAssociation(ctx) + if err != nil { + r.errorChan <- err + } + } + case <-r.quitChan: + r.cancel() + return + case <-ctx.Done(): + return + } + } +} + +func (r *IntelRDT) Stop() { + r.cancel() + r.wg.Wait() +} + +func (r *IntelRDT) checkPIDsAssociation(ctx context.Context) error { + newProcessesPIDsMap, err := r.associateProcessesWithPIDs(r.Processes) + if err != nil { + return err + } + // change in PIDs association appears + if !cmp.Equal(newProcessesPIDsMap, r.processesPIDsMap) { + r.Log.Warnf("PIDs association has changed. 
Refreshing...") + if len(r.processesPIDsMap) != 0 { + r.stopPQOSChan <- true + } + r.processesPIDsMap = newProcessesPIDsMap + r.createArgsAndStartPQOS(ctx) + } + return nil +} + +func (r *IntelRDT) associateProcessesWithPIDs(providedProcesses []string) (map[string]string, error) { + mapProcessPIDs := map[string]string{} + + availableProcesses, err := r.Processor.getAllProcesses() + if err != nil { + return nil, fmt.Errorf("cannot gather information of all available processes") + } + for _, availableProcess := range availableProcesses { + if choice.Contains(availableProcess.Name, providedProcesses) { + PID := availableProcess.PID + mapProcessPIDs[availableProcess.Name] = mapProcessPIDs[availableProcess.Name] + fmt.Sprintf("%d", PID) + "," + } + } + for key := range mapProcessPIDs { + mapProcessPIDs[key] = strings.TrimSuffix(mapProcessPIDs[key], ",") + } + return mapProcessPIDs, nil +} + +func (r *IntelRDT) createArgsAndStartPQOS(ctx context.Context) { + args := []string{"-r", "--iface-os", "--mon-file-type=csv", fmt.Sprintf("--mon-interval=%d", r.SamplingInterval)} + + if len(r.parsedCores) != 0 { + coresArg := createArgCores(r.parsedCores) + args = append(args, coresArg) + go r.readData(args, nil, ctx) + + } else if len(r.processesPIDsMap) != 0 { + processArg := createArgProcess(r.processesPIDsMap) + args = append(args, processArg) + go r.readData(args, r.processesPIDsMap, ctx) + } + return +} + +func (r *IntelRDT) readData(args []string, processesPIDsAssociation map[string]string, ctx context.Context) { + r.wg.Add(1) + defer r.wg.Done() + + cmd := exec.Command(r.PqosPath, append(args)...) + + cmdReader, err := cmd.StdoutPipe() + if err != nil { + r.errorChan <- err + } + go r.processOutput(cmdReader, processesPIDsAssociation) + + go func() { + for { + select { + case <-r.stopPQOSChan: + if err := shutDownPqos(cmd); err != nil { + r.Log.Error(err) + } + return + case <-ctx.Done(): + if err := shutDownPqos(cmd); err != nil { + r.Log.Error(err) + } + return + } + } + }() + err = cmd.Start() + if err != nil { + r.errorChan <- fmt.Errorf("pqos: %v", err) + return + } + err = cmd.Wait() + if err != nil { + r.errorChan <- fmt.Errorf("pqos: %v", err) + } +} + +func (r *IntelRDT) processOutput(cmdReader io.ReadCloser, processesPIDsAssociation map[string]string) { + reader := bufio.NewScanner(cmdReader) + /* + Omit constant, first 4 lines : + "NOTE: Mixed use of MSR and kernel interfaces to manage + CAT or CMT & MBM may lead to unexpected behavior.\n" + CMT/MBM reset successful + "Time,Core,IPC,LLC Misses,LLC[KB],MBL[MB/s],MBR[MB/s],MBT[MB/s]\n" + */ + toOmit := pqosInitOutputLinesNumber + + // omit first measurements which are zeroes + if len(r.parsedCores) != 0 { + toOmit = toOmit + len(r.parsedCores) + // specify how many lines should pass before stopping + } else if len(processesPIDsAssociation) != 0 { + toOmit = toOmit + len(processesPIDsAssociation) + } + for omitCounter := 0; omitCounter < toOmit; omitCounter++ { + reader.Scan() + } + for reader.Scan() { + out := reader.Text() + // to handle situation when monitored PID disappear and "err" is shown in output + if strings.Contains(out, "err") { + continue + } + if len(r.Processes) != 0 { + newMetric := processMeasurement{} + + PIDs, err := findPIDsInMeasurement(out) + if err != nil { + r.errorChan <- err + break + } + for processName, PIDsProcess := range processesPIDsAssociation { + if PIDs == PIDsProcess { + newMetric.name = processName + newMetric.measurement = out + } + } + r.Publisher.BufferChanProcess <- newMetric + } else { + 
r.Publisher.BufferChanCores <- out + } + } +} + +func shutDownPqos(pqos *exec.Cmd) error { + if pqos.Process != nil { + err := pqos.Process.Signal(os.Interrupt) + if err != nil { + err = pqos.Process.Kill() + if err != nil { + return fmt.Errorf("failed to shut down pqos: %v", err) + } + } + } + return nil +} + +func createArgCores(cores []string) string { + allGroupsArg := "--mon-core=" + for _, coreGroup := range cores { + argGroup := createArgsForGroups(strings.Split(coreGroup, ",")) + allGroupsArg = allGroupsArg + argGroup + } + return allGroupsArg +} + +func createArgProcess(processPIDs map[string]string) string { + allPIDsArg := "--mon-pid=" + for _, PIDs := range processPIDs { + argPIDs := createArgsForGroups(strings.Split(PIDs, ",")) + allPIDsArg = allPIDsArg + argPIDs + } + return allPIDsArg +} + +func createArgsForGroups(coresOrPIDs []string) string { + template := "all:[%s];mbt:[%s];" + group := "" + + for _, coreOrPID := range coresOrPIDs { + group = group + coreOrPID + "," + } + if group != "" { + group = strings.TrimSuffix(group, ",") + return fmt.Sprintf(template, group, group) + } + return "" +} + +func validatePqosPath(pqosPath string) error { + if len(pqosPath) == 0 { + return fmt.Errorf("monitoring start error, can not find pqos executable") + } + pathInfo, err := os.Stat(pqosPath) + if os.IsNotExist(err) { + return fmt.Errorf("monitoring start error, provided pqos path not exist") + } + if mode := pathInfo.Mode(); !mode.IsRegular() { + return fmt.Errorf("monitoring start error, provided pqos path does not point to a regular file") + } + return nil +} + +func parseCoresConfig(cores []string) ([]string, error) { + var parsedCores []string + var allCores []int + configError := fmt.Errorf("wrong cores input config data format") + + for _, singleCoreGroup := range cores { + var actualGroupOfCores []int + separatedCores := strings.Split(singleCoreGroup, ",") + + for _, coreStr := range separatedCores { + actualCores, err := validateAndParseCores(coreStr) + if err != nil { + return nil, fmt.Errorf("%v: %v", configError, err) + } + if checkForDuplicates(allCores, actualCores) { + return nil, fmt.Errorf("%v: %v", configError, "core value cannot be duplicated") + } + actualGroupOfCores = append(actualGroupOfCores, actualCores...) + allCores = append(allCores, actualGroupOfCores...) + } + parsedCores = append(parsedCores, arrayToString(actualGroupOfCores)) + } + return parsedCores, nil +} + +func validateAndParseCores(coreStr string) ([]int, error) { + var processedCores []int + if strings.Contains(coreStr, "-") { + rangeValues := strings.Split(coreStr, "-") + + if len(rangeValues) != 2 { + return nil, fmt.Errorf("more than two values in range") + } + + startValue, err := strconv.Atoi(rangeValues[0]) + if err != nil { + return nil, err + } + stopValue, err := strconv.Atoi(rangeValues[1]) + if err != nil { + return nil, err + } + + if startValue > stopValue { + return nil, fmt.Errorf("first value cannot be higher than second") + } + + rangeOfCores := makeRange(startValue, stopValue) + processedCores = append(processedCores, rangeOfCores...) 
+ } else { + newCore, err := strconv.Atoi(coreStr) + if err != nil { + return nil, err + } + processedCores = append(processedCores, newCore) + } + return processedCores, nil +} + +func findPIDsInMeasurement(measurements string) (string, error) { + // to distinguish PIDs from Cores (PIDs should be in quotes) + var insideQuoteRegex = regexp.MustCompile(`"(.*?)"`) + PIDsMatch := insideQuoteRegex.FindStringSubmatch(measurements) + if len(PIDsMatch) < 2 { + return "", fmt.Errorf("cannot find PIDs in measurement line") + } + PIDs := PIDsMatch[1] + return PIDs, nil +} + +func splitCSVLineIntoValues(line string) (timeValue string, metricsValues, coreOrPIDsValues []string, err error) { + values, err := splitMeasurementLine(line) + if err != nil { + return "", nil, nil, err + } + + timeValue = values[0] + // Because pqos csv format is broken when many cores are involved in PID or + // group of PIDs, there is need to work around it. E.g.: + // Time,PID,Core,IPC,LLC Misses,LLC[KB],MBL[MB/s],MBR[MB/s],MBT[MB/s] + // 2020-08-12 13:34:36,"45417,29170,",37,44,0.00,0,0.0,0.0,0.0,0.0 + metricsValues = values[len(values)-numberOfMetrics:] + coreOrPIDsValues = values[1 : len(values)-numberOfMetrics] + + return timeValue, metricsValues, coreOrPIDsValues, nil +} + +func validateInterval(interval int32) error { + if interval < 0 { + return fmt.Errorf("interval cannot be lower than 0") + } + return nil +} + +func splitMeasurementLine(line string) ([]string, error) { + values := strings.Split(line, ",") + if len(values) < 8 { + return nil, fmt.Errorf(fmt.Sprintf("not valid line format from pqos: %s", values)) + } + return values, nil +} + +func parseTime(value string) (time.Time, error) { + timestamp, err := time.Parse(timestampFormat, value) + if err != nil { + return time.Time{}, err + } + return timestamp, nil +} + +func parseFloat(value string) (float64, error) { + result, err := strconv.ParseFloat(value, 64) + if err != nil { + return result, err + } + return result, nil +} + +func arrayToString(array []int) string { + result := "" + for _, value := range array { + result = fmt.Sprintf("%s%d,", result, value) + } + return strings.TrimSuffix(result, ",") +} + +func checkForDuplicates(values []int, valuesToCheck []int) bool { + for _, value := range values { + for _, valueToCheck := range valuesToCheck { + if value == valueToCheck { + return true + } + } + } + return false +} + +func makeRange(min, max int) []int { + a := make([]int, max-min+1) + for i := range a { + a[i] = min + i + } + return a +} + +func init() { + inputs.Add("IntelRDT", func() telegraf.Input { + rdt := IntelRDT{} + pathPqos, _ := exec.LookPath("pqos") + if len(pathPqos) > 0 { + rdt.PqosPath = pathPqos + } + return &rdt + }) +} diff --git a/plugins/inputs/intel_rdt/intel_rdt_test.go b/plugins/inputs/intel_rdt/intel_rdt_test.go new file mode 100644 index 0000000000000..7e876425724ec --- /dev/null +++ b/plugins/inputs/intel_rdt/intel_rdt_test.go @@ -0,0 +1,277 @@ +// +build !windows + +package intel_rdt + +import ( + "testing" + + "github.com/influxdata/telegraf/testutil" + "github.com/stretchr/testify/assert" +) + +type MockProc struct{} + +func (m *MockProc) getAllProcesses() ([]Process, error) { + procs := []Process{ + {Name: "process", PID: 1000}, + {Name: "process2", PID: 1002}, + {Name: "process2", PID: 1003}, + } + return procs, nil +} + +func TestAssociateProcessesWithPIDs(t *testing.T) { + log := testutil.Logger{} + proc := &MockProc{} + rdt := IntelRDT{ + Log: log, + Processor: proc, + } + processes := []string{"process"} + 
expectedPID := "1000" + result, err := rdt.associateProcessesWithPIDs(processes) + assert.Nil(t, err) + assert.Equal(t, expectedPID, result[processes[0]]) + + processes = []string{"process2"} + expectedPID = "1002,1003" + result, err = rdt.associateProcessesWithPIDs(processes) + assert.Nil(t, err) + assert.Equal(t, expectedPID, result[processes[0]]) + + processes = []string{"process1"} + result, err = rdt.associateProcessesWithPIDs(processes) + assert.Nil(t, err) + assert.Len(t, result, 0) +} + +func TestSplitCSVLineIntoValues(t *testing.T) { + line := "2020-08-12 13:34:36,\"45417,29170\",37,44,0.00,0,0.0,0.0,0.0,0.0" + expectedTimeValue := "2020-08-12 13:34:36" + expectedMetricsValue := []string{"0.00", "0", "0.0", "0.0", "0.0", "0.0"} + expectedCoreOrPidsValue := []string{"\"45417", "29170\"", "37", "44"} + + timeValue, metricsValue, coreOrPidsValue, err := splitCSVLineIntoValues(line) + assert.Nil(t, err) + assert.Equal(t, expectedTimeValue, timeValue) + assert.Equal(t, expectedMetricsValue, metricsValue) + assert.Equal(t, expectedCoreOrPidsValue, coreOrPidsValue) + + wrongLine := "2020-08-12 13:34:36,37,44,0.00,0,0.0" + timeValue, metricsValue, coreOrPidsValue, err = splitCSVLineIntoValues(wrongLine) + assert.NotNil(t, err) + assert.Equal(t, "", timeValue) + assert.Nil(t, nil, metricsValue) + assert.Nil(t, nil, coreOrPidsValue) +} + +func TestFindPIDsInMeasurement(t *testing.T) { + line := "2020-08-12 13:34:36,\"45417,29170\"" + expected := "45417,29170" + result, err := findPIDsInMeasurement(line) + assert.Nil(t, err) + assert.Equal(t, expected, result) + + line = "pids not included" + result, err = findPIDsInMeasurement(line) + assert.NotNil(t, err) + assert.Equal(t, "", result) +} + +func TestCreateArgsProcesses(t *testing.T) { + processesPIDs := map[string]string{ + "process": "12345, 99999", + } + expected := "--mon-pid=all:[12345, 99999];mbt:[12345, 99999];" + result := createArgProcess(processesPIDs) + assert.EqualValues(t, expected, result) + + processesPIDs = map[string]string{ + "process": "12345, 99999", + "process2": "44444, 11111", + } + expectedPrefix := "--mon-pid=" + expectedSubstring := "all:[12345, 99999];mbt:[12345, 99999];" + expectedSubstring2 := "all:[44444, 11111];mbt:[44444, 11111];" + result = createArgProcess(processesPIDs) + assert.Contains(t, result, expectedPrefix) + assert.Contains(t, result, expectedSubstring) + assert.Contains(t, result, expectedSubstring2) +} + +func TestCreateArgsCores(t *testing.T) { + cores := []string{"1,2,3"} + expected := "--mon-core=all:[1,2,3];mbt:[1,2,3];" + result := createArgCores(cores) + assert.EqualValues(t, expected, result) + + cores = []string{"1,2,3", "4,5,6"} + expected = "--mon-core=" + expectedPrefix := "--mon-core=" + expectedSubstring := "all:[1,2,3];mbt:[1,2,3];" + expectedSubstring2 := "all:[4,5,6];mbt:[4,5,6];" + result = createArgCores(cores) + assert.Contains(t, result, expectedPrefix) + assert.Contains(t, result, expectedSubstring) + assert.Contains(t, result, expectedSubstring2) +} + +func TestParseCoresConfig(t *testing.T) { + t.Run("empty slice", func(t *testing.T) { + var configCores []string + result, err := parseCoresConfig(configCores) + assert.Nil(t, err) + assert.Nil(t, result) + }) + + t.Run("empty string in slice", func(t *testing.T) { + configCores := []string{""} + result, err := parseCoresConfig(configCores) + assert.NotNil(t, err) + assert.Nil(t, result) + }) + + t.Run("not correct string", func(t *testing.T) { + configCores := []string{"wrong string"} + result, err := 
parseCoresConfig(configCores) + assert.NotNil(t, err) + assert.Nil(t, result) + }) + + t.Run("not correct string", func(t *testing.T) { + configCores := []string{"1,2", "wasd:#$!;"} + result, err := parseCoresConfig(configCores) + assert.NotNil(t, err) + assert.Nil(t, result) + }) + + t.Run("not correct string", func(t *testing.T) { + configCores := []string{"1,2,2"} + result, err := parseCoresConfig(configCores) + assert.NotNil(t, err) + assert.Nil(t, result) + }) + + t.Run("coma separated cores - positive", func(t *testing.T) { + configCores := []string{"0,1,2,3,4,5"} + expected := []string{"0,1,2,3,4,5"} + result, err := parseCoresConfig(configCores) + assert.Nil(t, err) + assert.EqualValues(t, expected, result) + + configCores = []string{"0,1,2", "3,4,5"} + expected = []string{"0,1,2", "3,4,5"} + result, err = parseCoresConfig(configCores) + assert.Nil(t, err) + assert.EqualValues(t, expected, result) + + configCores = []string{"0,4,1", "2,3,5", "9"} + expected = []string{"0,4,1", "2,3,5", "9"} + result, err = parseCoresConfig(configCores) + assert.Nil(t, err) + assert.EqualValues(t, expected, result) + }) + + t.Run("coma separated cores - negative", func(t *testing.T) { + // cannot monitor same cores in different groups + configCores := []string{"0,1,2", "2"} + result, err := parseCoresConfig(configCores) + assert.NotNil(t, err) + assert.Nil(t, result) + + configCores = []string{"0,1,2", "2,3,4"} + result, err = parseCoresConfig(configCores) + assert.NotNil(t, err) + assert.Nil(t, result) + + configCores = []string{"0,-1,2", "2,3,4"} + result, err = parseCoresConfig(configCores) + assert.NotNil(t, err) + assert.Nil(t, result) + }) + + t.Run("dash separated cores - positive", func(t *testing.T) { + configCores := []string{"0-5"} + expected := []string{"0,1,2,3,4,5"} + result, err := parseCoresConfig(configCores) + assert.Nil(t, err) + assert.EqualValues(t, expected, result) + + configCores = []string{"0-5", "7-10"} + expected = []string{"0,1,2,3,4,5", "7,8,9,10"} + result, err = parseCoresConfig(configCores) + assert.Nil(t, err) + assert.EqualValues(t, expected, result) + + configCores = []string{"5-5"} + expected = []string{"5"} + result, err = parseCoresConfig(configCores) + assert.Nil(t, err) + assert.EqualValues(t, expected, result) + }) + + t.Run("dash separated cores - negative", func(t *testing.T) { + // cannot monitor same cores in different groups + configCores := []string{"0-5", "2-7"} + result, err := parseCoresConfig(configCores) + assert.NotNil(t, err) + assert.Nil(t, result) + + // more than two values in range + configCores = []string{"0-5-10"} + result, err = parseCoresConfig(configCores) + assert.NotNil(t, err) + assert.Nil(t, result) + + // first value cannot be higher than second + configCores = []string{"12-5"} + result, err = parseCoresConfig(configCores) + assert.NotNil(t, err) + assert.Nil(t, result) + + configCores = []string{"0-"} + result, err = parseCoresConfig(configCores) + assert.NotNil(t, err) + assert.Nil(t, result) + }) + + t.Run("mixed separator - positive", func(t *testing.T) { + configCores := []string{"0-5,6,7"} + expected := []string{"0,1,2,3,4,5,6,7"} + result, err := parseCoresConfig(configCores) + assert.Nil(t, err) + assert.EqualValues(t, expected, result) + + configCores = []string{"0-5,6,7", "8,9,10"} + expected = []string{"0,1,2,3,4,5,6,7", "8,9,10"} + result, err = parseCoresConfig(configCores) + assert.Nil(t, err) + assert.EqualValues(t, expected, result) + + configCores = []string{"0-7", "8-10"} + expected = []string{"0,1,2,3,4,5,6,7", 
"8,9,10"} + result, err = parseCoresConfig(configCores) + assert.Nil(t, err) + assert.EqualValues(t, expected, result) + }) + + t.Run("mixed separator - negative", func(t *testing.T) { + // cannot monitor same cores in different groups + configCores := []string{"0-5,", "2-7"} + result, err := parseCoresConfig(configCores) + assert.NotNil(t, err) + assert.Nil(t, result) + + // cores cannot be duplicated + configCores = []string{"0-5,5"} + result, err = parseCoresConfig(configCores) + assert.NotNil(t, err) + assert.Nil(t, result) + + // more than two values in range + configCores = []string{"0-5-6,9"} + result, err = parseCoresConfig(configCores) + assert.NotNil(t, err) + assert.Nil(t, result) + }) +} diff --git a/plugins/inputs/intel_rdt/intel_rdt_windows.go b/plugins/inputs/intel_rdt/intel_rdt_windows.go new file mode 100644 index 0000000000000..e3ab0978fb374 --- /dev/null +++ b/plugins/inputs/intel_rdt/intel_rdt_windows.go @@ -0,0 +1,3 @@ +// +build windows + +package intel_rdt diff --git a/plugins/inputs/intel_rdt/processes.go b/plugins/inputs/intel_rdt/processes.go new file mode 100644 index 0000000000000..ff86a4e6b745c --- /dev/null +++ b/plugins/inputs/intel_rdt/processes.go @@ -0,0 +1,40 @@ +// +build !windows + +package intel_rdt + +import "github.com/prometheus/procfs" + +type ProcessesHandler interface { + getAllProcesses() ([]Process, error) +} + +type Process struct { + Name string + PID int +} + +type ProcessManager struct{} + +func NewProcessor() ProcessesHandler { + return &ProcessManager{} +} + +func (p *ProcessManager) getAllProcesses() ([]Process, error) { + var processes []Process + allProcesses, err := procfs.AllProcs() + if err != nil { + return nil, err + } + for _, proc := range allProcesses { + procComm, err := proc.Comm() + if err != nil { + continue + } + newProcess := Process{ + PID: proc.PID, + Name: procComm, + } + processes = append(processes, newProcess) + } + return processes, nil +} diff --git a/plugins/inputs/intel_rdt/publisher.go b/plugins/inputs/intel_rdt/publisher.go new file mode 100644 index 0000000000000..5ca9890472b27 --- /dev/null +++ b/plugins/inputs/intel_rdt/publisher.go @@ -0,0 +1,171 @@ +// +build !windows + +package intel_rdt + +import ( + "context" + "strings" + "time" + + "github.com/influxdata/telegraf" +) + +// Publisher for publish new RDT metrics to telegraf accumulator +type Publisher struct { + acc telegraf.Accumulator + Log telegraf.Logger + shortenedMetrics bool + BufferChanProcess chan processMeasurement + BufferChanCores chan string + errChan chan error + stopChan chan bool +} + +func NewPublisher(acc telegraf.Accumulator, log telegraf.Logger, shortenedMetrics bool) Publisher { + return Publisher{ + acc: acc, + Log: log, + shortenedMetrics: shortenedMetrics, + BufferChanProcess: make(chan processMeasurement), + BufferChanCores: make(chan string), + errChan: make(chan error), + } +} + +func (p *Publisher) publish(ctx context.Context) { + go func() { + for { + select { + case newMeasurements := <-p.BufferChanCores: + p.publishCores(newMeasurements) + case newMeasurements := <-p.BufferChanProcess: + p.publishProcess(newMeasurements) + case err := <-p.errChan: + p.Log.Error(err) + case <-ctx.Done(): + return + } + } + }() +} + +func (p *Publisher) publishCores(measurement string) { + coresString, values, timestamp, err := parseCoresMeasurement(measurement) + if err != nil { + p.errChan <- err + } + p.addToAccumulatorCores(coresString, values, timestamp) + return +} + +func (p *Publisher) publishProcess(measurement processMeasurement) 
+func (p *Publisher) publishProcess(measurement processMeasurement) {
+	process, coresString, values, timestamp, err := parseProcessesMeasurement(measurement)
+	if err != nil {
+		p.errChan <- err
+		return
+	}
+	p.addToAccumulatorProcesses(process, coresString, values, timestamp)
+}
+
+func parseCoresMeasurement(measurements string) (string, []float64, time.Time, error) {
+	var values []float64
+	timeValue, metricsValues, cores, err := splitCSVLineIntoValues(measurements)
+	if err != nil {
+		return "", nil, time.Time{}, err
+	}
+	timestamp, err := parseTime(timeValue)
+	if err != nil {
+		return "", nil, time.Time{}, err
+	}
+	// join the cores slice into one comma-separated string
+	coresString := strings.Join(cores, ",")
+	// trim unwanted quotes
+	coresString = strings.Trim(coresString, "\"")
+
+	for _, metric := range metricsValues {
+		parsedValue, err := parseFloat(metric)
+		if err != nil {
+			return "", nil, time.Time{}, err
+		}
+		values = append(values, parsedValue)
+	}
+	return coresString, values, timestamp, nil
+}
+
+func (p *Publisher) addToAccumulatorCores(cores string, metricsValues []float64, timestamp time.Time) {
+	for i, value := range metricsValues {
+		if p.shortenedMetrics {
+			//0: "IPC"
+			//1: "LLC_Misses"
+			if i == 0 || i == 1 {
+				continue
+			}
+		}
+		tags := map[string]string{}
+		fields := make(map[string]interface{})
+
+		tags["cores"] = cores
+		tags["name"] = pqosMetricOrder[i]
+		fields["value"] = value
+
+		p.acc.AddFields("rdt_metric", fields, tags, timestamp)
+	}
+}
+
+func parseProcessesMeasurement(measurement processMeasurement) (string, string, []float64, time.Time, error) {
+	var values []float64
+	timeValue, metricsValues, coreOrPidsValues, pids, err := parseProcessMeasurement(measurement.measurement)
+	if err != nil {
+		return "", "", nil, time.Time{}, err
+	}
+	timestamp, err := parseTime(timeValue)
+	if err != nil {
+		return "", "", nil, time.Time{}, err
+	}
+	actualProcess := measurement.name
+	lenOfPids := len(strings.Split(pids, ","))
+	cores := coreOrPidsValues[lenOfPids:]
+	coresString := strings.Trim(strings.Join(cores, ","), `"`)
+
+	for _, metric := range metricsValues {
+		parsedValue, err := parseFloat(metric)
+		if err != nil {
+			return "", "", nil, time.Time{}, err
+		}
+		values = append(values, parsedValue)
+	}
+	return actualProcess, coresString, values, timestamp, nil
+}
+
+func (p *Publisher) addToAccumulatorProcesses(process string, cores string, metricsValues []float64, timestamp time.Time) {
+	for i, value := range metricsValues {
+		if p.shortenedMetrics {
+			//0: "IPC"
+			//1: "LLC_Misses"
+			if i == 0 || i == 1 {
+				continue
+			}
+		}
+		tags := map[string]string{}
+		fields := make(map[string]interface{})
+
+		tags["process"] = process
+		tags["cores"] = cores
+		tags["name"] = pqosMetricOrder[i]
+		fields["value"] = value
+
+		p.acc.AddFields("rdt_metric", fields, tags, timestamp)
+	}
+}
+
+func parseProcessMeasurement(measurements string) (string, []string, []string, string, error) {
+	timeValue, metricsValues, coreOrPidsValues, err := splitCSVLineIntoValues(measurements)
+	if err != nil {
+		return "", nil, nil, "", err
+	}
+	pids, err := findPIDsInMeasurement(measurements)
+	if err != nil {
+		return "", nil, nil, "", err
+	}
+	return timeValue, metricsValues, coreOrPidsValues, pids, nil
+}
diff --git a/plugins/inputs/intel_rdt/publisher_test.go b/plugins/inputs/intel_rdt/publisher_test.go
new file mode 100644
index 0000000000000..5248ede7a16db
--- /dev/null
+++ b/plugins/inputs/intel_rdt/publisher_test.go
@@ -0,0 +1,444 @@
+// +build !windows
+
+package intel_rdt
+
+import (
+	"fmt"
+	"testing"
+	"time"
+
+	
"github.com/influxdata/telegraf/testutil" + "github.com/stretchr/testify/assert" +) + +var metricsValues = map[string]float64{ + "IPC": 0.5, + "LLC_Misses": 61650, + "LLC": 1632, + "MBL": 0.6, + "MBR": 0.9, + "MBT": 1.9, +} + +func TestParseCoresMeasurement(t *testing.T) { + timestamp := "2020-08-12 13:34:36" + cores := "\"37,44\"" + + t.Run("valid measurement string", func(t *testing.T) { + measurement := fmt.Sprintf("%s,%s,%f,%f,%f,%f,%f,%f", + timestamp, + cores, + metricsValues["IPC"], + metricsValues["LLC_Misses"], + metricsValues["LLC"], + metricsValues["MBL"], + metricsValues["MBR"], + metricsValues["MBT"]) + + expectedCores := "37,44" + expectedTimestamp := time.Date(2020, 8, 12, 13, 34, 36, 0, time.UTC) + + resultCoresString, resultValues, resultTimestamp, err := parseCoresMeasurement(measurement) + + assert.Nil(t, err) + assert.Equal(t, expectedCores, resultCoresString) + assert.Equal(t, expectedTimestamp, resultTimestamp) + assert.Equal(t, resultValues[0], metricsValues["IPC"]) + assert.Equal(t, resultValues[1], metricsValues["LLC_Misses"]) + assert.Equal(t, resultValues[2], metricsValues["LLC"]) + assert.Equal(t, resultValues[3], metricsValues["MBL"]) + assert.Equal(t, resultValues[4], metricsValues["MBR"]) + assert.Equal(t, resultValues[5], metricsValues["MBT"]) + }) + t.Run("not valid measurement string", func(t *testing.T) { + measurement := "not, valid, measurement" + + resultCoresString, resultValues, resultTimestamp, err := parseCoresMeasurement(measurement) + + assert.NotNil(t, err) + assert.Equal(t, "", resultCoresString) + assert.Nil(t, resultValues) + assert.Equal(t, time.Time{}, resultTimestamp) + }) + t.Run("not valid values string", func(t *testing.T) { + measurement := fmt.Sprintf("%s,%s,%s,%s,%f,%f,%f,%f", + timestamp, + cores, + "%d", + "in", + metricsValues["LLC"], + metricsValues["MBL"], + metricsValues["MBR"], + metricsValues["MBT"]) + + resultCoresString, resultValues, resultTimestamp, err := parseCoresMeasurement(measurement) + + assert.NotNil(t, err) + assert.Equal(t, "", resultCoresString) + assert.Nil(t, resultValues) + assert.Equal(t, time.Time{}, resultTimestamp) + }) + t.Run("not valid timestamp format", func(t *testing.T) { + invalidTimestamp := "2020-08-12-21 13:34:" + measurement := fmt.Sprintf("%s,%s,%f,%f,%f,%f,%f,%f", + invalidTimestamp, + cores, + metricsValues["IPC"], + metricsValues["LLC_Misses"], + metricsValues["LLC"], + metricsValues["MBL"], + metricsValues["MBR"], + metricsValues["MBT"]) + + resultCoresString, resultValues, resultTimestamp, err := parseCoresMeasurement(measurement) + + assert.NotNil(t, err) + assert.Equal(t, "", resultCoresString) + assert.Nil(t, resultValues) + assert.Equal(t, time.Time{}, resultTimestamp) + }) +} + +func TestParseProcessesMeasurement(t *testing.T) { + timestamp := "2020-08-12 13:34:36" + cores := "\"37,44\"" + pids := "\"12345,9999\"" + processName := "process_name" + + t.Run("valid measurement string", func(t *testing.T) { + measurement := fmt.Sprintf("%s,%s,%s,%f,%f,%f,%f,%f,%f", + timestamp, + pids, + cores, + metricsValues["IPC"], + metricsValues["LLC_Misses"], + metricsValues["LLC"], + metricsValues["MBL"], + metricsValues["MBR"], + metricsValues["MBT"]) + + expectedCores := "37,44" + expectedTimestamp := time.Date(2020, 8, 12, 13, 34, 36, 0, time.UTC) + + newMeasurement := processMeasurement{ + name: processName, + measurement: measurement, + } + actualProcess, resultCoresString, resultValues, resultTimestamp, err := parseProcessesMeasurement(newMeasurement) + + assert.Nil(t, err) + 
assert.Equal(t, processName, actualProcess) + assert.Equal(t, expectedCores, resultCoresString) + assert.Equal(t, expectedTimestamp, resultTimestamp) + assert.Equal(t, resultValues[0], metricsValues["IPC"]) + assert.Equal(t, resultValues[1], metricsValues["LLC_Misses"]) + assert.Equal(t, resultValues[2], metricsValues["LLC"]) + assert.Equal(t, resultValues[3], metricsValues["MBL"]) + assert.Equal(t, resultValues[4], metricsValues["MBR"]) + assert.Equal(t, resultValues[5], metricsValues["MBT"]) + }) + t.Run("not valid measurement string", func(t *testing.T) { + processName := "process_name" + measurement := "invalid,measurement,format" + + newMeasurement := processMeasurement{ + name: processName, + measurement: measurement, + } + actualProcess, resultCoresString, resultValues, resultTimestamp, err := parseProcessesMeasurement(newMeasurement) + + assert.NotNil(t, err) + assert.Equal(t, "", actualProcess) + assert.Equal(t, "", resultCoresString) + assert.Nil(t, resultValues) + assert.Equal(t, time.Time{}, resultTimestamp) + }) + t.Run("not valid timestamp format", func(t *testing.T) { + invalidTimestamp := "2020-20-20-31" + measurement := fmt.Sprintf("%s,%s,%s,%f,%f,%f,%f,%f,%f", + invalidTimestamp, + pids, + cores, + metricsValues["IPC"], + metricsValues["LLC_Misses"], + metricsValues["LLC"], + metricsValues["MBL"], + metricsValues["MBR"], + metricsValues["MBT"]) + + newMeasurement := processMeasurement{ + name: processName, + measurement: measurement, + } + actualProcess, resultCoresString, resultValues, resultTimestamp, err := parseProcessesMeasurement(newMeasurement) + + assert.NotNil(t, err) + assert.Equal(t, "", actualProcess) + assert.Equal(t, "", resultCoresString) + assert.Nil(t, resultValues) + assert.Equal(t, time.Time{}, resultTimestamp) + }) + t.Run("not valid values string", func(t *testing.T) { + measurement := fmt.Sprintf("%s,%s,%s,%s,%s,%f,%f,%f,%f", + timestamp, + pids, + cores, + "1##", + "da", + metricsValues["LLC"], + metricsValues["MBL"], + metricsValues["MBR"], + metricsValues["MBT"]) + + newMeasurement := processMeasurement{ + name: processName, + measurement: measurement, + } + actualProcess, resultCoresString, resultValues, resultTimestamp, err := parseProcessesMeasurement(newMeasurement) + + assert.NotNil(t, err) + assert.Equal(t, "", actualProcess) + assert.Equal(t, "", resultCoresString) + assert.Nil(t, resultValues) + assert.Equal(t, time.Time{}, resultTimestamp) + }) +} + +func TestAddToAccumulatorCores(t *testing.T) { + t.Run("shortened false", func(t *testing.T) { + var acc testutil.Accumulator + publisher := Publisher{acc: &acc} + + cores := "1,2,3" + metricsValues := []float64{1, 2, 3, 4, 5, 6} + timestamp := time.Date(2020, 8, 12, 13, 34, 36, 0, time.UTC) + + publisher.addToAccumulatorCores(cores, metricsValues, timestamp) + + for _, test := range testCoreMetrics { + acc.AssertContainsTaggedFields(t, "rdt_metric", test.fields, test.tags) + } + }) + t.Run("shortened true", func(t *testing.T) { + var acc testutil.Accumulator + publisher := Publisher{acc: &acc, shortenedMetrics: true} + + cores := "1,2,3" + metricsValues := []float64{1, 2, 3, 4, 5, 6} + timestamp := time.Date(2020, 8, 12, 13, 34, 36, 0, time.UTC) + + publisher.addToAccumulatorCores(cores, metricsValues, timestamp) + + for _, test := range testCoreMetricsShortened { + acc.AssertDoesNotContainsTaggedFields(t, "rdt_metric", test.fields, test.tags) + } + }) +} + +func TestAddToAccumulatorProcesses(t *testing.T) { + t.Run("shortened false", func(t *testing.T) { + var acc testutil.Accumulator + 
publisher := Publisher{acc: &acc} + + process := "process_name" + cores := "1,2,3" + metricsValues := []float64{1, 2, 3, 4, 5, 6} + timestamp := time.Date(2020, 8, 12, 13, 34, 36, 0, time.UTC) + + publisher.addToAccumulatorProcesses(process, cores, metricsValues, timestamp) + + for _, test := range testCoreProcesses { + acc.AssertContainsTaggedFields(t, "rdt_metric", test.fields, test.tags) + } + }) + t.Run("shortened true", func(t *testing.T) { + var acc testutil.Accumulator + publisher := Publisher{acc: &acc, shortenedMetrics: true} + + process := "process_name" + cores := "1,2,3" + metricsValues := []float64{1, 2, 3, 4, 5, 6} + timestamp := time.Date(2020, 8, 12, 13, 34, 36, 0, time.UTC) + + publisher.addToAccumulatorProcesses(process, cores, metricsValues, timestamp) + + for _, test := range testCoreProcessesShortened { + acc.AssertDoesNotContainsTaggedFields(t, "rdt_metric", test.fields, test.tags) + } + }) +} + +var ( + testCoreMetrics = []struct { + fields map[string]interface{} + tags map[string]string + }{ + { + map[string]interface{}{ + "value": float64(1), + }, + map[string]string{ + "cores": "1,2,3", + "name": "IPC", + }, + }, + { + map[string]interface{}{ + "value": float64(2), + }, + map[string]string{ + "cores": "1,2,3", + "name": "LLC_Misses", + }, + }, + { + map[string]interface{}{ + "value": float64(3), + }, + map[string]string{ + "cores": "1,2,3", + "name": "LLC", + }, + }, + { + map[string]interface{}{ + "value": float64(4), + }, + map[string]string{ + "cores": "1,2,3", + "name": "MBL", + }, + }, + { + map[string]interface{}{ + "value": float64(5), + }, + map[string]string{ + "cores": "1,2,3", + "name": "MBR", + }, + }, + { + map[string]interface{}{ + "value": float64(6), + }, + map[string]string{ + "cores": "1,2,3", + "name": "MBT", + }, + }, + } + testCoreMetricsShortened = []struct { + fields map[string]interface{} + tags map[string]string + }{ + { + map[string]interface{}{ + "value": float64(1), + }, + map[string]string{ + "cores": "1,2,3", + "name": "IPC", + }, + }, + { + map[string]interface{}{ + "value": float64(2), + }, + map[string]string{ + "cores": "1,2,3", + "name": "LLC_Misses", + }, + }, + } + testCoreProcesses = []struct { + fields map[string]interface{} + tags map[string]string + }{ + { + map[string]interface{}{ + "value": float64(1), + }, + map[string]string{ + "cores": "1,2,3", + "name": "IPC", + "process": "process_name", + }, + }, + { + map[string]interface{}{ + "value": float64(2), + }, + map[string]string{ + "cores": "1,2,3", + "name": "LLC_Misses", + "process": "process_name", + }, + }, + { + map[string]interface{}{ + "value": float64(3), + }, + map[string]string{ + "cores": "1,2,3", + "name": "LLC", + "process": "process_name", + }, + }, + { + map[string]interface{}{ + "value": float64(4), + }, + map[string]string{ + "cores": "1,2,3", + "name": "MBL", + "process": "process_name", + }, + }, + { + map[string]interface{}{ + "value": float64(5), + }, + map[string]string{ + "cores": "1,2,3", + "name": "MBR", + "process": "process_name", + }, + }, + { + map[string]interface{}{ + "value": float64(6), + }, + map[string]string{ + "cores": "1,2,3", + "name": "MBT", + "process": "process_name", + }, + }, + } + testCoreProcessesShortened = []struct { + fields map[string]interface{} + tags map[string]string + }{ + { + map[string]interface{}{ + "value": float64(1), + }, + map[string]string{ + "cores": "1,2,3", + "name": "IPC", + "process": "process_name", + }, + }, + { + map[string]interface{}{ + "value": float64(2), + }, + map[string]string{ + "cores": 
"1,2,3", + "name": "LLC_Misses", + "process": "process_name", + }, + }, + } +) diff --git a/plugins/inputs/ipmi_sensor/connection.go b/plugins/inputs/ipmi_sensor/connection.go index 8ce5e34488004..7f6a4c3594f61 100644 --- a/plugins/inputs/ipmi_sensor/connection.go +++ b/plugins/inputs/ipmi_sensor/connection.go @@ -22,7 +22,6 @@ func NewConnection(server string, privilege string) *Connection { conn.Privilege = privilege inx1 := strings.LastIndex(server, "@") inx2 := strings.Index(server, "(") - inx3 := strings.Index(server, ")") connstr := server @@ -36,7 +35,7 @@ func NewConnection(server string, privilege string) *Connection { if inx2 > 0 { inx2 = strings.Index(connstr, "(") - inx3 = strings.Index(connstr, ")") + inx3 := strings.Index(connstr, ")") conn.Interface = connstr[0:inx2] conn.Hostname = connstr[inx2+1 : inx3] diff --git a/plugins/inputs/jenkins/jenkins.go b/plugins/inputs/jenkins/jenkins.go index f4882b1dc0bf2..a2d3e3500bc30 100644 --- a/plugins/inputs/jenkins/jenkins.go +++ b/plugins/inputs/jenkins/jenkins.go @@ -439,7 +439,9 @@ type jobRequest struct { } func (jr jobRequest) combined() []string { - return append(jr.parents, jr.name) + path := make([]string, len(jr.parents)) + copy(path, jr.parents) + return append(path, jr.name) } func (jr jobRequest) combinedEscaped() []string { diff --git a/plugins/inputs/jenkins/jenkins_test.go b/plugins/inputs/jenkins/jenkins_test.go index b8284fc0d3348..be899476d8595 100644 --- a/plugins/inputs/jenkins/jenkins_test.go +++ b/plugins/inputs/jenkins/jenkins_test.go @@ -602,6 +602,59 @@ func TestGatherJobs(t *testing.T) { }, }, }, + { + name: "gather metrics for nested jobs with space exercising append slice behaviour", + input: mockHandler{ + responseMap: map[string]interface{}{ + "/api/json": &jobResponse{ + Jobs: []innerJob{ + {Name: "l1"}, + }, + }, + "/job/l1/api/json": &jobResponse{ + Jobs: []innerJob{ + {Name: "l2"}, + }, + }, + "/job/l1/job/l2/api/json": &jobResponse{ + Jobs: []innerJob{ + {Name: "job 1"}, + }, + }, + "/job/l1/job/l2/job/job%201/api/json": &jobResponse{ + Jobs: []innerJob{ + {Name: "job 2"}, + }, + }, + "/job/l1/job/l2/job/job%201/job/job%202/api/json": &jobResponse{ + LastBuild: jobBuild{ + Number: 3, + }, + }, + "/job/l1/job/l2/job/job%201/job/job%202/3/api/json": &buildResponse{ + Building: false, + Result: "SUCCESS", + Duration: 25558, + Timestamp: (time.Now().Unix() - int64(time.Minute.Seconds())) * 1000, + }, + }, + }, + output: &testutil.Accumulator{ + Metrics: []*testutil.Metric{ + { + Tags: map[string]string{ + "name": "job 2", + "parents": "l1/l2/job 1", + "result": "SUCCESS", + }, + Fields: map[string]interface{}{ + "duration": int64(25558), + "result_code": 0, + }, + }, + }, + }, + }, { name: "gather sub jobs, jobs filter", input: mockHandler{ diff --git a/plugins/inputs/jolokia/jolokia.go b/plugins/inputs/jolokia/jolokia.go index a9eb0b24bd26f..317a47efbd115 100644 --- a/plugins/inputs/jolokia/jolokia.go +++ b/plugins/inputs/jolokia/jolokia.go @@ -234,9 +234,11 @@ func (j *Jolokia) prepareRequest(server Server, metrics []Metric) (*http.Request } requestBody, err := json.Marshal(bulkBodyContent) + if err != nil { + return nil, err + } req, err := http.NewRequest("POST", jolokiaUrl.String(), bytes.NewBuffer(requestBody)) - if err != nil { return nil, err } diff --git a/plugins/inputs/jolokia2/client.go b/plugins/inputs/jolokia2/client.go index efa6db400692d..90aa9c0db7fce 100644 --- a/plugins/inputs/jolokia2/client.go +++ b/plugins/inputs/jolokia2/client.go @@ -131,6 +131,10 @@ func (c *Client) 
read(requests []ReadRequest) ([]ReadResponse, error) {
 	}
 
 	req, err := http.NewRequest("POST", requestUrl, bytes.NewBuffer(requestBody))
+	if err != nil {
+		return nil, fmt.Errorf("unable to create new request '%s': %s", requestUrl, err)
+	}
+
 	req.Header.Add("Content-type", "application/json")
 
 	resp, err := c.client.Do(req)
diff --git a/plugins/inputs/kibana/kibana.go b/plugins/inputs/kibana/kibana.go
index 7a2f7ae3d12fa..98b81a91f52b9 100644
--- a/plugins/inputs/kibana/kibana.go
+++ b/plugins/inputs/kibana/kibana.go
@@ -238,8 +238,11 @@
 }
 
 func (k *Kibana) gatherJsonData(url string, v interface{}) (host string, err error) {
 	request, err := http.NewRequest("GET", url, nil)
+	if err != nil {
+		return "", fmt.Errorf("unable to create new request '%s': %v", url, err)
+	}
 
 	if (k.Username != "") || (k.Password != "") {
 		request.SetBasicAuth(k.Username, k.Password)
diff --git a/plugins/inputs/mcrouter/mcrouter.go b/plugins/inputs/mcrouter/mcrouter.go
index 1ae5c79ea7c7e..d6303c87758e4 100644
--- a/plugins/inputs/mcrouter/mcrouter.go
+++ b/plugins/inputs/mcrouter/mcrouter.go
@@ -198,9 +198,11 @@ func (m *Mcrouter) gatherServer(ctx context.Context, address string, acc telegra
 	var dialer net.Dialer
 
 	address, protocol, err = m.ParseAddress(address)
+	if err != nil {
+		return err
+	}
 
 	conn, err = dialer.DialContext(ctx, protocol, address)
-
 	if err != nil {
 		return err
 	}
diff --git a/plugins/inputs/net_response/net_response.go b/plugins/inputs/net_response/net_response.go
index 3f75a6058115d..023b4405e3609 100644
--- a/plugins/inputs/net_response/net_response.go
+++ b/plugins/inputs/net_response/net_response.go
@@ -141,6 +141,11 @@ func (n *NetResponse) UDPGather() (tags map[string]string, fields map[string]int
 	start := time.Now()
 	// Resolving
 	udpAddr, err := net.ResolveUDPAddr("udp", n.Address)
+	// Handle error
+	if err != nil {
+		setResult(ConnectionFailed, fields, tags, n.Expect)
+		return tags, fields
+	}
 	// Connecting
 	conn, err := net.DialUDP("udp", nil, udpAddr)
 	// Handle error
diff --git a/plugins/inputs/openldap/openldap.go b/plugins/inputs/openldap/openldap.go
index bafb5db892414..d5ed7e4cc1c3f 100644
--- a/plugins/inputs/openldap/openldap.go
+++ b/plugins/inputs/openldap/openldap.go
@@ -123,6 +123,10 @@ func (o *Openldap) Gather(acc telegraf.Accumulator) error {
 			return nil
 		}
 		err = l.StartTLS(tlsConfig)
+		if err != nil {
+			acc.AddError(err)
+			return nil
+		}
 	} else {
 		acc.AddError(fmt.Errorf("Invalid setting for ssl: %s", o.TLS))
 		return nil
diff --git a/plugins/inputs/phpfpm/phpfpm.go b/plugins/inputs/phpfpm/phpfpm.go
index d6b3681209272..f191844a34d56 100644
--- a/plugins/inputs/phpfpm/phpfpm.go
+++ b/plugins/inputs/phpfpm/phpfpm.go
@@ -198,21 +198,22 @@ func (p *phpfpm) gatherFcgi(fcgi *conn, statusPath string, acc telegraf.Accumula
 func (p *phpfpm) gatherHttp(addr string, acc telegraf.Accumulator) error {
 	u, err := url.Parse(addr)
 	if err != nil {
-		return fmt.Errorf("Unable parse server address '%s': %s", addr, err)
+		return fmt.Errorf("unable to parse server address '%s': %v", addr, err)
+	}
+
+	req, err := http.NewRequest("GET", fmt.Sprintf("%s://%s%s", u.Scheme, u.Host, u.Path), nil)
+	if err != nil {
+		return fmt.Errorf("unable to create new request '%s': %v", addr, err)
 	}
 
-	req, err := http.NewRequest("GET", fmt.Sprintf("%s://%s%s", u.Scheme,
-		u.Host, u.Path), nil)
 	res, err := p.client.Do(req)
 	if err != nil {
-		return fmt.Errorf("Unable to connect to phpfpm status page '%s': %v",
-			addr, err)
+		return 
fmt.Errorf("unable to connect to phpfpm status page '%s': %v", addr, err) } defer res.Body.Close() if res.StatusCode != 200 { - return fmt.Errorf("Unable to get valid stat result from '%s': %v", - addr, err) + return fmt.Errorf("unable to get valid stat result from '%s': %v", addr, err) } importMetric(res.Body, acc, addr) @@ -220,7 +221,7 @@ func (p *phpfpm) gatherHttp(addr string, acc telegraf.Accumulator) error { } // Import stat data into Telegraf system -func importMetric(r io.Reader, acc telegraf.Accumulator, addr string) (poolStat, error) { +func importMetric(r io.Reader, acc telegraf.Accumulator, addr string) poolStat { stats := make(poolStat) var currentPool string @@ -273,7 +274,7 @@ func importMetric(r io.Reader, acc telegraf.Accumulator, addr string) (poolStat, acc.AddFields("phpfpm", fields, tags) } - return stats, nil + return stats } func expandUrls(urls []string) ([]string, error) { diff --git a/plugins/inputs/phpfpm/phpfpm_test.go b/plugins/inputs/phpfpm/phpfpm_test.go index 5f68b07f5dbae..f3b72a8281b7e 100644 --- a/plugins/inputs/phpfpm/phpfpm_test.go +++ b/plugins/inputs/phpfpm/phpfpm_test.go @@ -301,7 +301,7 @@ func TestPhpFpmGeneratesMetrics_Throw_Error_When_Fpm_Status_Is_Not_Responding(t err = acc.GatherError(r.Gather) require.Error(t, err) - assert.Contains(t, err.Error(), `Unable to connect to phpfpm status page 'http://aninvalidone'`) + assert.Contains(t, err.Error(), `unable to connect to phpfpm status page 'http://aninvalidone'`) assert.Contains(t, err.Error(), `lookup aninvalidone`) } diff --git a/plugins/inputs/processes/processes_notwindows.go b/plugins/inputs/processes/processes_notwindows.go index 445e7fb9f7255..b77f1db767d39 100644 --- a/plugins/inputs/processes/processes_notwindows.go +++ b/plugins/inputs/processes/processes_notwindows.go @@ -136,6 +136,9 @@ func (p *Processes) gatherFromProc(fields map[string]interface{}) error { for _, filename := range filenames { _, err := os.Stat(filename) + if err != nil { + return err + } data, err := p.readProcFile(filename) if err != nil { return err diff --git a/plugins/inputs/procstat/native_finder.go b/plugins/inputs/procstat/native_finder.go index 57d9d81c221ce..5f286dd64a63e 100644 --- a/plugins/inputs/procstat/native_finder.go +++ b/plugins/inputs/procstat/native_finder.go @@ -48,7 +48,7 @@ func (pg *NativeFinder) PidFile(path string) ([]PID, error) { return pids, fmt.Errorf("Failed to read pidfile '%s'. Error: '%s'", path, err) } - pid, err := strconv.Atoi(strings.TrimSpace(string(pidString))) + pid, err := strconv.ParseInt(strings.TrimSpace(string(pidString)), 10, 32) if err != nil { return pids, err } diff --git a/plugins/inputs/procstat/pgrep.go b/plugins/inputs/procstat/pgrep.go index 48bf76ed69e52..37f9dfc3f67a9 100644 --- a/plugins/inputs/procstat/pgrep.go +++ b/plugins/inputs/procstat/pgrep.go @@ -30,7 +30,7 @@ func (pg *Pgrep) PidFile(path string) ([]PID, error) { return pids, fmt.Errorf("Failed to read pidfile '%s'. 
Error: '%s'", path, err) } - pid, err := strconv.Atoi(strings.TrimSpace(string(pidString))) + pid, err := strconv.ParseInt(strings.TrimSpace(string(pidString)), 10, 32) if err != nil { return pids, err } @@ -80,13 +80,11 @@ func parseOutput(out string) ([]PID, error) { pids := []PID{} fields := strings.Fields(out) for _, field := range fields { - pid, err := strconv.Atoi(field) + pid, err := strconv.ParseInt(field, 10, 32) if err != nil { return nil, err } - if err == nil { - pids = append(pids, PID(pid)) - } + pids = append(pids, PID(pid)) } return pids, nil } diff --git a/plugins/inputs/procstat/procstat.go b/plugins/inputs/procstat/procstat.go index 61e575370537b..1d6af5df42246 100644 --- a/plugins/inputs/procstat/procstat.go +++ b/plugins/inputs/procstat/procstat.go @@ -413,7 +413,7 @@ func (p *Procstat) systemdUnitPIDs() ([]PID, error) { if len(kv[1]) == 0 || bytes.Equal(kv[1], []byte("0")) { return nil, nil } - pid, err := strconv.Atoi(string(kv[1])) + pid, err := strconv.ParseInt(string(kv[1]), 10, 32) if err != nil { return nil, fmt.Errorf("invalid pid '%s'", kv[1]) } @@ -438,7 +438,7 @@ func (p *Procstat) cgroupPIDs() ([]PID, error) { if len(pidBS) == 0 { continue } - pid, err := strconv.Atoi(string(pidBS)) + pid, err := strconv.ParseInt(string(pidBS), 10, 32) if err != nil { return nil, fmt.Errorf("invalid pid '%s'", pidBS) } diff --git a/plugins/inputs/prometheus/parser.go b/plugins/inputs/prometheus/parser.go index 6427c3f8c6d52..0726c87713b0a 100644 --- a/plugins/inputs/prometheus/parser.go +++ b/plugins/inputs/prometheus/parser.go @@ -74,8 +74,7 @@ func ParseV2(buf []byte, header http.Header) ([]telegraf.Metric, error) { } else { // standard metric // reading fields - fields := make(map[string]interface{}) - fields = getNameAndValueV2(m, metricName) + fields := getNameAndValueV2(m, metricName) // converting to telegraf metric if len(fields) > 0 { var t time.Time @@ -203,7 +202,7 @@ func Parse(buf []byte, header http.Header) ([]telegraf.Metric, error) { // reading tags tags := makeLabels(m) // reading fields - fields := make(map[string]interface{}) + var fields map[string]interface{} if mf.GetType() == dto.MetricType_SUMMARY { // summary metric fields = makeQuantiles(m) diff --git a/plugins/inputs/prometheus/prometheus.go b/plugins/inputs/prometheus/prometheus.go index b4a8204b724ba..70d72e0b0a379 100644 --- a/plugins/inputs/prometheus/prometheus.go +++ b/plugins/inputs/prometheus/prometheus.go @@ -265,7 +265,11 @@ func (p *Prometheus) gatherURL(u URLAndAddress, acc telegraf.Accumulator) error if path == "" { path = "/metrics" } - req, err = http.NewRequest("GET", "http://localhost"+path, nil) + addr := "http://localhost" + path + req, err = http.NewRequest("GET", addr, nil) + if err != nil { + return fmt.Errorf("unable to create new request '%s': %s", addr, err) + } // ignore error because it's been handled before getting here tlsCfg, _ := p.ClientConfig.TLSConfig() @@ -285,6 +289,9 @@ func (p *Prometheus) gatherURL(u URLAndAddress, acc telegraf.Accumulator) error u.URL.Path = "/metrics" } req, err = http.NewRequest("GET", u.URL.String(), nil) + if err != nil { + return fmt.Errorf("unable to create new request '%s': %s", u.URL.String(), err) + } } req.Header.Add("Accept", acceptHeader) diff --git a/plugins/inputs/proxmox/README.md b/plugins/inputs/proxmox/README.md index 767756178b1ce..ac81633a3f461 100644 --- a/plugins/inputs/proxmox/README.md +++ b/plugins/inputs/proxmox/README.md @@ -2,6 +2,8 @@ The proxmox plugin gathers metrics about containers and VMs using the Proxmox 
API.
 
+Telegraf minimum version: Telegraf 1.16.0
+
 ### Configuration:
 
 ```toml
diff --git a/plugins/inputs/proxmox/proxmox.go b/plugins/inputs/proxmox/proxmox.go
index 41b74760aa869..13dcb4a95f304 100644
--- a/plugins/inputs/proxmox/proxmox.go
+++ b/plugins/inputs/proxmox/proxmox.go
@@ -79,6 +79,9 @@ func init() {
 func getNodeSearchDomain(px *Proxmox) error {
 	apiUrl := "/nodes/" + px.hostname + "/dns"
 	jsonData, err := px.requestFunction(px, apiUrl, http.MethodGet, nil)
+	if err != nil {
+		return err
+	}
 
 	var nodeDns NodeDns
 	err = json.Unmarshal(jsonData, &nodeDns)
diff --git a/plugins/inputs/ras/README.md b/plugins/inputs/ras/README.md
new file mode 100644
index 0000000000000..641d1f48844ac
--- /dev/null
+++ b/plugins/inputs/ras/README.md
@@ -0,0 +1,65 @@
+# RAS Input Plugin
+
+The `RAS` plugin gathers and counts errors provided by [RASDaemon](https://github.com/mchehab/rasdaemon).
+
+### Configuration
+
+```toml
+[[inputs.ras]]
+  ## Optional path to RASDaemon sqlite3 database.
+  ## Default: /var/lib/rasdaemon/ras-mc_event.db
+  # db_path = ""
+```
+
+Note that `RASDaemon` runs with the `--enable-sqlite3` flag by default. If there are problems with the SQLite3 database, please verify that this option is still enabled.
+
+### Metrics
+
+- ras
+  - tags:
+    - socket_id
+  - fields:
+    - memory_read_corrected_errors
+    - memory_read_uncorrectable_errors
+    - memory_write_corrected_errors
+    - memory_write_uncorrectable_errors
+    - cache_l0_l1_errors
+    - tlb_instruction_errors
+    - cache_l2_errors
+    - upi_errors
+    - processor_base_errors
+    - processor_bus_errors
+    - internal_timer_errors
+    - smm_handler_code_access_violation_errors
+    - internal_parity_errors
+    - frc_errors
+    - external_mce_errors
+    - microcode_rom_parity_errors
+    - unclassified_mce_errors
+
+Please note that `processor_base_errors` is an aggregate counter measuring the following MCE events:
+- internal_timer_errors
+- smm_handler_code_access_violation_errors
+- internal_parity_errors
+- frc_errors
+- external_mce_errors
+- microcode_rom_parity_errors
+- unclassified_mce_errors
+
+### Permissions
+
+This plugin requires access to the SQLite3 database created by `RASDaemon`. Please make sure that the user running Telegraf has the required permissions to read this database.
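+
+As a quick check, you can verify that the user running Telegraf is able to read
+the `mce_record` table queried by this plugin, for example with:
+
+```
+sudo -u telegraf sqlite3 /var/lib/rasdaemon/ras-mc_event.db "SELECT count(*) FROM mce_record;"
+```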
+
+### Example Output
+
+```
+ras,host=ubuntu,socket_id=0 cache_l0_l1_errors=7i,external_mce_errors=1i,frc_errors=1i,internal_parity_errors=1i,internal_timer_errors=1i,memory_read_corrected_errors=25i,memory_read_uncorrectable_errors=0i,memory_write_corrected_errors=5i,memory_write_uncorrectable_errors=0i,microcode_rom_parity_errors=1i,processor_base_errors=7i,processor_bus_errors=1i,smm_handler_code_access_violation_errors=1i,tlb_instruction_errors=5i,unclassified_mce_errors=1i 1598867393000000000
+ras,host=ubuntu cache_l2_errors=0i,upi_errors=0i 1598867393000000000
+```
diff --git a/plugins/inputs/ras/ras.go b/plugins/inputs/ras/ras.go
new file mode 100644
index 0000000000000..036402eb87438
--- /dev/null
+++ b/plugins/inputs/ras/ras.go
@@ -0,0 +1,296 @@
+// +build !windows
+
+package ras
+
+import (
+	"database/sql"
+	"strconv"
+	"strings"
+	"time"
+
+	_ "github.com/mattn/go-sqlite3"
+
+	"github.com/influxdata/telegraf"
+	"github.com/influxdata/telegraf/plugins/inputs"
+)
+
+type Ras struct {
+	DbPath string
+	latestTimestamp time.Time
+	cpuSocketCounters map[int]metricCounters
+	serverCounters metricCounters
+}
+
+type machineCheckError struct {
+	Id int
+	Timestamp string
+	SocketId int
+	ErrorMsg string
+	MciStatusMsg string
+}
+
+type metricCounters map[string]int64
+
+const (
+	mceQuery = `
+	SELECT
+		id, timestamp, error_msg, mcistatus_msg, socketid
+	FROM mce_record
+	WHERE timestamp > ?
+	`
+	defaultDbPath = "/var/lib/rasdaemon/ras-mc_event.db"
+	dateLayout = "2006-01-02 15:04:05 -0700"
+	memoryReadCorrected = "memory_read_corrected_errors"
+	memoryReadUncorrected = "memory_read_uncorrectable_errors"
+	memoryWriteCorrected = "memory_write_corrected_errors"
+	memoryWriteUncorrected = "memory_write_uncorrectable_errors"
+	instructionCache = "cache_l0_l1_errors"
+	instructionTLB = "tlb_instruction_errors"
+	levelTwoCache = "cache_l2_errors"
+	upi = "upi_errors"
+	processorBase = "processor_base_errors"
+	processorBus = "processor_bus_errors"
+	internalTimer = "internal_timer_errors"
+	smmHandlerCode = "smm_handler_code_access_violation_errors"
+	internalParity = "internal_parity_errors"
+	frc = "frc_errors"
+	externalMCEBase = "external_mce_errors"
+	microcodeROMParity = "microcode_rom_parity_errors"
+	unclassifiedMCEBase = "unclassified_mce_errors"
+)
+
+func (r *Ras) SampleConfig() string {
+	return `
+	## Optional path to RASDaemon sqlite3 database.
+	## Default: /var/lib/rasdaemon/ras-mc_event.db
+	# db_path = ""
+`
+}
+
+func (r *Ras) Description() string {
+	return "RAS plugin exposes counter metrics for Machine Check Errors provided by RASDaemon (sqlite3 output is required)."
+}
+
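+// Gather reads any new machine check errors from the RASDaemon database,
+// updates the per-socket and server-wide counters and publishes them.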
+func (r *Ras) Gather(acc telegraf.Accumulator) error {
+	db, err := connectToDB(r.DbPath)
+	if err != nil {
+		return err
+	}
+	defer db.Close()
+
+	rows, err := db.Query(mceQuery, r.latestTimestamp)
+	if err != nil {
+		return err
+	}
+	defer rows.Close()
+
+	for rows.Next() {
+		mcError, err := fetchMachineCheckError(rows)
+		if err != nil {
+			return err
+		}
+		tsErr := r.updateLatestTimestamp(mcError.Timestamp)
+		if tsErr != nil {
+			return tsErr
+		}
+		r.updateCounters(mcError)
+	}
+
+	addCpuSocketMetrics(acc, r.cpuSocketCounters)
+	addServerMetrics(acc, r.serverCounters)
+
+	return nil
+}
+
+func (r *Ras) updateLatestTimestamp(timestamp string) error {
+	ts, err := parseDate(timestamp)
+	if err != nil {
+		return err
+	}
+	if ts.After(r.latestTimestamp) {
+		r.latestTimestamp = ts
+	}
+
+	return nil
+}
+
+func (r *Ras) updateCounters(mcError *machineCheckError) {
+	if strings.Contains(mcError.ErrorMsg, "No Error") {
+		return
+	}
+
+	r.initializeCpuMetricDataIfRequired(mcError.SocketId)
+	r.updateSocketCounters(mcError)
+	r.updateServerCounters(mcError)
+}
+
+func newMetricCounters() *metricCounters {
+	return &metricCounters{
+		memoryReadCorrected: 0,
+		memoryReadUncorrected: 0,
+		memoryWriteCorrected: 0,
+		memoryWriteUncorrected: 0,
+		instructionCache: 0,
+		instructionTLB: 0,
+		processorBase: 0,
+		processorBus: 0,
+		internalTimer: 0,
+		smmHandlerCode: 0,
+		internalParity: 0,
+		frc: 0,
+		externalMCEBase: 0,
+		microcodeROMParity: 0,
+		unclassifiedMCEBase: 0,
+	}
+}
+
+func (r *Ras) updateServerCounters(mcError *machineCheckError) {
+	if strings.Contains(mcError.ErrorMsg, "CACHE Level-2") && strings.Contains(mcError.ErrorMsg, "Error") {
+		r.serverCounters[levelTwoCache] += 1
+	}
+
+	if strings.Contains(mcError.ErrorMsg, "UPI:") {
+		r.serverCounters[upi] += 1
+	}
+}
+
+func connectToDB(server string) (*sql.DB, error) {
+	return sql.Open("sqlite3", server)
+}
+
+func (r *Ras) initializeCpuMetricDataIfRequired(socketId int) {
+	if _, ok := r.cpuSocketCounters[socketId]; !ok {
+		r.cpuSocketCounters[socketId] = *newMetricCounters()
+	}
+}
+
+func (r *Ras) updateSocketCounters(mcError *machineCheckError) {
+	r.updateMemoryCounters(mcError)
+	r.updateProcessorBaseCounters(mcError)
+
+	if strings.Contains(mcError.ErrorMsg, "Instruction TLB") && strings.Contains(mcError.ErrorMsg, "Error") {
+		r.cpuSocketCounters[mcError.SocketId][instructionTLB] += 1
+	}
+
+	if strings.Contains(mcError.ErrorMsg, "BUS") && strings.Contains(mcError.ErrorMsg, "Error") {
+		r.cpuSocketCounters[mcError.SocketId][processorBus] += 1
+	}
+
+	if (strings.Contains(mcError.ErrorMsg, "CACHE Level-0") ||
+		strings.Contains(mcError.ErrorMsg, "CACHE Level-1")) &&
+		strings.Contains(mcError.ErrorMsg, "Error") {
+		r.cpuSocketCounters[mcError.SocketId][instructionCache] += 1
+	}
+}
+
+func (r *Ras) updateProcessorBaseCounters(mcError *machineCheckError) {
+	if strings.Contains(mcError.ErrorMsg, "Internal Timer error") {
+		r.cpuSocketCounters[mcError.SocketId][internalTimer] += 1
+		r.cpuSocketCounters[mcError.SocketId][processorBase] += 1
+	}
+
+	if strings.Contains(mcError.ErrorMsg, "SMM Handler Code Access Violation") {
+		r.cpuSocketCounters[mcError.SocketId][smmHandlerCode] += 1
+		r.cpuSocketCounters[mcError.SocketId][processorBase] += 1
+	}
+
+	if strings.Contains(mcError.ErrorMsg, "Internal parity error") {
+		r.cpuSocketCounters[mcError.SocketId][internalParity] += 1
+		r.cpuSocketCounters[mcError.SocketId][processorBase] += 1
+	}
+
+	if strings.Contains(mcError.ErrorMsg, "FRC error") {
+		r.cpuSocketCounters[mcError.SocketId][frc] 
+= 1 + r.cpuSocketCounters[mcError.SocketId][processorBase] += 1 + } + + if strings.Contains(mcError.ErrorMsg, "External error") { + r.cpuSocketCounters[mcError.SocketId][externalMCEBase] += 1 + r.cpuSocketCounters[mcError.SocketId][processorBase] += 1 + } + + if strings.Contains(mcError.ErrorMsg, "Microcode ROM parity error") { + r.cpuSocketCounters[mcError.SocketId][microcodeROMParity] += 1 + r.cpuSocketCounters[mcError.SocketId][processorBase] += 1 + } + + if strings.Contains(mcError.ErrorMsg, "Unclassified") || strings.Contains(mcError.ErrorMsg, "Internal unclassified") { + r.cpuSocketCounters[mcError.SocketId][unclassifiedMCEBase] += 1 + r.cpuSocketCounters[mcError.SocketId][processorBase] += 1 + } +} + +func (r *Ras) updateMemoryCounters(mcError *machineCheckError) { + if strings.Contains(mcError.ErrorMsg, "Memory read error") { + if strings.Contains(mcError.MciStatusMsg, "Corrected_error") { + r.cpuSocketCounters[mcError.SocketId][memoryReadCorrected] += 1 + } else { + r.cpuSocketCounters[mcError.SocketId][memoryReadUncorrected] += 1 + } + } + if strings.Contains(mcError.ErrorMsg, "Memory write error") { + if strings.Contains(mcError.MciStatusMsg, "Corrected_error") { + r.cpuSocketCounters[mcError.SocketId][memoryWriteCorrected] += 1 + } else { + r.cpuSocketCounters[mcError.SocketId][memoryWriteUncorrected] += 1 + } + } +} + +func addCpuSocketMetrics(acc telegraf.Accumulator, cpuSocketCounters map[int]metricCounters) { + for socketId, data := range cpuSocketCounters { + tags := map[string]string{ + "socket_id": strconv.Itoa(socketId), + } + fields := make(map[string]interface{}) + + for errorName, count := range data { + fields[errorName] = count + } + + acc.AddCounter("ras", fields, tags) + } +} + +func addServerMetrics(acc telegraf.Accumulator, counters map[string]int64) { + fields := make(map[string]interface{}) + for errorName, count := range counters { + fields[errorName] = count + } + + acc.AddCounter("ras", fields, map[string]string{}) +} + +func fetchMachineCheckError(rows *sql.Rows) (*machineCheckError, error) { + mcError := &machineCheckError{} + err := rows.Scan(&mcError.Id, &mcError.Timestamp, &mcError.ErrorMsg, &mcError.MciStatusMsg, &mcError.SocketId) + + if err != nil { + return nil, err + } + + return mcError, nil +} + +func parseDate(date string) (time.Time, error) { + return time.Parse(dateLayout, date) +} + +func init() { + inputs.Add("ras", func() telegraf.Input { + defaultTimestamp, _ := parseDate("1970-01-01 00:00:01 -0700") + return &Ras{ + DbPath: defaultDbPath, + latestTimestamp: defaultTimestamp, + cpuSocketCounters: map[int]metricCounters{ + 0: *newMetricCounters(), + }, + serverCounters: map[string]int64{ + levelTwoCache: 0, + upi: 0, + }, + } + }) +} diff --git a/plugins/inputs/ras/ras_test.go b/plugins/inputs/ras/ras_test.go new file mode 100644 index 0000000000000..7b34074218b5c --- /dev/null +++ b/plugins/inputs/ras/ras_test.go @@ -0,0 +1,254 @@ +// +build !windows + +package ras + +import ( + "fmt" + "testing" + + "github.com/influxdata/telegraf/testutil" + + "github.com/stretchr/testify/assert" +) + +func TestUpdateCounters(t *testing.T) { + ras := newRas() + for _, mce := range testData { + ras.updateCounters(&mce) + } + + assert.Equal(t, 1, len(ras.cpuSocketCounters), "Should contain counters only for single socket") + + for metric, value := range ras.cpuSocketCounters[0] { + if metric == processorBase { + // processor_base_errors is sum of other seven errors: internal_timer_errors, smm_handler_code_access_violation_errors, + // 
internal_parity_errors, frc_errors, external_mce_errors, microcode_rom_parity_errors and unclassified_mce_errors + assert.Equal(t, int64(7), value, fmt.Sprintf("%s should have value of 7", processorBase)) + } else { + assert.Equal(t, int64(1), value, fmt.Sprintf("%s should have value of 1", metric)) + } + } + + for metric, value := range ras.serverCounters { + assert.Equal(t, int64(1), value, fmt.Sprintf("%s should have value of 1", metric)) + } +} + +func TestUpdateLatestTimestamp(t *testing.T) { + ras := newRas() + ts := "2020-08-01 15:13:27 +0200" + testData = append(testData, []machineCheckError{ + { + Timestamp: "2019-05-20 08:25:55 +0200", + SocketId: 0, + ErrorMsg: "", + MciStatusMsg: "", + }, + { + Timestamp: "2018-02-21 12:27:22 +0200", + SocketId: 0, + ErrorMsg: "", + MciStatusMsg: "", + }, + { + Timestamp: ts, + SocketId: 0, + ErrorMsg: "", + MciStatusMsg: "", + }, + }...) + for _, mce := range testData { + err := ras.updateLatestTimestamp(mce.Timestamp) + assert.NoError(t, err) + } + assert.Equal(t, ts, ras.latestTimestamp.Format(dateLayout)) +} + +func TestMultipleSockets(t *testing.T) { + ras := newRas() + cacheL2 := "Instruction CACHE Level-2 Generic Error" + overflow := "Error_overflow Corrected_error" + testData = []machineCheckError{ + { + Timestamp: "2019-05-20 08:25:55 +0200", + SocketId: 0, + ErrorMsg: cacheL2, + MciStatusMsg: overflow, + }, + { + Timestamp: "2018-02-21 12:27:22 +0200", + SocketId: 1, + ErrorMsg: cacheL2, + MciStatusMsg: overflow, + }, + { + Timestamp: "2020-03-21 14:17:28 +0200", + SocketId: 2, + ErrorMsg: cacheL2, + MciStatusMsg: overflow, + }, + { + Timestamp: "2020-03-21 17:24:18 +0200", + SocketId: 3, + ErrorMsg: cacheL2, + MciStatusMsg: overflow, + }, + } + for _, mce := range testData { + ras.updateCounters(&mce) + } + assert.Equal(t, 4, len(ras.cpuSocketCounters), "Should contain counters for four sockets") + + for _, metricData := range ras.cpuSocketCounters { + for metric, value := range metricData { + if metric == levelTwoCache { + assert.Equal(t, int64(1), value, fmt.Sprintf("%s should have value of 1", levelTwoCache)) + } else { + assert.Equal(t, int64(0), value, fmt.Sprintf("%s should have value of 0", metric)) + } + } + } +} + +func TestMissingDatabase(t *testing.T) { + var acc testutil.Accumulator + ras := newRas() + ras.DbPath = "/tmp/test.db" + err := ras.Gather(&acc) + assert.Error(t, err) +} + +func TestEmptyDatabase(t *testing.T) { + ras := newRas() + + assert.Equal(t, 1, len(ras.cpuSocketCounters), "Should contain default counters for one socket") + assert.Equal(t, 2, len(ras.serverCounters), "Should contain default counters for server") + + for metric, value := range ras.cpuSocketCounters[0] { + assert.Equal(t, int64(0), value, fmt.Sprintf("%s should have value of 0", metric)) + } + + for metric, value := range ras.serverCounters { + assert.Equal(t, int64(0), value, fmt.Sprintf("%s should have value of 0", metric)) + } +} + +func newRas() *Ras { + defaultTimestamp, _ := parseDate("1970-01-01 00:00:01 -0700") + return &Ras{ + DbPath: defaultDbPath, + latestTimestamp: defaultTimestamp, + cpuSocketCounters: map[int]metricCounters{ + 0: *newMetricCounters(), + }, + serverCounters: map[string]int64{ + levelTwoCache: 0, + upi: 0, + }, + } +} + +var testData = []machineCheckError{ + { + Timestamp: "2020-05-20 07:34:53 +0200", + SocketId: 0, + ErrorMsg: "MEMORY CONTROLLER RD_CHANNEL0_ERR Transaction: Memory read error", + MciStatusMsg: "Error_overflow Corrected_error", + }, + { + Timestamp: "2020-05-20 07:35:11 +0200", + SocketId: 0, + 
ErrorMsg: "MEMORY CONTROLLER RD_CHANNEL0_ERR Transaction: Memory read error", + MciStatusMsg: "Uncorrected_error", + }, + { + Timestamp: "2020-05-20 07:37:50 +0200", + SocketId: 0, + ErrorMsg: "MEMORY CONTROLLER RD_CHANNEL2_ERR Transaction: Memory write error", + MciStatusMsg: "Uncorrected_error", + }, + { + Timestamp: "2020-05-20 08:14:51 +0200", + SocketId: 0, + ErrorMsg: "MEMORY CONTROLLER WR_CHANNEL2_ERR Transaction: Memory write error", + MciStatusMsg: "Error_overflow Corrected_error", + }, + { + Timestamp: "2020-05-20 08:15:31 +0200", + SocketId: 0, + ErrorMsg: "corrected filtering (some unreported errors in same region) Instruction CACHE Level-0 Read Error", + MciStatusMsg: "Error_overflow Corrected_error", + }, + { + Timestamp: "2020-05-20 08:16:32 +0200", + SocketId: 0, + ErrorMsg: "Instruction TLB Level-0 Error", + MciStatusMsg: "Error_overflow Corrected_error", + }, + { + Timestamp: "2020-05-20 08:16:56 +0200", + SocketId: 0, + ErrorMsg: "No Error", + MciStatusMsg: "Error_overflow Corrected_error", + }, + { + Timestamp: "2020-05-20 08:17:24 +0200", + SocketId: 0, + ErrorMsg: "Unclassified", + MciStatusMsg: "Error_overflow Corrected_error", + }, + { + Timestamp: "2020-05-20 08:17:41 +0200", + SocketId: 0, + ErrorMsg: "Microcode ROM parity error", + MciStatusMsg: "Error_overflow Corrected_error", + }, + { + Timestamp: "2020-05-20 08:17:48 +0200", + SocketId: 0, + ErrorMsg: "FRC error", + MciStatusMsg: "Error_overflow Corrected_error", + }, + { + Timestamp: "2020-05-20 08:18:18 +0200", + SocketId: 0, + ErrorMsg: "Internal parity error", + MciStatusMsg: "Error_overflow Corrected_error", + }, + { + Timestamp: "2020-05-20 08:18:34 +0200", + SocketId: 0, + ErrorMsg: "SMM Handler Code Access Violation", + MciStatusMsg: "Error_overflow Corrected_error", + }, + { + Timestamp: "2020-05-20 08:18:54 +0200", + SocketId: 0, + ErrorMsg: "Internal Timer error", + MciStatusMsg: "Error_overflow Corrected_error", + }, + { + Timestamp: "2020-05-20 08:21:23 +0200", + SocketId: 0, + ErrorMsg: "BUS Level-3 Generic Generic IO Request-did-not-timeout Error", + MciStatusMsg: "Error_overflow Corrected_error", + }, + { + Timestamp: "2020-05-20 08:23:23 +0200", + SocketId: 0, + ErrorMsg: "External error", + MciStatusMsg: "Error_overflow Corrected_error", + }, + { + Timestamp: "2020-05-20 08:25:31 +0200", + SocketId: 0, + ErrorMsg: "UPI: COR LL Rx detected CRC error - successful LLR without Phy Reinit", + MciStatusMsg: "Error_overflow Corrected_error", + }, + { + Timestamp: "2020-05-20 08:25:55 +0200", + SocketId: 0, + ErrorMsg: "Instruction CACHE Level-2 Generic Error", + MciStatusMsg: "Error_overflow Corrected_error", + }, +} diff --git a/plugins/inputs/ras/ras_windows.go b/plugins/inputs/ras/ras_windows.go new file mode 100644 index 0000000000000..ac7dadd567381 --- /dev/null +++ b/plugins/inputs/ras/ras_windows.go @@ -0,0 +1,3 @@ +// +build windows + +package ras diff --git a/plugins/inputs/redis/README.md b/plugins/inputs/redis/README.md index f62b9db6e3f61..c8f343b262aca 100644 --- a/plugins/inputs/redis/README.md +++ b/plugins/inputs/redis/README.md @@ -14,6 +14,11 @@ ## If no servers are specified, then localhost is used as the host. ## If no port is specified, 6379 is used servers = ["tcp://localhost:6379"] + ## Optional. 
Specify redis commands to retrieve values + # [[inputs.redis.commands]] + # command = ["get", "sample-key"] + # field = "sample-key-value" + # type = "string" ## specify server password # password = "s#cr@t%" diff --git a/plugins/inputs/redis/redis.go b/plugins/inputs/redis/redis.go index 5e32afef5c65f..3a76a351c05de 100644 --- a/plugins/inputs/redis/redis.go +++ b/plugins/inputs/redis/redis.go @@ -17,7 +17,14 @@ import ( "github.com/influxdata/telegraf/plugins/inputs" ) +type RedisCommand struct { + Command []interface{} + Field string + Type string +} + type Redis struct { + Commands []*RedisCommand Servers []string Password string tls.ClientConfig @@ -29,6 +36,7 @@ type Redis struct { } type Client interface { + Do(returnType string, args ...interface{}) (interface{}, error) Info() *redis.StringCmd BaseTags() map[string]string } @@ -38,6 +46,21 @@ type RedisClient struct { tags map[string]string } +func (r *RedisClient) Do(returnType string, args ...interface{}) (interface{}, error) { + rawVal := r.client.Do(args...) + + switch returnType { + case "integer": + return rawVal.Int64() + case "string": + return rawVal.String() + case "float": + return rawVal.Float64() + default: + return rawVal.String() + } +} + func (r *RedisClient) Info() *redis.StringCmd { return r.client.Info("ALL") } @@ -64,6 +87,12 @@ var sampleConfig = ` ## If no port is specified, 6379 is used servers = ["tcp://localhost:6379"] + ## Optional. Specify redis commands to retrieve values + # [[inputs.redis.commands]] + # command = ["get", "sample-key"] + # field = "sample-key-value" + # type = "string" + ## specify server password # password = "s#cr@t%" @@ -179,6 +208,7 @@ func (r *Redis) Gather(acc telegraf.Accumulator) error { go func(client Client) { defer wg.Done() acc.AddError(r.gatherServer(client, acc)) + acc.AddError(r.gatherCommandValues(client, acc)) }(client) } @@ -186,6 +216,22 @@ func (r *Redis) Gather(acc telegraf.Accumulator) error { return nil } +func (r *Redis) gatherCommandValues(client Client, acc telegraf.Accumulator) error { + fields := make(map[string]interface{}) + for _, command := range r.Commands { + val, err := client.Do(command.Type, command.Command...) 
+ if err != nil { + return err + } + + fields[command.Field] = val + } + + acc.AddFields("redis_commands", fields, client.BaseTags()) + + return nil +} + func (r *Redis) gatherServer(client Client, acc telegraf.Accumulator) error { info, err := client.Info().Result() if err != nil { diff --git a/plugins/inputs/redis/redis_test.go b/plugins/inputs/redis/redis_test.go index 637b464f95e99..d5aaa7a7bfa38 100644 --- a/plugins/inputs/redis/redis_test.go +++ b/plugins/inputs/redis/redis_test.go @@ -7,11 +7,27 @@ import ( "testing" "time" + "github.com/go-redis/redis" "github.com/influxdata/telegraf/testutil" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) +type testClient struct { +} + +func (t *testClient) BaseTags() map[string]string { + return map[string]string{"host": "redis.net"} +} + +func (t *testClient) Info() *redis.StringCmd { + return nil +} + +func (t *testClient) Do(returnType string, args ...interface{}) (interface{}, error) { + return 2, nil +} + func TestRedisConnect(t *testing.T) { if testing.Short() { t.Skip("Skipping integration test in short mode") @@ -30,6 +46,33 @@ func TestRedisConnect(t *testing.T) { require.NoError(t, err) } +func TestRedis_Commands(t *testing.T) { + const redisListKey = "test-list-length" + var acc testutil.Accumulator + + tc := &testClient{} + + rc := &RedisCommand{ + Command: []interface{}{"llen", "test-list"}, + Field: redisListKey, + Type: "integer", + } + + r := &Redis{ + Commands: []*RedisCommand{rc}, + clients: []Client{tc}, + } + + err := r.gatherCommandValues(tc, &acc) + require.NoError(t, err) + + fields := map[string]interface{}{ + redisListKey: 2, + } + + acc.AssertContainsFields(t, "redis_commands", fields) +} + func TestRedis_ParseMetrics(t *testing.T) { var acc testutil.Accumulator tags := map[string]string{"host": "redis.net"} diff --git a/plugins/inputs/rethinkdb/rethinkdb_server.go b/plugins/inputs/rethinkdb/rethinkdb_server.go index a04206d32a6d9..c10605aa6d83e 100644 --- a/plugins/inputs/rethinkdb/rethinkdb_server.go +++ b/plugins/inputs/rethinkdb/rethinkdb_server.go @@ -164,6 +164,10 @@ var TableTracking = []string{ func (s *Server) addTableStats(acc telegraf.Accumulator) error { tablesCursor, err := gorethink.DB("rethinkdb").Table("table_status").Run(s.session) + if err != nil { + return fmt.Errorf("table stats query error, %s\n", err.Error()) + } + defer tablesCursor.Close() var tables []tableStatus err = tablesCursor.All(&tables) diff --git a/plugins/inputs/smart/README.md b/plugins/inputs/smart/README.md index 47320aeac2ebf..dec58e3f9afab 100644 --- a/plugins/inputs/smart/README.md +++ b/plugins/inputs/smart/README.md @@ -1,6 +1,6 @@ # S.M.A.R.T. Input Plugin -Get metrics using the command line utility `smartctl` for S.M.A.R.T. (Self-Monitoring, Analysis and Reporting Technology) storage devices. SMART is a monitoring system included in computer hard disk drives (HDDs) and solid-state drives (SSDs)[1] that detects and reports on various indicators of drive reliability, with the intent of enabling the anticipation of hardware failures. +Get metrics using the command line utility `smartctl` for S.M.A.R.T. (Self-Monitoring, Analysis and Reporting Technology) storage devices. SMART is a monitoring system included in computer hard disk drives (HDDs) and solid-state drives (SSDs) that detects and reports on various indicators of drive reliability, with the intent of enabling the anticipation of hardware failures. See smartmontools (https://www.smartmontools.org/). 
SMART information is separated between different measurements: `smart_device` is used for general information, while `smart_attribute` stores the detailed attribute information if `attributes = true` is enabled in the plugin configuration.
@@ -19,57 +19,98 @@ smartctl --info --attributes --health -n --format=brief
 
 This plugin supports _smartmontools_ version 5.41 and above, but v. 5.41 and v. 5.42
 might require setting `nocheck`, see the comment in the sample configuration.
+Also, NVMe capabilities were introduced in version 6.5.
 
 To enable SMART on a storage device run:
 ```
 smartctl -s on <device>
 ```
+## NVMe vendor specific attributes
 
-### Configuration
+For NVMe disks, the plugin can use the command line utility `nvme-cli`, which
+provides easy access to vendor specific attributes.
+This plugin supports nvme-cli version 1.5 and above (https://github.com/linux-nvme/nvme-cli).
+If `nvme-cli` is absent, NVMe vendor specific metrics will not be gathered.
 
-```toml
-# Read metrics from storage devices supporting S.M.A.R.T.
-[[inputs.smart]]
-  ## Optionally specify the path to the smartctl executable
-  # path = "/usr/bin/smartctl"
-
-  ## On most platforms smartctl requires root access.
-  ## Setting 'use_sudo' to true will make use of sudo to run smartctl.
-  ## Sudo must be configured to to allow the telegraf user to run smartctl
-  ## without a password.
-  # use_sudo = false
+Vendor specific SMART metrics for NVMe disks may be reported with the following `nvme` command:
 
-  ## Skip checking disks in this power mode. Defaults to
-  ## "standby" to not wake up disks that have stoped rotating.
-  ## See --nocheck in the man pages for smartctl.
-  ## smartctl version 5.41 and 5.42 have faulty detection of
-  ## power mode and might require changing this value to
-  ## "never" depending on your disks.
-  # nocheck = "standby"
+```
+nvme <vendor> smart-log-add <device>
+```
 
-  ## Gather all returned S.M.A.R.T. attribute metrics and the detailed
-  ## information from each drive into the `smart_attribute` measurement.
-  # attributes = false
+Note that vendor plugins for `nvme-cli` may use a different naming convention and report format.
 
-  ## Optionally specify devices to exclude from reporting.
-  # excludes = [ "/dev/pass6" ]
+To see the installed plugin extensions, which depend on the nvme-cli version, look at the bottom of:
+```
+nvme help
+```
 
-  ## Optionally specify devices and device type, if unset
-  ## a scan (smartctl --scan) for S.M.A.R.T. devices will
-  ## done and all found will be included except for the
-  ## excluded in excludes.
-  # devices = [ "/dev/ada0 -d atacam" ]
+To gather the disk vendor id (vid), `id-ctrl` can be used:
+```
+nvme id-ctrl <device>
+```
+The association between a vid and a company can be found here: https://pcisig.com/membership/member-companies.
 
-  ## Timeout for the smartctl command to complete.
-  # timeout = "30s"
+Whether devices are NVMe or non-NVMe is determined using:
+```
+smartctl --scan
+```
+and:
+```
+smartctl --scan -d nvme
 ```
 
-### Permissions
+## Configuration
 
-It's important to note that this plugin references smartctl, which may require additional permissions to execute successfully.
-Depending on the user/group permissions of the telegraf user executing this plugin, you may need to use sudo.
+```toml
+# Read metrics from storage devices supporting S.M.A.R.T.
+[[inputs.smart]]
+  ## Optionally specify the path to the smartctl executable
+  # path_smartctl = "/usr/bin/smartctl"
+
+  ## Optionally specify the path to the nvme-cli executable
+  # path_nvme = "/usr/bin/nvme"
+
+  ## Optionally specify if vendor specific attributes should be propagated for NVMe disks
+  ## ["auto-on"] - automatically find and enable additional vendor specific disk info
+  ## ["vendor1", "vendor2", ...] - e.g. "Intel" enables additional Intel specific disk info
+  # enable_extensions = ["auto-on"]
+
+  ## On most platforms the cli utilities used here require root access.
+  ## Setting 'use_sudo' to true will make use of sudo to run smartctl or nvme-cli.
+  ## Sudo must be configured to allow the telegraf user to run smartctl or nvme-cli
+  ## without a password.
+  # use_sudo = false
+
+  ## Skip checking disks in this power mode. Defaults to
+  ## "standby" to not wake up disks that have stopped rotating.
+  ## See --nocheck in the man pages for smartctl.
+  ## smartctl version 5.41 and 5.42 have faulty detection of
+  ## power mode and might require changing this value to
+  ## "never" depending on your disks.
+  # nocheck = "standby"
+
+  ## Gather all returned S.M.A.R.T. attribute metrics and the detailed
+  ## information from each drive into the 'smart_attribute' measurement.
+  # attributes = false
+
+  ## Optionally specify devices to exclude from reporting if disks auto-discovery is performed.
+  # excludes = [ "/dev/pass6" ]
+
+  ## Optionally specify devices and device type, if unset
+  ## a scan (smartctl --scan and smartctl --scan -d nvme) for S.M.A.R.T. devices will be done
+  ## and all found will be included except for the excluded in excludes.
+  # devices = [ "/dev/ada0 -d atacam", "/dev/nvme0"]
+
+  ## Timeout for the cli command to complete.
+  # timeout = "30s"
+```
+## Permissions
+
+It's important to note that this plugin references smartctl and nvme-cli, which may require additional permissions to execute successfully.
+Depending on the user/group permissions of the telegraf user executing this plugin, you may need to use sudo.
 You will need the following in your telegraf config:
 ```toml
@@ -80,13 +121,20 @@ You will need the following in your telegraf config:
 You will also need to update your sudoers file:
 ```bash
 $ visudo
-# Add the following line:
+# For smartctl add the following lines:
 Cmnd_Alias SMARTCTL = /usr/bin/smartctl
 telegraf ALL=(ALL) NOPASSWD: SMARTCTL
 Defaults!SMARTCTL !logfile, !syslog, !pam_session
+
+# For nvme-cli add the following lines:
+Cmnd_Alias NVME = /path/to/nvme
+telegraf ALL=(ALL) NOPASSWD: NVME
+Defaults!NVME !logfile, !syslog, !pam_session
 ```
+To run smartctl or nvme with `sudo`, a wrapper script can be created. `path_smartctl` or
+`path_nvme` in the configuration should be set to execute this script.
 
-### Metrics
+## Metrics
 
 - smart_device:
   - tags:
@@ -135,37 +183,51 @@ The interpretation of the tag `flags` is:
 
 #### Exit Status
 
-The `exit_status` field captures the exit status of the smartctl command which
+The `exit_status` field captures the exit status of the used cli utilities command which
 is defined by a bitmask. For the interpretation of the bitmask see the man page for
-smartctl.
-
-#### Device Names
+smartctl or nvme-cli.
+## Device Names
 
 Device names, e.g., `/dev/sda`, are *not persistent*, and may be
-subject to change across reboots or system changes. Instead, you can the
+subject to change across reboots or system changes. Instead, you can use the
 *World Wide Name* (WWN) or serial number to identify devices.
 
 On Linux block devices can be referenced by the WWN in the following location:
 `/dev/disk/by-id/`.
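+
+For example, instead of a bare device node, a disk can be referenced by a
+stable by-id path in this plugin's configuration (the WWN value here is taken
+from the sample output below):
+```toml
+devices = [ "/dev/disk/by-id/wwn-0x5002538655584d30" ]
+```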
 On Linux, block devices can be referenced by the WWN in the following location: `/dev/disk/by-id/`.
-
-To run `smartctl` with `sudo` create a wrapper script and use `path` in
-the configuration to execute that.
-
-### Troubleshooting
+## Troubleshooting
+If you expect to see more SMART metrics than this plugin shows, make sure you are using a version
+of smartctl or nvme-cli that is able to gather the desired data. Also, check
+your device's capabilities: not every SMART metric is mandatory.
+For example, the number of temperature sensors depends on the device specification.

 If this plugin is not working as expected for your SMART enabled device, please run these commands and include the output in a bug report:
+
+For non-NVMe devices (from smartctl version >= 7.0 this will also return NVMe devices by default):
 ```
 smartctl --scan
 ```
-
+For NVMe devices:
+```
+smartctl --scan -d nvme
+```
 Run the following command replacing your configuration setting for NOCHECK and
-the DEVICE from the previous command:
+the DEVICE (the device name can be taken from the previous command):
 ```
 smartctl --info --health --attributes --tolerance=verypermissive --nocheck NOCHECK --format=brief -d DEVICE
 ```
-
-### Example Output
-
+If you are trying to gather vendor-specific metrics, please also provide the output of this
+command, replacing VENDOR and DEVICE to match your case:
+```
+nvme VENDOR smart-log-add DEVICE
+```
+## Example SMART Plugin Outputs
 ```
 smart_device,enabled=Enabled,host=mbpro.local,device=rdisk0,model=APPLE\ SSD\ SM0512F,serial_no=S1K5NYCD964433,wwn=5002538655584d30,capacity=500277790720 udma_crc_errors=0i,exit_status=0i,health_ok=true,read_error_rate=0i,temp_c=40i 1502536854000000000
 smart_attribute,capacity=500277790720,device=rdisk0,enabled=Enabled,fail=-,flags=-O-RC-,host=mbpro.local,id=199,model=APPLE\ SSD\ SM0512F,name=UDMA_CRC_Error_Count,serial_no=S1K5NYCD964433,wwn=5002538655584d30 exit_status=0i,raw_value=0i,threshold=0i,value=200i,worst=200i 1502536854000000000
diff --git a/plugins/inputs/smart/smart.go b/plugins/inputs/smart/smart.go
index 63d16aad3e35e..adc23f0921e26 100644
--- a/plugins/inputs/smart/smart.go
+++ b/plugins/inputs/smart/smart.go
@@ -3,6 +3,7 @@ package smart
 import (
 	"bufio"
 	"fmt"
+	"os"
 	"os/exec"
 	"path"
 	"regexp"
@@ -11,12 +12,15 @@ import (
 	"sync"
 	"syscall"
 	"time"
+	"unicode"

 	"github.com/influxdata/telegraf"
 	"github.com/influxdata/telegraf/internal"
 	"github.com/influxdata/telegraf/plugins/inputs"
 )

+const IntelVID = "0x8086"
+
 var (
 	// Device Model: APPLE SSD SM256E
 	// Product: HUH721212AL5204
@@ -27,7 +31,7 @@ var (
 	// LU WWN Device Id: 5 002538 655584d30
 	wwnInfo = regexp.MustCompile("^LU WWN Device Id:\\s+(.*)$")
 	// User Capacity: 251,000,193,024 bytes [251 GB]
-	usercapacityInfo = regexp.MustCompile("^User Capacity:\\s+([0-9,]+)\\s+bytes.*$")
+	userCapacityInfo = regexp.MustCompile("^User Capacity:\\s+([0-9,]+)\\s+bytes.*$")
 	// SMART support is: Enabled
 	smartEnabledInfo = regexp.MustCompile("^SMART support is:\\s+(\\w+)$")
 	// SMART overall-health self-assessment test result: PASSED
@@ -44,6 +48,15 @@ var (
 	// 192 Power-Off_Retract_Count -O--C-   097   097   000    -    14716
 	attribute = regexp.MustCompile("^\\s*([0-9]+)\\s(\\S+)\\s+([-P][-O][-S][-R][-C][-K])\\s+([0-9]+)\\s+([0-9]+)\\s+([0-9-]+)\\s+([-\\w]+)\\s+([\\w\\+\\.]+).*$")

+	// Additional Smart Log for NVME device:nvme0 namespace-id:ffffffff
+	// key                               normalized raw
+	// program_fail_count              : 100%       0
+	intelExpressionPattern = regexp.MustCompile(`^([\w\s]+):([\w\s]+)%(.+)`)
+
+	// vid     : 0x8086
+	// sn      :
CFGT53260XSP8011P + nvmeIdCtrlExpressionPattern = regexp.MustCompile(`^([\w\s]+):([\s\w]+)`) + deviceFieldIds = map[string]string{ "1": "read_error_rate", "7": "seek_error_rate", @@ -52,6 +65,7 @@ var ( "199": "udma_crc_errors", } + // to obtain metrics from smartctl sasNvmeAttributes = map[string]struct { ID string Name string @@ -146,31 +160,154 @@ var ( Name: "Critical_Temperature_Time", Parse: parseCommaSeparatedInt, }, + "Thermal Temp. 1 Transition Count": { + Name: "Thermal_Management_T1_Trans_Count", + Parse: parseCommaSeparatedInt, + }, + "Thermal Temp. 2 Transition Count": { + Name: "Thermal_Management_T2_Trans_Count", + Parse: parseCommaSeparatedInt, + }, + "Thermal Temp. 1 Total Time": { + Name: "Thermal_Management_T1_Total_Time", + Parse: parseCommaSeparatedInt, + }, + "Thermal Temp. 2 Total Time": { + Name: "Thermal_Management_T2_Total_Time", + Parse: parseCommaSeparatedInt, + }, + "Temperature Sensor 1": { + Name: "Temperature_Sensor_1", + Parse: parseTemperatureSensor, + }, + "Temperature Sensor 2": { + Name: "Temperature_Sensor_2", + Parse: parseTemperatureSensor, + }, + "Temperature Sensor 3": { + Name: "Temperature_Sensor_3", + Parse: parseTemperatureSensor, + }, + "Temperature Sensor 4": { + Name: "Temperature_Sensor_4", + Parse: parseTemperatureSensor, + }, + "Temperature Sensor 5": { + Name: "Temperature_Sensor_5", + Parse: parseTemperatureSensor, + }, + "Temperature Sensor 6": { + Name: "Temperature_Sensor_6", + Parse: parseTemperatureSensor, + }, + "Temperature Sensor 7": { + Name: "Temperature_Sensor_7", + Parse: parseTemperatureSensor, + }, + "Temperature Sensor 8": { + Name: "Temperature_Sensor_8", + Parse: parseTemperatureSensor, + }, + } + + // to obtain Intel specific metrics from nvme-cli + intelAttributes = map[string]struct { + ID string + Name string + Parse func(acc telegraf.Accumulator, fields map[string]interface{}, tags map[string]string, str string) error + }{ + "program_fail_count": { + Name: "Program_Fail_Count", + }, + "erase_fail_count": { + Name: "Erase_Fail_Count", + }, + "end_to_end_error_detection_count": { + Name: "End_To_End_Error_Detection_Count", + }, + "crc_error_count": { + Name: "Crc_Error_Count", + }, + "retry_buffer_overflow_count": { + Name: "Retry_Buffer_Overflow_Count", + }, + "wear_leveling": { + Name: "Wear_Leveling", + Parse: parseWearLeveling, + }, + "timed_workload_media_wear": { + Name: "Timed_Workload_Media_Wear", + Parse: parseTimedWorkload, + }, + "timed_workload_host_reads": { + Name: "Timed_Workload_Host_Reads", + Parse: parseTimedWorkload, + }, + "timed_workload_timer": { + Name: "Timed_Workload_Timer", + Parse: func(acc telegraf.Accumulator, fields map[string]interface{}, tags map[string]string, str string) error { + return parseCommaSeparatedIntWithAccumulator(acc, fields, tags, strings.TrimSuffix(str, " min")) + }, + }, + "thermal_throttle_status": { + Name: "Thermal_Throttle_Status", + Parse: parseThermalThrottle, + }, + "pll_lock_loss_count": { + Name: "Pll_Lock_Loss_Count", + }, + "nand_bytes_written": { + Name: "Nand_Bytes_Written", + Parse: parseBytesWritten, + }, + "host_bytes_written": { + Name: "Host_Bytes_Written", + Parse: parseBytesWritten, + }, } ) +type NVMeDevice struct { + name string + vendorID string + model string + serialNumber string +} + type Smart struct { - Path string - Nocheck string - Attributes bool - Excludes []string - Devices []string - UseSudo bool - Timeout internal.Duration + Path string `toml:"path"` //deprecated - to keep backward compatibility + PathSmartctl string 
`toml:"path_smartctl"`
+	PathNVMe         string            `toml:"path_nvme"`
+	Nocheck          string            `toml:"nocheck"`
+	EnableExtensions []string          `toml:"enable_extensions"`
+	Attributes       bool              `toml:"attributes"`
+	Excludes         []string          `toml:"excludes"`
+	Devices          []string          `toml:"devices"`
+	UseSudo          bool              `toml:"use_sudo"`
+	Timeout          internal.Duration `toml:"timeout"`
+	Log              telegraf.Logger   `toml:"-"`
 }

 var sampleConfig = `
 	## Optionally specify the path to the smartctl executable
-	# path = "/usr/bin/smartctl"
+	# path_smartctl = "/usr/bin/smartctl"
+
+	## Optionally specify the path to the nvme-cli executable
+	# path_nvme = "/usr/bin/nvme"

-	## On most platforms smartctl requires root access.
-	## Setting 'use_sudo' to true will make use of sudo to run smartctl.
-	## Sudo must be configured to to allow the telegraf user to run smartctl
+	## Optionally specify whether vendor specific attributes should be propagated for NVMe disks
+	## ["auto-on"] - automatically find and enable additional vendor specific disk info
+	## ["vendor1", "vendor2", ...] - e.g. "Intel" enables additional Intel specific disk info
+	# enable_extensions = ["auto-on"]
+
+	## On most platforms the cli utilities used here require root access.
+	## Setting 'use_sudo' to true will make use of sudo to run smartctl or nvme-cli.
+	## Sudo must be configured to allow the telegraf user to run smartctl or nvme-cli
 	## without a password.
 	# use_sudo = false

 	## Skip checking disks in this power mode. Defaults to
-	## "standby" to not wake up disks that have stoped rotating.
+	## "standby" to not wake up disks that have stopped rotating.
 	## See --nocheck in the man pages for smartctl.
 	## smartctl version 5.41 and 5.42 have faulty detection of
 	## power mode and might require changing this value to
@@ -181,16 +318,15 @@ var sampleConfig = `
 	## information from each drive into the 'smart_attribute' measurement.
 	# attributes = false

-	## Optionally specify devices to exclude from reporting.
+	## Optionally specify devices to exclude from reporting if disk auto-discovery is performed.
 	# excludes = [ "/dev/pass6" ]

 	## Optionally specify devices and device type, if unset
-	## a scan (smartctl --scan) for S.M.A.R.T. devices will
-	## done and all found will be included except for the
-	## excluded in excludes.
-	# devices = [ "/dev/ada0 -d atacam" ]
+	## a scan (smartctl --scan and smartctl --scan -d nvme) for S.M.A.R.T. devices will be done
+	## and all found will be included except for the excluded in excludes.
+	# devices = [ "/dev/ada0 -d atacam", "/dev/nvme0" ]

-	## Timeout for the smartctl command to complete.
+	## Timeout for the cli command to complete.
 	# timeout = "30s"
 `

@@ -208,50 +344,170 @@ func (m *Smart) Description() string {
 	return "Read metrics from storage devices supporting S.M.A.R.T."
} -func (m *Smart) Gather(acc telegraf.Accumulator) error { - if len(m.Path) == 0 { - return fmt.Errorf("smartctl not found: verify that smartctl is installed and that smartctl is in your PATH") +func (m *Smart) Init() error { + //if deprecated `path` (to smartctl binary) is provided in config and `path_smartctl` override does not exist + if len(m.Path) > 0 && len(m.PathSmartctl) == 0 { + m.PathSmartctl = m.Path } - devices := m.Devices - if len(devices) == 0 { - var err error - devices, err = m.scan() - if err != nil { - return err + //if `path_smartctl` is not provided in config, try to find smartctl binary in PATH + if len(m.PathSmartctl) == 0 { + m.PathSmartctl, _ = exec.LookPath("smartctl") + } + + //if `path_nvme` is not provided in config, try to find nvme binary in PATH + if len(m.PathNVMe) == 0 { + m.PathNVMe, _ = exec.LookPath("nvme") + } + + err := validatePath(m.PathSmartctl) + if err != nil { + m.PathSmartctl = "" + //without smartctl, plugin will not be able to gather basic metrics + return fmt.Errorf("smartctl not found: verify that smartctl is installed and it is in your PATH (or specified in config): %s", err.Error()) + } + + err = validatePath(m.PathNVMe) + if err != nil { + m.PathNVMe = "" + //without nvme, plugin will not be able to gather vendor specific attributes (but it can work without it) + m.Log.Warnf("nvme not found: verify that nvme is installed and it is in your PATH (or specified in config) to gather vendor specific attributes: %s", err.Error()) + } + + return nil +} + +func (m *Smart) Gather(acc telegraf.Accumulator) error { + var err error + var scannedNVMeDevices []string + var scannedNonNVMeDevices []string + + devicesFromConfig := m.Devices + isNVMe := len(m.PathNVMe) != 0 + isVendorExtension := len(m.EnableExtensions) != 0 + + if len(m.Devices) != 0 { + devicesFromConfig = excludeWrongDeviceNames(devicesFromConfig) + + m.getAttributes(acc, devicesFromConfig) + + // if nvme-cli is present, vendor specific attributes can be gathered + if isVendorExtension && isNVMe { + scannedNVMeDevices, _, err = m.scanAllDevices(true) + if err != nil { + return err + } + NVMeDevices := distinguishNVMeDevices(devicesFromConfig, scannedNVMeDevices) + + m.getVendorNVMeAttributes(acc, NVMeDevices) } + return nil + } + scannedNVMeDevices, scannedNonNVMeDevices, err = m.scanAllDevices(false) + if err != nil { + return err } + var devicesFromScan []string + devicesFromScan = append(devicesFromScan, scannedNVMeDevices...) + devicesFromScan = append(devicesFromScan, scannedNonNVMeDevices...) - m.getAttributes(acc, devices) + m.getAttributes(acc, devicesFromScan) + if isVendorExtension && isNVMe { + m.getVendorNVMeAttributes(acc, scannedNVMeDevices) + } return nil } -// Wrap with sudo -var runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { - cmd := exec.Command(command, args...) - if sudo { - cmd = exec.Command("sudo", append([]string{"-n", command}, args...)...) 
+// excludeWrongDeviceNames validates the configured device names and drops
+// invalid ones to avoid unwanted behaviour
+func excludeWrongDeviceNames(devices []string) []string {
+	validSigns := map[string]struct{}{
+		" ":  {},
+		"/":  {},
+		"\\": {},
+		"-":  {},
+		",":  {},
 	}
-	return internal.CombinedOutputTimeout(cmd, timeout.Duration)
+	var wrongDevices []string
+
+	for _, device := range devices {
+		for _, char := range device {
+			if unicode.IsLetter(char) || unicode.IsNumber(char) {
+				continue
+			}
+			if _, exist := validSigns[string(char)]; exist {
+				continue
+			}
+			wrongDevices = append(wrongDevices, device)
+		}
+	}
+	return difference(devices, wrongDevices)
 }

-// Scan for S.M.A.R.T. devices
-func (m *Smart) scan() ([]string, error) {
-	out, err := runCmd(m.Timeout, m.UseSudo, m.Path, "--scan")
+func (m *Smart) scanAllDevices(ignoreExcludes bool) ([]string, []string, error) {
+	// this will return all devices (including NVMe devices) for smartctl version >= 7.0
+	// for older versions this will return non NVMe devices
+	devices, err := m.scanDevices(ignoreExcludes, "--scan")
+	if err != nil {
+		return nil, nil, err
+	}
+
+	// this will return only NVMe devices
+	NVMeDevices, err := m.scanDevices(ignoreExcludes, "--scan", "--device=nvme")
 	if err != nil {
-		return []string{}, fmt.Errorf("failed to run command '%s --scan': %s - %s", m.Path, err, string(out))
+		return nil, nil, err
 	}

-	devices := []string{}
+	// to handle all versions of smartctl this will return only non NVMe devices
+	nonNVMeDevices := difference(devices, NVMeDevices)
+	return NVMeDevices, nonNVMeDevices, nil
+}
+
+func distinguishNVMeDevices(userDevices []string, availableNVMeDevices []string) []string {
+	var NVMeDevices []string
+
+	for _, userDevice := range userDevices {
+		for _, NVMeDevice := range availableNVMeDevices {
+			// check both directions so that e.g. a configured "nvme0" also matches a scanned "nvme0n1"
+			if strings.Contains(NVMeDevice, userDevice) || strings.Contains(userDevice, NVMeDevice) {
+				NVMeDevices = append(NVMeDevices, userDevice)
+			}
+		}
+	}
+	return NVMeDevices
+}
+
+// Scan for S.M.A.R.T. devices from smartctl
+func (m *Smart) scanDevices(ignoreExcludes bool, scanArgs ...string) ([]string, error) {
+	out, err := runCmd(m.Timeout, m.UseSudo, m.PathSmartctl, scanArgs...)
+	if err != nil {
+		return []string{}, fmt.Errorf("failed to run command '%s %s': %s - %s", m.PathSmartctl, scanArgs, err, string(out))
+	}
+	var devices []string
 	for _, line := range strings.Split(string(out), "\n") {
 		dev := strings.Split(line, " ")
-		if len(dev) > 1 && !excludedDev(m.Excludes, strings.TrimSpace(dev[0])) {
+		if len(dev) <= 1 {
+			continue
+		}
+		if !ignoreExcludes {
+			if !excludedDev(m.Excludes, strings.TrimSpace(dev[0])) {
+				devices = append(devices, strings.TrimSpace(dev[0]))
+			}
+		} else {
 			devices = append(devices, strings.TrimSpace(dev[0]))
 		}
 	}
 	return devices, nil
 }

+// Wrap with sudo
+var runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) {
+	cmd := exec.Command(command, args...)
+	if sudo {
+		cmd = exec.Command("sudo", append([]string{"-n", command}, args...)...)
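+		// sudo -n (non-interactive) makes the call fail fast instead of
+		// prompting for a password when the sudoers rule is missing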
+ } + return internal.CombinedOutputTimeout(cmd, timeout.Duration) +} + func excludedDev(excludes []string, deviceLine string) bool { device := strings.Split(deviceLine, " ") if len(device) != 0 { @@ -270,21 +526,137 @@ func (m *Smart) getAttributes(acc telegraf.Accumulator, devices []string) { wg.Add(len(devices)) for _, device := range devices { - go gatherDisk(acc, m.Timeout, m.UseSudo, m.Attributes, m.Path, m.Nocheck, device, &wg) + go gatherDisk(acc, m.Timeout, m.UseSudo, m.Attributes, m.PathSmartctl, m.Nocheck, device, &wg) } wg.Wait() } -// Command line parse errors are denoted by the exit code having the 0 bit set. -// All other errors are drive/communication errors and should be ignored. -func exitStatus(err error) (int, error) { - if exiterr, ok := err.(*exec.ExitError); ok { - if status, ok := exiterr.Sys().(syscall.WaitStatus); ok { - return status.ExitStatus(), nil +func (m *Smart) getVendorNVMeAttributes(acc telegraf.Accumulator, devices []string) { + NVMeDevices := getDeviceInfoForNVMeDisks(acc, devices, m.PathNVMe, m.Timeout, m.UseSudo) + + var wg sync.WaitGroup + + for _, device := range NVMeDevices { + if contains(m.EnableExtensions, "auto-on") { + switch device.vendorID { + case IntelVID: + wg.Add(1) + go gatherIntelNVMeDisk(acc, m.Timeout, m.UseSudo, m.PathNVMe, device, &wg) + } + } else if contains(m.EnableExtensions, "Intel") && device.vendorID == IntelVID { + wg.Add(1) + go gatherIntelNVMeDisk(acc, m.Timeout, m.UseSudo, m.PathNVMe, device, &wg) + } + } + wg.Wait() +} + +func getDeviceInfoForNVMeDisks(acc telegraf.Accumulator, devices []string, nvme string, timeout internal.Duration, useSudo bool) []NVMeDevice { + var NVMeDevices []NVMeDevice + + for _, device := range devices { + vid, sn, mn, err := gatherNVMeDeviceInfo(nvme, device, timeout, useSudo) + if err != nil { + acc.AddError(fmt.Errorf("cannot find device info for %s device", device)) + continue + } + newDevice := NVMeDevice{ + name: device, + vendorID: vid, + model: mn, + serialNumber: sn, + } + NVMeDevices = append(NVMeDevices, newDevice) + } + return NVMeDevices +} + +func gatherNVMeDeviceInfo(nvme, device string, timeout internal.Duration, useSudo bool) (string, string, string, error) { + args := []string{"id-ctrl"} + args = append(args, strings.Split(device, " ")...) + out, err := runCmd(timeout, useSudo, nvme, args...) + if err != nil { + return "", "", "", err + } + outStr := string(out) + + vid, sn, mn, err := findNVMeDeviceInfo(outStr) + + return vid, sn, mn, err +} + +func findNVMeDeviceInfo(output string) (string, string, string, error) { + scanner := bufio.NewScanner(strings.NewReader(output)) + var vid, sn, mn string + + for scanner.Scan() { + line := scanner.Text() + + if matches := nvmeIdCtrlExpressionPattern.FindStringSubmatch(line); len(matches) > 2 { + matches[1] = strings.TrimSpace(matches[1]) + matches[2] = strings.TrimSpace(matches[2]) + if matches[1] == "vid" { + if _, err := fmt.Sscanf(matches[2], "%s", &vid); err != nil { + return "", "", "", err + } + } + if matches[1] == "sn" { + sn = matches[2] + } + if matches[1] == "mn" { + mn = matches[2] + } + } + } + return vid, sn, mn, nil +} + +func gatherIntelNVMeDisk(acc telegraf.Accumulator, timeout internal.Duration, usesudo bool, nvme string, device NVMeDevice, wg *sync.WaitGroup) { + defer wg.Done() + + args := []string{"intel", "smart-log-add"} + args = append(args, strings.Split(device.name, " ")...) + out, e := runCmd(timeout, usesudo, nvme, args...) 
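+	// at this point out holds the raw vendor log, i.e. the output of a call
+	// like `nvme intel smart-log-add /dev/nvme0` (device path illustrative)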
+ outStr := string(out) + + _, er := exitStatus(e) + if er != nil { + acc.AddError(fmt.Errorf("failed to run command '%s %s': %s - %s", nvme, strings.Join(args, " "), e, outStr)) + return + } + + scanner := bufio.NewScanner(strings.NewReader(outStr)) + + for scanner.Scan() { + line := scanner.Text() + tags := map[string]string{} + fields := make(map[string]interface{}) + + tags["device"] = path.Base(device.name) + tags["model"] = device.model + tags["serial_no"] = device.serialNumber + + if matches := intelExpressionPattern.FindStringSubmatch(line); len(matches) > 3 { + matches[1] = strings.TrimSpace(matches[1]) + matches[3] = strings.TrimSpace(matches[3]) + if attr, ok := intelAttributes[matches[1]]; ok { + tags["name"] = attr.Name + if attr.ID != "" { + tags["id"] = attr.ID + } + + parse := parseCommaSeparatedIntWithAccumulator + if attr.Parse != nil { + parse = attr.Parse + } + + if err := parse(acc, fields, tags, matches[3]); err != nil { + continue + } + } } } - return 0, err } func gatherDisk(acc telegraf.Accumulator, timeout internal.Duration, usesudo, collectAttributes bool, smartctl, nocheck, device string, wg *sync.WaitGroup) { @@ -328,7 +700,7 @@ func gatherDisk(acc telegraf.Accumulator, timeout internal.Duration, usesudo, co deviceTags["wwn"] = strings.Replace(wwn[1], " ", "", -1) } - capacity := usercapacityInfo.FindStringSubmatch(line) + capacity := userCapacityInfo.FindStringSubmatch(line) if len(capacity) > 1 { deviceTags["capacity"] = strings.Replace(capacity[1], ",", "", -1) } @@ -340,7 +712,7 @@ func gatherDisk(acc telegraf.Accumulator, timeout internal.Duration, usesudo, co health := smartOverallHealth.FindStringSubmatch(line) if len(health) > 2 { - deviceFields["health_ok"] = (health[2] == "PASSED" || health[2] == "OK") + deviceFields["health_ok"] = health[2] == "PASSED" || health[2] == "OK" } tags := map[string]string{} @@ -418,6 +790,40 @@ func gatherDisk(acc telegraf.Accumulator, timeout internal.Duration, usesudo, co acc.AddFields("smart_device", deviceFields, deviceTags) } +// Command line parse errors are denoted by the exit code having the 0 bit set. +// All other errors are drive/communication errors and should be ignored. 
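+// For example, with smartctl an exit status of 1 (bit 0 set) means the command
+// line did not parse, while 4 (bit 2 set) signals a failed SMART command and is
+// tolerated here, surfacing through the exit_status field instead.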
+func exitStatus(err error) (int, error) { + if exiterr, ok := err.(*exec.ExitError); ok { + if status, ok := exiterr.Sys().(syscall.WaitStatus); ok { + return status.ExitStatus(), nil + } + } + return 0, err +} + +func contains(args []string, element string) bool { + for _, arg := range args { + if arg == element { + return true + } + } + return false +} + +func difference(a, b []string) []string { + mb := make(map[string]struct{}, len(b)) + for _, x := range b { + mb[x] = struct{}{} + } + var diff []string + for _, x := range a { + if _, found := mb[x]; !found { + diff = append(diff, x) + } + } + return diff +} + func parseRawValue(rawVal string) (int64, error) { // Integer if i, err := strconv.ParseInt(rawVal, 10, 64); err == nil { @@ -428,7 +834,7 @@ func parseRawValue(rawVal string) (int64, error) { unit := regexp.MustCompile("^(.*)([hms])$") parts := strings.Split(rawVal, "+") if len(parts) == 0 { - return 0, fmt.Errorf("Couldn't parse RAW_VALUE '%s'", rawVal) + return 0, fmt.Errorf("couldn't parse RAW_VALUE '%s'", rawVal) } duration := int64(0) @@ -452,6 +858,63 @@ func parseRawValue(rawVal string) (int64, error) { return duration, nil } +func parseBytesWritten(acc telegraf.Accumulator, fields map[string]interface{}, tags map[string]string, str string) error { + var value int64 + + if _, err := fmt.Sscanf(str, "sectors: %d", &value); err != nil { + return err + } + fields["raw_value"] = value + acc.AddFields("smart_attribute", fields, tags) + return nil +} + +func parseThermalThrottle(acc telegraf.Accumulator, fields map[string]interface{}, tags map[string]string, str string) error { + var percentage float64 + var count int64 + + if _, err := fmt.Sscanf(str, "%f%%, cnt: %d", &percentage, &count); err != nil { + return err + } + + fields["raw_value"] = percentage + tags["name"] = "Thermal_Throttle_Status_Prc" + acc.AddFields("smart_attribute", fields, tags) + + fields["raw_value"] = count + tags["name"] = "Thermal_Throttle_Status_Cnt" + acc.AddFields("smart_attribute", fields, tags) + + return nil +} + +func parseWearLeveling(acc telegraf.Accumulator, fields map[string]interface{}, tags map[string]string, str string) error { + var min, max, avg int64 + + if _, err := fmt.Sscanf(str, "min: %d, max: %d, avg: %d", &min, &max, &avg); err != nil { + return err + } + values := []int64{min, max, avg} + for i, submetricName := range []string{"Min", "Max", "Avg"} { + fields["raw_value"] = values[i] + tags["name"] = fmt.Sprintf("Wear_Leveling_%s", submetricName) + acc.AddFields("smart_attribute", fields, tags) + } + + return nil +} + +func parseTimedWorkload(acc telegraf.Accumulator, fields map[string]interface{}, tags map[string]string, str string) error { + var value float64 + + if _, err := fmt.Sscanf(str, "%f", &value); err != nil { + return err + } + fields["raw_value"] = value + acc.AddFields("smart_attribute", fields, tags) + return nil +} + func parseInt(str string) int64 { if i, err := strconv.ParseInt(str, 10, 64); err == nil { return i @@ -460,6 +923,7 @@ func parseInt(str string) int64 { } func parseCommaSeparatedInt(fields, _ map[string]interface{}, str string) error { + str = strings.Join(strings.Fields(str), "") i, err := strconv.ParseInt(strings.Replace(str, ",", "", -1), 10, 64) if err != nil { return err @@ -479,6 +943,17 @@ func parseDataUnits(fields, deviceFields map[string]interface{}, str string) err return parseCommaSeparatedInt(fields, deviceFields, units) } +func parseCommaSeparatedIntWithAccumulator(acc telegraf.Accumulator, fields map[string]interface{}, tags 
map[string]string, str string) error { + i, err := strconv.ParseInt(strings.Replace(str, ",", "", -1), 10, 64) + if err != nil { + return err + } + + fields["raw_value"] = i + acc.AddFields("smart_attribute", fields, tags) + return nil +} + func parseTemperature(fields, deviceFields map[string]interface{}, str string) error { var temp int64 if _, err := fmt.Sscanf(str, "%d C", &temp); err != nil { @@ -491,13 +966,34 @@ func parseTemperature(fields, deviceFields map[string]interface{}, str string) e return nil } +func parseTemperatureSensor(fields, deviceFields map[string]interface{}, str string) error { + var temp int64 + if _, err := fmt.Sscanf(str, "%d C", &temp); err != nil { + return err + } + + fields["raw_value"] = temp + + return nil +} + +func validatePath(path string) error { + pathInfo, err := os.Stat(path) + if os.IsNotExist(err) { + return fmt.Errorf("provided path does not exist: [%s]", path) + } + if mode := pathInfo.Mode(); !mode.IsRegular() { + return fmt.Errorf("provided path does not point to a regular file: [%s]", path) + } + return nil +} + func init() { + // Set LC_NUMERIC to uniform numeric output from cli tools + _ = os.Setenv("LC_NUMERIC", "en_US.UTF-8") + inputs.Add("smart", func() telegraf.Input { m := NewSmart() - path, _ := exec.LookPath("smartctl") - if len(path) > 0 { - m.Path = path - } m.Nocheck = "standby" return m }) diff --git a/plugins/inputs/smart/smart_test.go b/plugins/inputs/smart/smart_test.go index 465ce9317e1e2..00d8cf0725ea7 100644 --- a/plugins/inputs/smart/smart_test.go +++ b/plugins/inputs/smart/smart_test.go @@ -15,30 +15,394 @@ import ( func TestGatherAttributes(t *testing.T) { s := NewSmart() - s.Path = "smartctl" s.Attributes = true assert.Equal(t, time.Second*30, s.Timeout.Duration) - var acc testutil.Accumulator + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { + if len(args) > 0 { + if args[0] == "--info" && args[7] == "/dev/ada0" { + return []byte(mockInfoAttributeData), nil + } else if args[0] == "--info" && args[7] == "/dev/nvme0" { + return []byte(smartctlNvmeInfoData), nil + } else if args[0] == "--scan" && len(args) == 1 { + return []byte(mockScanData), nil + } else if args[0] == "--scan" && len(args) >= 2 && args[1] == "--device=nvme" { + return []byte(mockScanNvmeData), nil + } + } + return nil, errors.New("command not found") + } + + t.Run("Wrong path to smartctl", func(t *testing.T) { + s.PathSmartctl = "this_path_to_smartctl_does_not_exist" + err := s.Init() + + assert.Error(t, err) + }) + + t.Run("Smartctl presence", func(t *testing.T) { + s.PathSmartctl = "smartctl" + s.PathNVMe = "" + + t.Run("Only non nvme device", func(t *testing.T) { + s.Devices = []string{"/dev/ada0"} + var acc testutil.Accumulator + + err := s.Gather(&acc) + + require.NoError(t, err) + assert.Equal(t, 65, acc.NFields(), "Wrong number of fields gathered") + + for _, test := range testsAda0Attributes { + acc.AssertContainsTaggedFields(t, "smart_attribute", test.fields, test.tags) + } + + for _, test := range testsAda0Device { + acc.AssertContainsTaggedFields(t, "smart_device", test.fields, test.tags) + } + }) + t.Run("Only nvme device", func(t *testing.T) { + s.Devices = []string{"/dev/nvme0"} + var acc testutil.Accumulator + + err := s.Gather(&acc) + + require.NoError(t, err) + assert.Equal(t, 32, acc.NFields(), "Wrong number of fields gathered") + + testutil.RequireMetricsEqual(t, testSmartctlNvmeAttributes, acc.GetTelegrafMetrics(), + testutil.SortMetrics(), testutil.IgnoreTime()) + }) + }) +} + 
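+// Note for the tests below: runCmd is a package-level variable (see smart.go),
+// so each test swaps it for a stub that returns canned smartctl/nvme-cli
+// output; no real binaries are executed.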
+func TestGatherNoAttributes(t *testing.T) { + s := NewSmart() + s.Attributes = false + + assert.Equal(t, time.Second*30, s.Timeout.Duration) runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { if len(args) > 0 { - if args[0] == "--scan" { + if args[0] == "--scan" && len(args) == 1 { return []byte(mockScanData), nil - } else if args[0] == "--info" { + } else if args[0] == "--info" && args[7] == "/dev/ada0" { return []byte(mockInfoAttributeData), nil + } else if args[0] == "--info" && args[7] == "/dev/nvme0" { + return []byte(smartctlNvmeInfoData), nil + } else if args[0] == "--scan" && args[1] == "--device=nvme" { + return []byte(mockScanNvmeData), nil } } return nil, errors.New("command not found") } - err := s.Gather(&acc) + t.Run("scan for devices", func(t *testing.T) { + var acc testutil.Accumulator + s.PathSmartctl = "smartctl" + + err := s.Gather(&acc) + + require.NoError(t, err) + assert.Equal(t, 8, acc.NFields(), "Wrong number of fields gathered") + acc.AssertDoesNotContainMeasurement(t, "smart_attribute") + + for _, test := range testsAda0Device { + acc.AssertContainsTaggedFields(t, "smart_device", test.fields, test.tags) + } + for _, test := range testNvmeDevice { + acc.AssertContainsTaggedFields(t, "smart_device", test.fields, test.tags) + } + }) +} + +func TestExcludedDev(t *testing.T) { + assert.Equal(t, true, excludedDev([]string{"/dev/pass6"}, "/dev/pass6 -d atacam"), "Should be excluded.") + assert.Equal(t, false, excludedDev([]string{}, "/dev/pass6 -d atacam"), "Shouldn't be excluded.") + assert.Equal(t, false, excludedDev([]string{"/dev/pass6"}, "/dev/pass1 -d atacam"), "Shouldn't be excluded.") +} + +func TestGatherSATAInfo(t *testing.T) { + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { + return []byte(hgstSATAInfoData), nil + } + + var ( + acc = &testutil.Accumulator{} + wg = &sync.WaitGroup{} + ) + + wg.Add(1) + gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) + assert.Equal(t, 101, acc.NFields(), "Wrong number of fields gathered") + assert.Equal(t, uint64(20), acc.NMetrics(), "Wrong number of metrics gathered") +} + +func TestGatherSATAInfo65(t *testing.T) { + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { + return []byte(hgstSATAInfoData65), nil + } + + var ( + acc = &testutil.Accumulator{} + wg = &sync.WaitGroup{} + ) + + wg.Add(1) + gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) + assert.Equal(t, 91, acc.NFields(), "Wrong number of fields gathered") + assert.Equal(t, uint64(18), acc.NMetrics(), "Wrong number of metrics gathered") +} + +func TestGatherHgstSAS(t *testing.T) { + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { + return []byte(hgstSASInfoData), nil + } + + var ( + acc = &testutil.Accumulator{} + wg = &sync.WaitGroup{} + ) + + wg.Add(1) + gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) + assert.Equal(t, 6, acc.NFields(), "Wrong number of fields gathered") + assert.Equal(t, uint64(4), acc.NMetrics(), "Wrong number of metrics gathered") +} + +func TestGatherHtSAS(t *testing.T) { + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { + return []byte(htSASInfoData), nil + } + + var ( + acc = &testutil.Accumulator{} + wg = &sync.WaitGroup{} + ) + + wg.Add(1) + 
gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) + + testutil.RequireMetricsEqual(t, testHtsasAtributtes, acc.GetTelegrafMetrics(), testutil.SortMetrics(), testutil.IgnoreTime()) +} + +func TestGatherSSD(t *testing.T) { + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { + return []byte(ssdInfoData), nil + } + + var ( + acc = &testutil.Accumulator{} + wg = &sync.WaitGroup{} + ) + + wg.Add(1) + gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) + assert.Equal(t, 105, acc.NFields(), "Wrong number of fields gathered") + assert.Equal(t, uint64(26), acc.NMetrics(), "Wrong number of metrics gathered") +} + +func TestGatherSSDRaid(t *testing.T) { + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { + return []byte(ssdRaidInfoData), nil + } + + var ( + acc = &testutil.Accumulator{} + wg = &sync.WaitGroup{} + ) + + wg.Add(1) + gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) + assert.Equal(t, 74, acc.NFields(), "Wrong number of fields gathered") + assert.Equal(t, uint64(15), acc.NMetrics(), "Wrong number of metrics gathered") +} + +func TestGatherNvme(t *testing.T) { + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { + return []byte(smartctlNvmeInfoData), nil + } + + var ( + acc = &testutil.Accumulator{} + wg = &sync.WaitGroup{} + ) + + wg.Add(1) + gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "nvme0", wg) + + testutil.RequireMetricsEqual(t, testSmartctlNvmeAttributes, acc.GetTelegrafMetrics(), + testutil.SortMetrics(), testutil.IgnoreTime()) +} + +func TestGatherIntelNvme(t *testing.T) { + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { + return []byte(nvmeIntelInfoData), nil + } + + var ( + acc = &testutil.Accumulator{} + wg = &sync.WaitGroup{} + device = NVMeDevice{ + name: "nvme0", + model: mockModel, + serialNumber: mockSerial, + } + ) + + wg.Add(1) + gatherIntelNVMeDisk(acc, internal.Duration{Duration: time.Second * 30}, true, "", device, wg) + + result := acc.GetTelegrafMetrics() + testutil.RequireMetricsEqual(t, testIntelInvmeAttributes, result, + testutil.SortMetrics(), testutil.IgnoreTime()) +} - require.NoError(t, err) - assert.Equal(t, 65, acc.NFields(), "Wrong number of fields gathered") +func Test_findVIDFromNVMeOutput(t *testing.T) { + vid, sn, mn, err := findNVMeDeviceInfo(nvmeIdentifyController) - var testsAda0Attributes = []struct { + assert.Nil(t, err) + assert.Equal(t, "0x8086", vid) + assert.Equal(t, "CVFT5123456789ABCD", sn) + assert.Equal(t, "INTEL SSDPEDABCDEFG", mn) +} + +func Test_checkForNVMeDevices(t *testing.T) { + devices := []string{"sda1", "nvme0", "sda2", "nvme2"} + expectedNVMeDevices := []string{"nvme0", "nvme2"} + resultNVMeDevices := distinguishNVMeDevices(devices, expectedNVMeDevices) + assert.Equal(t, expectedNVMeDevices, resultNVMeDevices) +} + +func Test_excludeWrongDeviceNames(t *testing.T) { + devices := []string{"/dev/sda", "/dev/nvme -d nvme", "/dev/sda1 -d megaraid,1", "/dev/sda ; ./suspicious_script.sh"} + validDevices := []string{"/dev/sda", "/dev/nvme -d nvme", "/dev/sda1 -d megaraid,1"} + result := excludeWrongDeviceNames(devices) + assert.Equal(t, validDevices, result) +} + +func Test_contains(t *testing.T) { + devices := []string{"/dev/sda", "/dev/nvme1"} + device := "/dev/nvme1" + 
deviceNotIncluded := "/dev/nvme5" + assert.True(t, contains(devices, device)) + assert.False(t, contains(devices, deviceNotIncluded)) +} + +func Test_difference(t *testing.T) { + devices := []string{"/dev/sda", "/dev/nvme1", "/dev/nvme2"} + secondDevices := []string{"/dev/sda", "/dev/nvme1"} + expected := []string{"/dev/nvme2"} + result := difference(devices, secondDevices) + assert.Equal(t, expected, result) +} + +func Test_integerOverflow(t *testing.T) { + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { + return []byte(smartctlNvmeInfoDataWithOverflow), nil + } + + var ( + acc = &testutil.Accumulator{} + wg = &sync.WaitGroup{} + ) + + t.Run("If data raw_value is out of int64 range, there should be no metrics for that attribute", func(t *testing.T) { + wg.Add(1) + gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "nvme0", wg) + + result := acc.GetTelegrafMetrics() + testutil.RequireMetricsEqual(t, testOverflowAttributes, result, + testutil.SortMetrics(), testutil.IgnoreTime()) + }) +} + +var ( + testOverflowAttributes = []telegraf.Metric{ + testutil.MustMetric( + "smart_attribute", + map[string]string{ + "device": "nvme0", + "name": "Temperature_Sensor_3", + }, + map[string]interface{}{ + "raw_value": int64(9223372036854775807), + }, + time.Unix(0, 0), + ), + testutil.MustMetric( + "smart_attribute", + map[string]string{ + "device": "nvme0", + "name": "Temperature_Sensor_4", + }, + map[string]interface{}{ + "raw_value": int64(-9223372036854775808), + }, + time.Unix(0, 0), + ), + testutil.MustMetric( + "smart_device", + map[string]string{ + "device": "nvme0", + }, + map[string]interface{}{ + "exit_status": 0, + }, + time.Unix(0, 0), + ), + } + + testHtsasAtributtes = []telegraf.Metric{ + testutil.MustMetric( + "smart_attribute", + map[string]string{ + "device": ".", + "serial_no": "PDWAR9GE", + "enabled": "Enabled", + "id": "194", + "model": "HUC103030CSS600", + "name": "Temperature_Celsius", + }, + map[string]interface{}{ + "raw_value": 36, + }, + time.Unix(0, 0), + ), + testutil.MustMetric( + "smart_attribute", + map[string]string{ + "device": ".", + "serial_no": "PDWAR9GE", + "enabled": "Enabled", + "id": "4", + "model": "HUC103030CSS600", + "name": "Start_Stop_Count", + }, + map[string]interface{}{ + "raw_value": 47, + }, + time.Unix(0, 0), + ), + testutil.MustMetric( + "smart_device", + map[string]string{ + "device": ".", + "serial_no": "PDWAR9GE", + "enabled": "Enabled", + "model": "HUC103030CSS600", + }, + map[string]interface{}{ + "exit_status": 0, + "health_ok": true, + "temp_c": 36, + }, + time.Unix(0, 0), + ), + } + + testsAda0Attributes = []struct { fields map[string]interface{} tags map[string]string }{ @@ -278,83 +642,8 @@ func TestGatherAttributes(t *testing.T) { "value": int64(100), "worst": int64(253), "threshold": int64(0), - "raw_value": int64(23709323), - "exit_status": int(0), - }, - map[string]string{ - "device": "ada0", - "model": "APPLE SSD SM256E", - "serial_no": "S0X5NZBC422720", - "wwn": "5002538043584d30", - "enabled": "Enabled", - "capacity": "251000193024", - "id": "240", - "name": "Head_Flying_Hours", - "flags": "------", - "fail": "-", - }, - }, - } - - for _, test := range testsAda0Attributes { - acc.AssertContainsTaggedFields(t, "smart_attribute", test.fields, test.tags) - } - - var testsAda0Device = []struct { - fields map[string]interface{} - tags map[string]string - }{ - { - map[string]interface{}{ - "exit_status": int(0), - "health_ok": bool(true), - 
"read_error_rate": int64(0), - "temp_c": int64(34), - "udma_crc_errors": int64(0), - }, - map[string]string{ - "device": "ada0", - "model": "APPLE SSD SM256E", - "serial_no": "S0X5NZBC422720", - "wwn": "5002538043584d30", - "enabled": "Enabled", - "capacity": "251000193024", - }, - }, - } - - for _, test := range testsAda0Device { - acc.AssertContainsTaggedFields(t, "smart_device", test.fields, test.tags) - } -} - -func TestGatherNoAttributes(t *testing.T) { - s := NewSmart() - s.Path = "smartctl" - s.Attributes = false - - assert.Equal(t, time.Second*30, s.Timeout.Duration) - - // overwriting exec commands with mock commands - var acc testutil.Accumulator - - err := s.Gather(&acc) - - require.NoError(t, err) - assert.Equal(t, 5, acc.NFields(), "Wrong number of fields gathered") - acc.AssertDoesNotContainMeasurement(t, "smart_attribute") - - var testsAda0Device = []struct { - fields map[string]interface{} - tags map[string]string - }{ - { - map[string]interface{}{ - "exit_status": int(0), - "health_ok": bool(true), - "read_error_rate": int64(0), - "temp_c": int64(34), - "udma_crc_errors": int64(0), + "raw_value": int64(23709323), + "exit_status": int(0), }, map[string]string{ "device": "ada0", @@ -363,410 +652,624 @@ func TestGatherNoAttributes(t *testing.T) { "wwn": "5002538043584d30", "enabled": "Enabled", "capacity": "251000193024", + "id": "240", + "name": "Head_Flying_Hours", + "flags": "------", + "fail": "-", }, }, } - for _, test := range testsAda0Device { - acc.AssertContainsTaggedFields(t, "smart_device", test.fields, test.tags) - } -} - -func TestExcludedDev(t *testing.T) { - assert.Equal(t, true, excludedDev([]string{"/dev/pass6"}, "/dev/pass6 -d atacam"), "Should be excluded.") - assert.Equal(t, false, excludedDev([]string{}, "/dev/pass6 -d atacam"), "Shouldn't be excluded.") - assert.Equal(t, false, excludedDev([]string{"/dev/pass6"}, "/dev/pass1 -d atacam"), "Shouldn't be excluded.") -} - -func TestGatherSATAInfo(t *testing.T) { - runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { - return []byte(hgstSATAInfoData), nil - } - - var ( - acc = &testutil.Accumulator{} - wg = &sync.WaitGroup{} - ) - - wg.Add(1) - gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) - assert.Equal(t, 101, acc.NFields(), "Wrong number of fields gathered") - assert.Equal(t, uint64(20), acc.NMetrics(), "Wrong number of metrics gathered") -} - -func TestGatherSATAInfo65(t *testing.T) { - runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { - return []byte(hgstSATAInfoData65), nil - } - - var ( - acc = &testutil.Accumulator{} - wg = &sync.WaitGroup{} - ) - - wg.Add(1) - gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) - assert.Equal(t, 91, acc.NFields(), "Wrong number of fields gathered") - assert.Equal(t, uint64(18), acc.NMetrics(), "Wrong number of metrics gathered") -} - -func TestGatherHgstSAS(t *testing.T) { - runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { - return []byte(hgstSASInfoData), nil - } - - var ( - acc = &testutil.Accumulator{} - wg = &sync.WaitGroup{} - ) - - wg.Add(1) - gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) - assert.Equal(t, 6, acc.NFields(), "Wrong number of fields gathered") - assert.Equal(t, uint64(4), acc.NMetrics(), "Wrong number of metrics gathered") -} - -func TestGatherHtSAS(t *testing.T) { - 
runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { - return []byte(htSASInfoData), nil - } - - var ( - acc = &testutil.Accumulator{} - wg = &sync.WaitGroup{} - ) - - wg.Add(1) - gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) + mockModel = "INTEL SSDPEDABCDEFG" + mockSerial = "CVFT5123456789ABCD" - expected := []telegraf.Metric{ - testutil.MustMetric( - "smart_attribute", + testSmartctlNvmeAttributes = []telegraf.Metric{ + testutil.MustMetric("smart_device", map[string]string{ - "device": ".", - "serial_no": "PDWAR9GE", - "enabled": "Enabled", + "device": "nvme0", + "model": "TS128GMTE850", + "serial_no": "D704940282?", + }, + map[string]interface{}{ + "exit_status": 0, + "health_ok": true, + "temp_c": 38, + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "id": "9", + "name": "Power_On_Hours", + "serial_no": "D704940282?", + "model": "TS128GMTE850", + }, + map[string]interface{}{ + "raw_value": 6038, + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "id": "12", + "name": "Power_Cycle_Count", + "serial_no": "D704940282?", + "model": "TS128GMTE850", + }, + map[string]interface{}{ + "raw_value": 472, + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "name": "Media_and_Data_Integrity_Errors", + "serial_no": "D704940282?", + "model": "TS128GMTE850", + }, + map[string]interface{}{ + "raw_value": 0, + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "name": "Error_Information_Log_Entries", + "serial_no": "D704940282?", + "model": "TS128GMTE850", + }, + map[string]interface{}{ + "raw_value": 119699, + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "name": "Available_Spare", + "serial_no": "D704940282?", + "model": "TS128GMTE850", + }, + map[string]interface{}{ + "raw_value": 100, + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "name": "Available_Spare_Threshold", + "serial_no": "D704940282?", + "model": "TS128GMTE850", + }, + map[string]interface{}{ + "raw_value": 10, + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", "id": "194", - "model": "HUC103030CSS600", "name": "Temperature_Celsius", + "serial_no": "D704940282?", + "model": "TS128GMTE850", }, map[string]interface{}{ - "raw_value": 36, + "raw_value": 38, }, - time.Unix(0, 0), + time.Now(), ), - testutil.MustMetric( - "smart_attribute", + testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", - "serial_no": "PDWAR9GE", - "enabled": "Enabled", - "id": "4", - "model": "HUC103030CSS600", - "name": "Start_Stop_Count", + "device": "nvme0", + "name": "Critical_Warning", + "serial_no": "D704940282?", + "model": "TS128GMTE850", }, map[string]interface{}{ - "raw_value": 47, + "raw_value": int64(9), }, - time.Unix(0, 0), + time.Now(), ), - testutil.MustMetric( - "smart_device", + testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", - "serial_no": "PDWAR9GE", - "enabled": "Enabled", - "model": "HUC103030CSS600", + "device": "nvme0", + "name": "Percentage_Used", + "serial_no": "D704940282?", + "model": "TS128GMTE850", }, map[string]interface{}{ - "exit_status": 0, - "health_ok": true, - "temp_c": 36, + "raw_value": 
int64(16), }, - time.Unix(0, 0), + time.Now(), ), - } - - testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics(), testutil.SortMetrics(), testutil.IgnoreTime()) -} - -func TestGatherSSD(t *testing.T) { - runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { - return []byte(ssdInfoData), nil - } - - var ( - acc = &testutil.Accumulator{} - wg = &sync.WaitGroup{} - ) - - wg.Add(1) - gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) - assert.Equal(t, 105, acc.NFields(), "Wrong number of fields gathered") - assert.Equal(t, uint64(26), acc.NMetrics(), "Wrong number of metrics gathered") -} - -func TestGatherSSDRaid(t *testing.T) { - runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { - return []byte(ssdRaidInfoData), nil - } - - var ( - acc = &testutil.Accumulator{} - wg = &sync.WaitGroup{} - ) - - wg.Add(1) - gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) - assert.Equal(t, 74, acc.NFields(), "Wrong number of fields gathered") - assert.Equal(t, uint64(15), acc.NMetrics(), "Wrong number of metrics gathered") -} - -func TestGatherNvme(t *testing.T) { - runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { - return []byte(nvmeInfoData), nil - } - - var ( - acc = &testutil.Accumulator{} - wg = &sync.WaitGroup{} - ) - - wg.Add(1) - gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) - - expected := []telegraf.Metric{ - testutil.MustMetric("smart_device", + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "name": "Data_Units_Read", + "serial_no": "D704940282?", + "model": "TS128GMTE850", + }, + map[string]interface{}{ + "raw_value": int64(11836935), + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", - "model": "TS128GMTE850", + "device": "nvme0", + "name": "Data_Units_Written", "serial_no": "D704940282?", + "model": "TS128GMTE850", }, map[string]interface{}{ - "exit_status": 0, - "health_ok": true, - "temp_c": 38, + "raw_value": int64(62288091), }, time.Now(), ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", - "id": "9", - "name": "Power_On_Hours", + "device": "nvme0", + "name": "Host_Read_Commands", "serial_no": "D704940282?", "model": "TS128GMTE850", }, map[string]interface{}{ - "raw_value": 6038, + "raw_value": int64(135924188), }, time.Now(), ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", - "id": "12", - "name": "Power_Cycle_Count", + "device": "nvme0", + "name": "Host_Write_Commands", "serial_no": "D704940282?", "model": "TS128GMTE850", }, map[string]interface{}{ - "raw_value": 472, + "raw_value": int64(7715573429), }, time.Now(), ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", - "name": "Media_and_Data_Integrity_Errors", + "device": "nvme0", + "name": "Controller_Busy_Time", "serial_no": "D704940282?", "model": "TS128GMTE850", }, map[string]interface{}{ - "raw_value": 0, + "raw_value": int64(4042), }, time.Now(), ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", - "name": "Error_Information_Log_Entries", + "device": "nvme0", + "name": "Unsafe_Shutdowns", "serial_no": "D704940282?", "model": "TS128GMTE850", }, map[string]interface{}{ - "raw_value": 119699, + "raw_value": int64(355), }, time.Now(), ), 
testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", - "name": "Available_Spare", + "device": "nvme0", + "name": "Warning_Temperature_Time", "serial_no": "D704940282?", "model": "TS128GMTE850", }, map[string]interface{}{ - "raw_value": 100, + "raw_value": int64(11), }, time.Now(), ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", - "name": "Available_Spare_Threshold", + "device": "nvme0", + "name": "Critical_Temperature_Time", "serial_no": "D704940282?", "model": "TS128GMTE850", }, map[string]interface{}{ - "raw_value": 10, + "raw_value": int64(7), + }, + time.Now(), + ), testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": "D704940282?", + "model": "TS128GMTE850", + "name": "Temperature_Sensor_1", + }, + map[string]interface{}{ + "raw_value": int64(57), }, time.Now(), ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", - "id": "194", - "name": "Temperature_Celsius", + "device": "nvme0", "serial_no": "D704940282?", "model": "TS128GMTE850", + "name": "Temperature_Sensor_2", }, map[string]interface{}{ - "raw_value": 38, + "raw_value": int64(50), }, time.Now(), ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", - "name": "Critical_Warning", + "device": "nvme0", "serial_no": "D704940282?", "model": "TS128GMTE850", + "name": "Temperature_Sensor_3", }, map[string]interface{}{ - "raw_value": int64(9), + "raw_value": int64(44), }, time.Now(), ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", - "name": "Percentage_Used", + "device": "nvme0", "serial_no": "D704940282?", "model": "TS128GMTE850", + "name": "Temperature_Sensor_4", }, map[string]interface{}{ - "raw_value": int64(16), + "raw_value": int64(43), }, time.Now(), ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", - "name": "Data_Units_Read", + "device": "nvme0", "serial_no": "D704940282?", "model": "TS128GMTE850", + "name": "Temperature_Sensor_5", }, map[string]interface{}{ - "raw_value": int64(11836935), + "raw_value": int64(57), }, time.Now(), ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", - "name": "Data_Units_Written", + "device": "nvme0", "serial_no": "D704940282?", "model": "TS128GMTE850", + "name": "Temperature_Sensor_6", }, map[string]interface{}{ - "raw_value": int64(62288091), + "raw_value": int64(50), }, time.Now(), ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", - "name": "Host_Read_Commands", + "device": "nvme0", "serial_no": "D704940282?", "model": "TS128GMTE850", + "name": "Temperature_Sensor_7", }, map[string]interface{}{ - "raw_value": int64(135924188), + "raw_value": int64(44), }, time.Now(), ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", - "name": "Host_Write_Commands", + "device": "nvme0", "serial_no": "D704940282?", "model": "TS128GMTE850", + "name": "Temperature_Sensor_8", }, map[string]interface{}{ - "raw_value": int64(7715573429), + "raw_value": int64(43), }, time.Now(), ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", - "name": "Controller_Busy_Time", + "device": "nvme0", "serial_no": "D704940282?", "model": "TS128GMTE850", + "name": "Thermal_Management_T1_Trans_Count", }, map[string]interface{}{ - "raw_value": int64(4042), + "raw_value": 0, }, time.Now(), ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", - "name": "Unsafe_Shutdowns", + "device": "nvme0", "serial_no": 
"D704940282?", "model": "TS128GMTE850", + "name": "Thermal_Management_T2_Trans_Count", }, map[string]interface{}{ - "raw_value": int64(355), + "raw_value": 0, }, time.Now(), ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", - "name": "Warning_Temperature_Time", + "device": "nvme0", "serial_no": "D704940282?", "model": "TS128GMTE850", + "name": "Thermal_Management_T1_Total_Time", }, map[string]interface{}{ - "raw_value": int64(11), + "raw_value": 0, }, time.Now(), ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", - "name": "Critical_Temperature_Time", + "device": "nvme0", "serial_no": "D704940282?", "model": "TS128GMTE850", + "name": "Thermal_Management_T2_Total_Time", }, map[string]interface{}{ - "raw_value": int64(7), + "raw_value": 0, }, time.Now(), ), } - testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics(), - testutil.SortMetrics(), testutil.IgnoreTime()) -} + testsAda0Device = []struct { + fields map[string]interface{} + tags map[string]string + }{ + { + map[string]interface{}{ + "exit_status": int(0), + "health_ok": bool(true), + "read_error_rate": int64(0), + "temp_c": int64(34), + "udma_crc_errors": int64(0), + }, + map[string]string{ + "device": "ada0", + "model": "APPLE SSD SM256E", + "serial_no": "S0X5NZBC422720", + "wwn": "5002538043584d30", + "enabled": "Enabled", + "capacity": "251000193024", + }, + }, + } -// smartctl output -var ( + testNvmeDevice = []struct { + fields map[string]interface{} + tags map[string]string + }{ + { + map[string]interface{}{ + "exit_status": int(0), + "temp_c": int64(38), + "health_ok": true, + }, + map[string]string{ + "device": "nvme0", + "model": "TS128GMTE850", + "serial_no": "D704940282?", + }, + }, + } + + testIntelInvmeAttributes = []telegraf.Metric{ + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Program_Fail_Count", + }, + map[string]interface{}{ + "raw_value": 0, + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Erase_Fail_Count", + }, + map[string]interface{}{ + "raw_value": 0, + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "End_To_End_Error_Detection_Count", + }, + map[string]interface{}{ + "raw_value": 0, + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Crc_Error_Count", + }, + map[string]interface{}{ + "raw_value": 13, + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Retry_Buffer_Overflow_Count", + }, + map[string]interface{}{ + "raw_value": 0, + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Wear_Leveling_Min", + }, + map[string]interface{}{ + "raw_value": 39, + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Wear_Leveling_Max", + }, + map[string]interface{}{ + "raw_value": 40, + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": 
"nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Wear_Leveling_Avg", + }, + map[string]interface{}{ + "raw_value": 39, + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Timed_Workload_Media_Wear", + }, + map[string]interface{}{ + "raw_value": float64(0.13), + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Timed_Workload_Host_Reads", + }, + map[string]interface{}{ + "raw_value": float64(71), + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Timed_Workload_Timer", + }, + map[string]interface{}{ + "raw_value": int64(1612952), + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Thermal_Throttle_Status_Prc", + }, + map[string]interface{}{ + "raw_value": float64(0), + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Thermal_Throttle_Status_Cnt", + }, + map[string]interface{}{ + "raw_value": int64(0), + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Pll_Lock_Loss_Count", + }, + map[string]interface{}{ + "raw_value": int64(0), + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Nand_Bytes_Written", + }, + map[string]interface{}{ + "raw_value": int64(0), + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Host_Bytes_Written", + }, + map[string]interface{}{ + "raw_value": int64(0), + }, + time.Now(), + ), + } // smartctl --scan - mockScanData = `/dev/ada0 -d atacam # /dev/ada0, ATA device -` + mockScanData = `/dev/ada0 -d atacam # /dev/ada0, ATA device` + + // smartctl --scan -d nvme + mockScanNvmeData = `/dev/nvme0 -d nvme # /dev/nvme0, NVMe device` + // smartctl --info --health --attributes --tolerance=verypermissive -n standby --format=brief [DEVICE] mockInfoAttributeData = `smartctl 6.5 2016-05-07 r4318 [Darwin 16.4.0 x86_64] (local build) Copyright (C) 2002-16, Bruce Allen, Christian Franke, www.smartmontools.org @@ -1174,8 +1677,7 @@ Selective self-test flags (0x0): After scanning selected spans, do NOT read-scan remainder of disk. If Selective self-test is pending on power-up, resume after 0 minute delay. ` - - nvmeInfoData = `smartctl 6.5 2016-05-07 r4318 [x86_64-linux-4.1.27-gvt-yocto-standard] (local build) + smartctlNvmeInfoData = `smartctl 6.5 2016-05-07 r4318 [x86_64-linux-4.1.27-gvt-yocto-standard] (local build) Copyright (C) 2002-16, Bruce Allen, Christian Franke, www.smartmontools.org === START OF INFORMATION SECTION === @@ -1211,5 +1713,108 @@ Media and Data Integrity Errors: 0 Error Information Log Entries: 119,699 Warning Comp. Temperature Time: 11 Critical Comp. Temperature Time: 7 +Thermal Temp. 1 Transition Count: 0 +Thermal Temp. 2 Transition Count: 0 +Thermal Temp. 1 Total Time: 0 +Thermal Temp. 
2 Total Time: 0 +Temperature Sensor 1: 57 C +Temperature Sensor 2: 50 C +Temperature Sensor 3: 44 C +Temperature Sensor 4: 43 C +Temperature Sensor 5: 57 C +Temperature Sensor 6: 50 C +Temperature Sensor 7: 44 C +Temperature Sensor 8: 43 C +` + + smartctlNvmeInfoDataWithOverflow = ` +Temperature Sensor 1: 9223372036854775808 C +Temperature Sensor 2: -9223372036854775809 C +Temperature Sensor 3: 9223372036854775807 C +Temperature Sensor 4: -9223372036854775808 C +` + + nvmeIntelInfoData = `Additional Smart Log for NVME device:nvme0 namespace-id:ffffffff +key normalized raw +program_fail_count : 100% 0 +erase_fail_count : 100% 0 +wear_leveling : 100% min: 39, max: 40, avg: 39 +end_to_end_error_detection_count: 100% 0 +crc_error_count : 100% 13 +timed_workload_media_wear : 100% 0.130% +timed_workload_host_reads : 100% 71% +timed_workload_timer : 100% 1612952 min +thermal_throttle_status : 100% 0%, cnt: 0 +retry_buffer_overflow_count : 100% 0 +pll_lock_loss_count : 100% 0 +nand_bytes_written : 0% sectors: 0 +host_bytes_written : 0% sectors: 0 +` + + nvmeIdentifyController = `NVME Identify Controller: +vid : 0x8086 +ssvid : 0x8086 +sn : CVFT5123456789ABCD +mn : INTEL SSDPEDABCDEFG +fr : 8DV10131 +rab : 0 +ieee : 5cd2e4 +cmic : 0 +mdts : 5 +cntlid : 0 +ver : 0 +rtd3r : 0 +rtd3e : 0 +oaes : 0 +ctratt : 0 +oacs : 0x6 +acl : 3 +aerl : 3 +frmw : 0x2 +lpa : 0 +elpe : 63 +npss : 0 +avscc : 0 +apsta : 0 +wctemp : 0 +cctemp : 0 +mtfa : 0 +hmpre : 0 +hmmin : 0 +tnvmcap : 0 +unvmcap : 0 +rpmbs : 0 +edstt : 0 +dsto : 0 +fwug : 0 +kas : 0 +hctma : 0 +mntmt : 0 +mxtmt : 0 +sanicap : 0 +hmminds : 0 +hmmaxd : 0 +sqes : 0x66 +cqes : 0x44 +maxcmd : 0 +nn : 1 +oncs : 0x6 +fuses : 0 +fna : 0x7 +vwc : 0 +awun : 0 +awupf : 0 +nvscc : 0 +acwu : 0 +sgls : 0 +subnqn : +ioccsz : 0 +iorcsz : 0 +icdoff : 0 +ctrattr : 0 +msdbd : 0 +ps 0 : mp:25.00W operational enlat:0 exlat:0 rrt:0 rrl:0 + rwt:0 rwl:0 idle_power:- active_power:- ` ) diff --git a/plugins/inputs/snmp/README.md b/plugins/inputs/snmp/README.md index 0d2eb52ab44bb..a0c9155db5432 100644 --- a/plugins/inputs/snmp/README.md +++ b/plugins/inputs/snmp/README.md @@ -35,6 +35,9 @@ information. ## SNMP community string. # community = "public" + ## Agent host tag + # agent_host_tag = "agent_host" + ## Number of retries to attempt. # retries = 3 diff --git a/plugins/inputs/snmp/snmp.go b/plugins/inputs/snmp/snmp.go index 737be06f67c58..103b23d214485 100644 --- a/plugins/inputs/snmp/snmp.go +++ b/plugins/inputs/snmp/snmp.go @@ -34,6 +34,9 @@ const sampleConfig = ` ## SNMP version; can be 1, 2, or 3. # version = 2 + ## Agent host tag; the tag used to reference the source host + # agent_host_tag = "agent_host" + ## SNMP community string. # community = "public" @@ -95,6 +98,9 @@ type Snmp struct { // udp://1.2.3.4:161). If the scheme is not specified then "udp" is used.
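+	// Illustrative use of the agent_host_tag option added below (the "source" value
+	// is an example, not a default from this patch):
+	//
+	//   [[inputs.snmp]]
+	//     agents = ["udp://127.0.0.1:161"]
+	//     agent_host_tag = "source"
+	//
+	// Rows gathered from that agent are then tagged tags["source"] = <agent host>;
+	// when the option is left unset, init() falls back to the "agent_host" tag name.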
Agents []string `toml:"agents"` + // The tag used to name the agent host + AgentHostTag string `toml:"agent_host_tag"` + snmp.ClientConfig Tables []Table `toml:"table"` @@ -128,6 +134,10 @@ func (s *Snmp) init() error { } } + if len(s.AgentHostTag) == 0 { + s.AgentHostTag = "agent_host" + } + s.initialized = true return nil } @@ -374,8 +384,8 @@ func (s *Snmp) gatherTable(acc telegraf.Accumulator, gs snmpConnection, t Table, } } } - if _, ok := tr.Tags["agent_host"]; !ok { - tr.Tags["agent_host"] = gs.Host() + if _, ok := tr.Tags[s.AgentHostTag]; !ok { + tr.Tags[s.AgentHostTag] = gs.Host() } acc.AddFields(rt.Name, tr.Fields, tr.Tags, rt.Time) } diff --git a/plugins/inputs/snmp/snmp_test.go b/plugins/inputs/snmp/snmp_test.go index 9991ff7413a9a..583b2dc847282 100644 --- a/plugins/inputs/snmp/snmp_test.go +++ b/plugins/inputs/snmp/snmp_test.go @@ -90,7 +90,8 @@ func TestSampleConfig(t *testing.T) { require.NoError(t, err) expected := &Snmp{ - Agents: []string{"udp://127.0.0.1:161"}, + Agents: []string{"udp://127.0.0.1:161"}, + AgentHostTag: "", ClientConfig: config.ClientConfig{ Timeout: internal.Duration{Duration: 5 * time.Second}, Version: 2, @@ -634,7 +635,7 @@ func TestGather(t *testing.T) { m := acc.Metrics[0] assert.Equal(t, "mytable", m.Measurement) - assert.Equal(t, "tsc", m.Tags["agent_host"]) + assert.Equal(t, "tsc", m.Tags[s.AgentHostTag]) assert.Equal(t, "baz", m.Tags["myfield1"]) assert.Len(t, m.Fields, 2) assert.Equal(t, 234, m.Fields["myfield2"]) @@ -644,7 +645,7 @@ func TestGather(t *testing.T) { m2 := acc.Metrics[1] assert.Equal(t, "myOtherTable", m2.Measurement) - assert.Equal(t, "tsc", m2.Tags["agent_host"]) + assert.Equal(t, "tsc", m2.Tags[s.AgentHostTag]) assert.Equal(t, "baz", m2.Tags["myfield1"]) assert.Len(t, m2.Fields, 1) assert.Equal(t, 123456, m2.Fields["myOtherField"]) diff --git a/plugins/inputs/snmp_legacy/snmp_legacy.go b/plugins/inputs/snmp_legacy/snmp_legacy.go index 8df9cff06fa2c..62a3966fa451a 100644 --- a/plugins/inputs/snmp_legacy/snmp_legacy.go +++ b/plugins/inputs/snmp_legacy/snmp_legacy.go @@ -706,6 +706,9 @@ func (h *Host) GetSNMPClient() (*gosnmp.GoSNMP, error) { } // convert port_str to port in uint16 port_64, err := strconv.ParseUint(port_str, 10, 16) + if err != nil { + return nil, err + } port := uint16(port_64) // Get SNMP client snmpClient := &gosnmp.GoSNMP{ diff --git a/plugins/inputs/snmp_trap/README.md b/plugins/inputs/snmp_trap/README.md index 046f18e498604..0680376c400db 100644 --- a/plugins/inputs/snmp_trap/README.md +++ b/plugins/inputs/snmp_trap/README.md @@ -87,6 +87,7 @@ On Mac OS, listening on privileged ports is unrestricted on versions - version (string, "1" or "2c" or "3") - context_name (string, value from v3 trap) - engine_id (string, value from v3 trap) + - community (string, value from 1 or 2c trap) - fields: - Fields are mapped from variables in the trap. Field names are the trap variable names after MIB lookup. 
Field values are trap @@ -94,8 +95,8 @@ On Mac OS, listening on privileged ports is unrestricted on versions ### Example Output ``` -snmp_trap,mib=SNMPv2-MIB,name=coldStart,oid=.1.3.6.1.6.3.1.1.5.1,source=192.168.122.102,version=2c snmpTrapEnterprise.0="linux",sysUpTimeInstance=1i 1574109187723429814 -snmp_trap,mib=NET-SNMP-AGENT-MIB,name=nsNotifyShutdown,oid=.1.3.6.1.4.1.8072.4.0.2,source=192.168.122.102,version=2c sysUpTimeInstance=5803i,snmpTrapEnterprise.0="netSnmpNotificationPrefix" 1574109186555115459 +snmp_trap,mib=SNMPv2-MIB,name=coldStart,oid=.1.3.6.1.6.3.1.1.5.1,source=192.168.122.102,version=2c,community=public snmpTrapEnterprise.0="linux",sysUpTimeInstance=1i 1574109187723429814 +snmp_trap,mib=NET-SNMP-AGENT-MIB,name=nsNotifyShutdown,oid=.1.3.6.1.4.1.8072.4.0.2,source=192.168.122.102,version=2c,community=public sysUpTimeInstance=5803i,snmpTrapEnterprise.0="netSnmpNotificationPrefix" 1574109186555115459 ``` [net-snmp]: http://www.net-snmp.org/ diff --git a/plugins/inputs/snmp_trap/snmp_trap.go b/plugins/inputs/snmp_trap/snmp_trap.go index dbf0cdbf3ade2..d380d582bad66 100644 --- a/plugins/inputs/snmp_trap/snmp_trap.go +++ b/plugins/inputs/snmp_trap/snmp_trap.go @@ -357,6 +357,10 @@ func makeTrapHandler(s *SnmpTrap) handler { // SNMP RFCs like 3411 and 5343 show engine ID as a hex string tags["engine_id"] = fmt.Sprintf("%x", packet.ContextEngineID) } + } else { + if packet.Community != "" { + tags["community"] = packet.Community + } } s.acc.AddFields("snmp_trap", fields, tags, tm) diff --git a/plugins/inputs/snmp_trap/snmp_trap_test.go b/plugins/inputs/snmp_trap/snmp_trap_test.go index ee539f312a77d..b5f8da27aa7b3 100644 --- a/plugins/inputs/snmp_trap/snmp_trap_test.go +++ b/plugins/inputs/snmp_trap/snmp_trap_test.go @@ -224,11 +224,12 @@ func TestReceiveTrap(t *testing.T) { testutil.MustMetric( "snmp_trap", // name map[string]string{ // tags - "oid": ".1.3.6.1.6.3.1.1.5.1", - "name": "coldStart", - "mib": "SNMPv2-MIB", - "version": "2c", - "source": "127.0.0.1", + "oid": ".1.3.6.1.6.3.1.1.5.1", + "name": "coldStart", + "mib": "SNMPv2-MIB", + "version": "2c", + "source": "127.0.0.1", + "community": "public", }, map[string]interface{}{ // fields "sysUpTimeInstance": now, @@ -305,6 +306,7 @@ func TestReceiveTrap(t *testing.T) { "version": "1", "source": "127.0.0.1", "agent_address": "10.20.30.40", + "community": "public", }, map[string]interface{}{ // fields "sysUpTimeInstance": uint(now), @@ -358,6 +360,7 @@ func TestReceiveTrap(t *testing.T) { "version": "1", "source": "127.0.0.1", "agent_address": "10.20.30.40", + "community": "public", }, map[string]interface{}{ // fields "sysUpTimeInstance": uint(now), diff --git a/plugins/inputs/sqlserver/README.md b/plugins/inputs/sqlserver/README.md index 7f7887769d916..27c6da1cd7571 100644 --- a/plugins/inputs/sqlserver/README.md +++ b/plugins/inputs/sqlserver/README.md @@ -78,7 +78,7 @@ GO ## - AzureSQLDBResourceGovernance ## - AzureSQLDBDatabaseIO ## - AzureSQLDBServerProperties - ## - AzureSQLDBSQLOsWaitstats + ## - AzureSQLDBOsWaitstats ## - AzureSQLDBMemoryClerks ## - AzureSQLDBPerformanceCounters ## - AzureSQLDBRequests @@ -92,7 +92,7 @@ GO ## - AzureSQLMIOsWaitstats ## - AzureSQLMIMemoryClerks ## - AzureSQLMIPerformanceCounters - ## - AzureSQLMIDBRequests + ## - AzureSQLMIRequests ## - AzureSQLMISchedulers ## database_type = SQLServer by default collects the following queries diff --git a/plugins/inputs/sqlserver/azuresqlqueries.go b/plugins/inputs/sqlserver/azuresqlqueries.go index 048b20af26191..04a76cc983cb2 100644 --- 
a/plugins/inputs/sqlserver/azuresqlqueries.go +++ b/plugins/inputs/sqlserver/azuresqlqueries.go @@ -4,93 +4,109 @@ import ( _ "github.com/denisenkom/go-mssqldb" // go-mssqldb initialization ) +//------------------------------------------------------------------------------------------------ +//------------------ Azure SQL Database ------------------------------------------------------ +//------------------------------------------------------------------------------------------------ // Only executed if AzureDB flag is set -const sqlAzureDBResourceStats string = `SET DEADLOCK_PRIORITY -10; -IF SERVERPROPERTY('EngineEdition') = 5 -- Is this Azure SQL DB? -BEGIN - SELECT TOP(1) - 'sqlserver_azure_db_resource_stats' AS [measurement], - REPLACE(@@SERVERNAME,'\',':') AS [sql_instance], - DB_NAME() as [database_name], - cast(avg_cpu_percent as float) as avg_cpu_percent, - cast(avg_data_io_percent as float) as avg_data_io_percent, - cast(avg_log_write_percent as float) as avg_log_write_percent, - cast(avg_memory_usage_percent as float) as avg_memory_usage_percent, - cast(xtp_storage_percent as float) as xtp_storage_percent, - cast(max_worker_percent as float) as max_worker_percent, - cast(max_session_percent as float) as max_session_percent, - dtu_limit, - cast(avg_login_rate_percent as float) as avg_login_rate_percent , - end_time, - cast(avg_instance_memory_percent as float) as avg_instance_memory_percent , - cast(avg_instance_cpu_percent as float) as avg_instance_cpu_percent - FROM - sys.dm_db_resource_stats WITH (NOLOCK) - ORDER BY - end_time DESC +const sqlAzureDBResourceStats string = ` +IF SERVERPROPERTY('EngineEdition') <> 5 BEGIN /*not Azure SQL DB*/ + DECLARE @ErrorMessage AS nvarchar(500) = 'Telegraf - Connection string Server:'+ @@SERVERNAME + ',Database:' + DB_NAME() +' is not an Azure SQL DB. Check the database_type parameter in the telegraf configuration.'; + RAISERROR (@ErrorMessage,11,1) + RETURN END + +SELECT TOP(1) + 'sqlserver_azure_db_resource_stats' AS [measurement] + ,REPLACE(@@SERVERNAME,'\',':') AS [sql_instance] + ,DB_NAME() as [database_name] + ,cast([avg_cpu_percent] as float) as [avg_cpu_percent] + ,cast([avg_data_io_percent] as float) as [avg_data_io_percent] + ,cast([avg_log_write_percent] as float) as [avg_log_write_percent] + ,cast([avg_memory_usage_percent] as float) as [avg_memory_usage_percent] + ,cast([xtp_storage_percent] as float) as [xtp_storage_percent] + ,cast([max_worker_percent] as float) as [max_worker_percent] + ,cast([max_session_percent] as float) as [max_session_percent] + ,[dtu_limit] + ,cast([avg_login_rate_percent] as float) as [avg_login_rate_percent] + ,[end_time] + ,cast([avg_instance_memory_percent] as float) as [avg_instance_memory_percent] + ,cast([avg_instance_cpu_percent] as float) as [avg_instance_cpu_percent] +FROM + sys.dm_db_resource_stats WITH (NOLOCK) +ORDER BY + [end_time] DESC; `
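+// Note: sys.dm_db_resource_stats keeps roughly one hour of history with one row per
+// 15-second window, so the TOP(1) ... ORDER BY [end_time] DESC above returns the most
+// recent utilization sample on each telegraf gather.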
// Resource Governance is only relevant to Azure SQL DB, so it is split into a separate collector. // This will only be collected for Azure SQL Database. const sqlAzureDBResourceGovernance string = ` -IF SERVERPROPERTY('EngineEdition') = 5 -- Is this Azure SQL DB? +IF SERVERPROPERTY('EngineEdition') <> 5 BEGIN /*not Azure SQL DB*/ + DECLARE @ErrorMessage AS nvarchar(500) = 'Telegraf - Connection string Server:'+ @@SERVERNAME + ',Database:' + DB_NAME() +' is not an Azure SQL DB. Check the database_type parameter in the telegraf configuration.'; + RAISERROR (@ErrorMessage,11,1) + RETURN +END + SELECT - 'sqlserver_db_resource_governance' AS [measurement], - REPLACE(@@SERVERNAME,'\',':') AS [sql_instance], - DB_NAME() as [database_name], - slo_name, - dtu_limit, - max_cpu, - cap_cpu, - instance_cap_cpu, - max_db_memory, - max_db_max_size_in_mb, - db_file_growth_in_mb, - log_size_in_mb, - instance_max_worker_threads, - primary_group_max_workers, - instance_max_log_rate, - primary_min_log_rate, - primary_max_log_rate, - primary_group_min_io, - primary_group_max_io, - primary_group_min_cpu, - primary_group_max_cpu, - primary_pool_max_workers, - pool_max_io, - checkpoint_rate_mbps, - checkpoint_rate_io, - volume_local_iops, - volume_managed_xstore_iops, - volume_external_xstore_iops, - volume_type_local_iops, - volume_type_managed_xstore_iops, - volume_type_external_xstore_iops, - volume_pfs_iops, - volume_type_pfs_iops - FROM - sys.dm_user_db_resource_governance WITH (NOLOCK); + 'sqlserver_db_resource_governance' AS [measurement] + ,REPLACE(@@SERVERNAME,'\',':') AS [sql_instance] + ,DB_NAME() as [database_name] + ,[slo_name] + ,[dtu_limit] + ,[max_cpu] + ,[cap_cpu] + ,[instance_cap_cpu] + ,[max_db_memory] + ,[max_db_max_size_in_mb] + ,[db_file_growth_in_mb] + ,[log_size_in_mb] + ,[instance_max_worker_threads] + ,[primary_group_max_workers] + ,[instance_max_log_rate] + ,[primary_min_log_rate] + ,[primary_max_log_rate] + ,[primary_group_min_io] + ,[primary_group_max_io] + ,[primary_group_min_cpu] + ,[primary_group_max_cpu] + ,[primary_pool_max_workers] + ,[pool_max_io] + ,[checkpoint_rate_mbps] + ,[checkpoint_rate_io] + ,[volume_local_iops] + ,[volume_managed_xstore_iops] + ,[volume_external_xstore_iops] + ,[volume_type_local_iops] + ,[volume_type_managed_xstore_iops] + ,[volume_type_external_xstore_iops] + ,[volume_pfs_iops] + ,[volume_type_pfs_iops] +FROM + sys.dm_user_db_resource_governance WITH (NOLOCK); `
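+// The guard pattern shared by these queries raises severity 11 (above the informational
+// range), which the SQL driver returns to telegraf as an error, so a database_type
+// mismatch shows up in the log rather than as silently empty measurements.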
// DB-level wait stats are only relevant to Azure SQL DB, so they are split into a separate collector. // This will only be collected for Azure SQL Database. const sqlAzureDBWaitStats string = ` +IF SERVERPROPERTY('EngineEdition') <> 5 BEGIN /*not Azure SQL DB*/ + DECLARE @ErrorMessage AS nvarchar(500) = 'Telegraf - Connection string Server:'+ @@SERVERNAME + ',Database:' + DB_NAME() +' is not an Azure SQL DB. Check the database_type parameter in the telegraf configuration.'; + RAISERROR (@ErrorMessage,11,1) + RETURN +END + +SELECT + 'sqlserver_azuredb_waitstats' AS [measurement] + ,REPLACE(@@SERVERNAME,'\',':') AS [sql_instance] + ,DB_NAME() as [database_name] + ,dbws.[wait_type] + ,dbws.[wait_time_ms] + ,dbws.[wait_time_ms] - [signal_wait_time_ms] AS [resource_wait_ms] + ,dbws.[signal_wait_time_ms] + ,dbws.[max_wait_time_ms] + ,dbws.[waiting_tasks_count] +FROM sys.dm_db_wait_stats AS dbws WITH (NOLOCK) -WHERE - dbws.wait_type NOT IN ( +WHERE + dbws.[wait_type] NOT IN ( N'BROKER_EVENTHANDLER', N'BROKER_RECEIVE_WAITFOR', N'BROKER_TASK_STOP', N'BROKER_TO_FLUSH', N'BROKER_TRANSMITTER', N'CHECKPOINT_QUEUE', N'CHKPT', N'CLR_AUTO_EVENT', N'CLR_MANUAL_EVENT', N'CLR_SEMAPHORE', @@ -124,154 +140,193 @@ IF SERVERPROPERTY('EngineEdition') = 5 -- Is this Azure SQL DB? N'XE_BUFFERMGR_ALLPROCESSED_EVENT', N'XE_DISPATCHER_JOIN', N'XE_DISPATCHER_WAIT', N'XE_LIVE_TARGET_TVF', N'XE_TIMER_EVENT', N'SOS_WORK_DISPATCHER','RESERVED_MEMORY_ALLOCATION_EXT') - AND waiting_tasks_count > 0 - AND wait_time_ms > 100; + AND [waiting_tasks_count] > 0 + AND [wait_time_ms] > 100; ` const sqlAzureDBDatabaseIO = ` SET DEADLOCK_PRIORITY -10; -IF SERVERPROPERTY('EngineEdition') = 5 -- Is this Azure SQL DB? +IF SERVERPROPERTY('EngineEdition') <> 5 BEGIN /*not Azure SQL DB*/ + DECLARE @ErrorMessage AS nvarchar(500) = 'Telegraf - Connection string Server:'+ @@SERVERNAME + ',Database:' + DB_NAME() +' is not an Azure SQL DB. Check the database_type parameter in the telegraf configuration.'; + RAISERROR (@ErrorMessage,11,1) + RETURN +END + SELECT - 'sqlserver_database_io' As [measurement] - ,REPLACE(@@SERVERNAME,'\',':') AS [sql_instance] - ,DB_NAME() as database_name - ,vfs.database_id -- /*needed as tempdb is different for each Azure SQL DB as grouping has to be by logical server + db_name + database_id*/ - ,vfs.file_id - ,vfs.io_stall_read_ms AS read_latency_ms - ,vfs.num_of_reads AS reads - ,vfs.num_of_bytes_read AS read_bytes - ,vfs.io_stall_write_ms AS write_latency_ms - ,vfs.num_of_writes AS writes - ,vfs.num_of_bytes_written AS write_bytes - ,vfs.io_stall_queued_read_ms AS [rg_read_stall_ms] - ,vfs.io_stall_queued_write_ms AS [rg_write_stall_ms] - ,CASE - WHEN (vfs.database_id = 0) THEN 'RBPEX' - ELSE b.logical_filename - END as logical_filename - ,CASE - WHEN (vfs.database_id = 0) THEN 'RBPEX' - ELSE b.physical_filename - END as physical_filename - ,CASE WHEN vfs.file_id = 2 THEN 'LOG' ELSE 'DATA' END AS file_type - ,ISNULL(size,0)/128 AS current_size_mb - ,ISNULL(FILEPROPERTY(b.logical_filename,'SpaceUsed')/128,0) as space_used_mb - FROM [sys].[dm_io_virtual_file_stats](NULL,NULL) AS vfs + 'sqlserver_database_io' As [measurement] + ,REPLACE(@@SERVERNAME,'\',':') AS [sql_instance] + ,DB_NAME() as [database_name] + ,vfs.[database_id] /*needed as tempdb is different for each Azure SQL DB as grouping has to be by logical server + db_name + database_id*/ + ,vfs.[file_id] + ,vfs.[io_stall_read_ms] AS [read_latency_ms] + ,vfs.[num_of_reads] AS [reads] + ,vfs.[num_of_bytes_read] AS [read_bytes] + ,vfs.[io_stall_write_ms] AS [write_latency_ms] + ,vfs.[num_of_writes] AS [writes] + ,vfs.[num_of_bytes_written] AS [write_bytes] + ,vfs.[io_stall_queued_read_ms] AS [rg_read_stall_ms] + ,vfs.[io_stall_queued_write_ms] AS [rg_write_stall_ms] + ,CASE + WHEN (vfs.[database_id] = 0) THEN 'RBPEX' + ELSE b.[logical_filename] + END as [logical_filename] + ,CASE + WHEN (vfs.[database_id] = 0) THEN 'RBPEX' + ELSE b.[physical_filename] + END as
[physical_filename] + ,CASE + WHEN vfs.[file_id] = 2 THEN 'LOG' + ELSE 'DATA' + END AS [file_type] + ,ISNULL([size],0)/128 AS [current_size_mb] + ,ISNULL(FILEPROPERTY(b.[logical_filename],'SpaceUsed')/128,0) as [space_used_mb] +FROM + [sys].[dm_io_virtual_file_stats](NULL,NULL) AS vfs -- needed to get Tempdb file names on Azure SQL DB so you can join appropriately. Without this had a bug where join was only on file_id - LEFT OUTER join - ( - SELECT DB_ID() as database_id, file_id, logical_filename=name COLLATE SQL_Latin1_General_CP1_CI_AS - , physical_filename = physical_name COLLATE SQL_Latin1_General_CP1_CI_AS, size from sys.database_files - where type <> 2 - UNION ALL - SELECT 2 as database_id, file_id, logical_filename = name , physical_filename = physical_name, size - from tempdb.sys.database_files - ) b ON b.database_id = vfs.database_id and b.file_id = vfs.file_id - where vfs.database_id IN (DB_ID(),0,2) +LEFT OUTER join ( + SELECT + DB_ID() as [database_id] + ,[file_id] + ,[logical_filename]= [name] COLLATE SQL_Latin1_General_CP1_CI_AS + ,[physical_filename] = [physical_name] COLLATE SQL_Latin1_General_CP1_CI_AS + ,[size] + FROM sys.database_files + WHERE + [type] <> 2 + UNION ALL + SELECT + 2 as [database_id] + ,[file_id] + ,[logical_filename] = [name] + ,[physical_filename] = [physical_name] + ,[size] + FROM tempdb.sys.database_files +) b + ON + b.[database_id] = vfs.[database_id] + AND b.[file_id] = vfs.[file_id] +WHERE + vfs.[database_id] IN (DB_ID(),0,2) ` const sqlAzureDBProperties = ` -DECLARE @EngineEdition AS tinyint = CAST(SERVERPROPERTY('EngineEdition') AS int) -IF @EngineEdition = 5 -- Is this Azure SQL DB? -SELECT 'sqlserver_server_properties' AS [measurement], - REPLACE(@@SERVERNAME,'\',':') AS [sql_instance], - DB_NAME() as [database_name], - (SELECT count(*) FROM sys.dm_os_schedulers WHERE status = 'VISIBLE ONLINE') AS cpu_count, - (SELECT process_memory_limit_mb FROM sys.dm_os_job_object) AS server_memory, - slo.edition as sku, - @EngineEdition AS engine_edition, - slo.service_objective AS hardware_type, - CASE - WHEN slo.edition = 'Hyperscale' then NULL - ELSE cast(DATABASEPROPERTYEX(DB_NAME(),'MaxSizeInBytes') as bigint)/(1024*1024) - END AS total_storage_mb, - CASE - WHEN slo.edition = 'Hyperscale' then NULL - ELSE - (cast(DATABASEPROPERTYEX(DB_NAME(),'MaxSizeInBytes') as bigint)/(1024*1024)- - (select SUM(size/128 - CAST(FILEPROPERTY(name, 'SpaceUsed') AS int)/128) FROM sys.database_files ) - ) - END AS available_storage_mb, - (select DATEDIFF(MINUTE,sqlserver_start_time,GETDATE()) from sys.dm_os_sys_info) as uptime - FROM sys.databases d - -- sys.databases.database_id may not match current DB_ID on Azure SQL DB - CROSS JOIN sys.database_service_objectives slo - WHERE d.name = DB_NAME() AND slo.database_id = DB_ID(); +IF SERVERPROPERTY('EngineEdition') <> 5 BEGIN /*not Azure SQL DB*/ + DECLARE @ErrorMessage AS nvarchar(500) = 'Telegraf - Connection string Server:'+ @@SERVERNAME + ',Database:' + DB_NAME() +' is not an Azure SQL DB. 
Check the database_type parameter in the telegraf configuration.'; + RAISERROR (@ErrorMessage,11,1) + RETURN +END + +SELECT + 'sqlserver_server_properties' AS [measurement] + ,REPLACE(@@SERVERNAME,'\',':') AS [sql_instance] + ,DB_NAME() as [database_name] + ,(SELECT count(*) FROM sys.dm_os_schedulers WHERE status = 'VISIBLE ONLINE') AS [cpu_count] + ,(SELECT [process_memory_limit_mb] FROM sys.dm_os_job_object) AS [server_memory] + ,slo.[edition] as [sku] + ,SERVERPROPERTY('EngineEdition') AS [engine_edition] + ,slo.[service_objective] AS [hardware_type] + ,CASE + WHEN slo.[edition] = 'Hyperscale' then NULL + ELSE CAST(DATABASEPROPERTYEX(DB_NAME(),'MaxSizeInBytes') as bigint)/(1024*1024) + END AS [total_storage_mb] + ,CASE + WHEN slo.[edition] = 'Hyperscale' then NULL + ELSE ( + cast(DATABASEPROPERTYEX(DB_NAME(),'MaxSizeInBytes') as bigint)/(1024*1024) - + (select SUM([size]/128 - CAST(FILEPROPERTY(name, 'SpaceUsed') AS int)/128) FROM sys.database_files) + ) + END AS [available_storage_mb] + ,(select DATEDIFF(MINUTE,sqlserver_start_time,GETDATE()) from sys.dm_os_sys_info) as [uptime] + FROM sys.[databases] AS d + -- sys.databases.database_id may not match current DB_ID on Azure SQL DB + CROSS JOIN sys.[database_service_objectives] AS slo + WHERE + d.[name] = DB_NAME() + AND slo.[database_id] = DB_ID(); ` const sqlAzureDBOsWaitStats = ` -SET DEADLOCK_PRIORITY -10; -IF SERVERPROPERTY('EngineEdition') = 5 -- Is this Azure SQL DB? +IF SERVERPROPERTY('EngineEdition') <> 5 BEGIN /*not Azure SQL DB*/ + DECLARE @ErrorMessage AS nvarchar(500) = 'Telegraf - Connection string Server:'+ @@SERVERNAME + ',Database:' + DB_NAME() +' is not an Azure SQL DB. Check the database_type parameter in the telegraf configuration.'; + RAISERROR (@ErrorMessage,11,1) + RETURN +END + SELECT -'sqlserver_waitstats' AS [measurement], -REPLACE(@@SERVERNAME,'\',':') AS [sql_instance], -DB_NAME() as [database_name], -ws.wait_type, -wait_time_ms, -wait_time_ms - signal_wait_time_ms AS [resource_wait_ms], -signal_wait_time_ms, -max_wait_time_ms, -waiting_tasks_count, -CASE - WHEN ws.wait_type LIKE 'SOS_SCHEDULER_YIELD' then 'CPU' - WHEN ws.wait_type = 'THREADPOOL' THEN 'Worker Thread' - WHEN ws.wait_type LIKE 'LCK[_]%' THEN 'Lock' - WHEN ws.wait_type LIKE 'LATCH[_]%' THEN 'Latch' - WHEN ws.wait_type LIKE 'PAGELATCH[_]%' THEN 'Buffer Latch' - WHEN ws.wait_type LIKE 'PAGEIOLATCH[_]%' THEN 'Buffer IO' - WHEN ws.wait_type LIKE 'RESOURCE_SEMAPHORE_QUERY_COMPILE%' THEN 'Compilation' - WHEN ws.wait_type LIKE 'CLR[_]%' or ws.wait_type like 'SQLCLR%' THEN 'SQL CLR' - WHEN ws.wait_type LIKE 'DBMIRROR_%' THEN 'Mirroring' - WHEN ws.wait_type LIKE 'DTC[_]%' or ws.wait_type LIKE 'DTCNEW%' or ws.wait_type LIKE 'TRAN_%' - or ws.wait_type LIKE 'XACT%' or ws.wait_type like 'MSQL_XACT%' THEN 'Transaction' - WHEN ws.wait_type LIKE 'SLEEP[_]%' or - ws.wait_type IN ('LAZYWRITER_SLEEP', 'SQLTRACE_BUFFER_FLUSH', 'SQLTRACE_INCREMENTAL_FLUSH_SLEEP' - , 'SQLTRACE_WAIT_ENTRIES', 'FT_IFTS_SCHEDULER_IDLE_WAIT', 'XE_DISPATCHER_WAIT' - , 'REQUEST_FOR_DEADLOCK_SEARCH', 'LOGMGR_QUEUE', 'ONDEMAND_TASK_QUEUE' - , 'CHECKPOINT_QUEUE', 'XE_TIMER_EVENT') THEN 'Idle' - WHEN ws.wait_type IN('ASYNC_IO_COMPLETION','BACKUPIO','CHKPT','WRITE_COMPLETION' - ,'IO_QUEUE_LIMIT', 'IO_RETRY') THEN 'Other Disk IO' - WHEN ws.wait_type LIKE 'PREEMPTIVE_%' THEN 'Preemptive' - WHEN ws.wait_type LIKE 'BROKER[_]%' THEN 'Service Broker' - WHEN ws.wait_type IN ('WRITELOG','LOGBUFFER','LOGMGR_RESERVE_APPEND' - , 'LOGMGR_FLUSH', 'LOGMGR_PMM_LOG') THEN 'Tran Log IO' - WHEN ws.wait_type LIKE 
'LOG_RATE%' then 'Log Rate Governor' - WHEN ws.wait_type LIKE 'HADR_THROTTLE[_]%' - or ws.wait_type = 'THROTTLE_LOG_RATE_LOG_STORAGE' THEN 'HADR Log Rate Governor' - WHEN ws.wait_type LIKE 'RBIO_RG%' or ws.wait_type like 'WAIT_RBIO_RG%' then 'VLDB Log Rate Governor' - WHEN ws.wait_type LIKE 'RBIO[_]%' or ws.wait_type like 'WAIT_RBIO[_]%' then 'VLDB RBIO' - WHEN ws.wait_type IN('ASYNC_NETWORK_IO','EXTERNAL_SCRIPT_NETWORK_IOF' - ,'NET_WAITFOR_PACKET','PROXY_NETWORK_IO') THEN 'Network IO' - WHEN ws.wait_type IN ( 'CXPACKET', 'CXCONSUMER') - or ws.wait_type like 'HT%' or ws.wait_type like 'BMP%' - or ws.wait_type like 'BP%' THEN 'Parallelism' -WHEN ws.wait_type IN('CMEMTHREAD','CMEMPARTITIONED','EE_PMOLOCK','EXCHANGE' - ,'RESOURCE_SEMAPHORE','MEMORY_ALLOCATION_EXT' - ,'RESERVED_MEMORY_ALLOCATION_EXT', 'MEMORY_GRANT_UPDATE') THEN 'Memory' - WHEN ws.wait_type IN ('WAITFOR','WAIT_FOR_RESULTS') THEN 'User Wait' - WHEN ws.wait_type LIKE 'HADR[_]%' or ws.wait_type LIKE 'PWAIT_HADR%' - or ws.wait_type LIKE 'REPLICA[_]%' or ws.wait_type LIKE 'REPL_%' - or ws.wait_type LIKE 'SE_REPL[_]%' - or ws.wait_type LIKE 'FCB_REPLICA%' THEN 'Replication' - WHEN ws.wait_type LIKE 'SQLTRACE[_]%' or ws.wait_type - IN ('TRACEWRITE', 'SQLTRACE_LOCK', 'SQLTRACE_FILE_BUFFER', 'SQLTRACE_FILE_WRITE_IO_COMPLETION' - , 'SQLTRACE_FILE_READ_IO_COMPLETION', 'SQLTRACE_PENDING_BUFFER_WRITERS', 'SQLTRACE_SHUTDOWN' - , 'QUERY_TRACEOUT', 'TRACE_EVTNOTIF') THEN 'Tracing' - WHEN ws.wait_type IN ('FT_RESTART_CRAWL', 'FULLTEXT GATHERER', 'MSSEARCH', 'FT_METADATA_MUTEX', - 'FT_IFTSHC_MUTEX', 'FT_IFTSISM_MUTEX', 'FT_IFTS_RWLOCK', 'FT_COMPROWSET_RWLOCK' - , 'FT_MASTER_MERGE', 'FT_PROPERTYLIST_CACHE', 'FT_MASTER_MERGE_COORDINATOR' - , 'PWAIT_RESOURCE_SEMAPHORE_FT_PARALLEL_QUERY_SYNC') THEN 'Full Text Search' - ELSE 'Other' -END as wait_category -FROM -sys.dm_os_wait_stats AS ws WITH (NOLOCK) + 'sqlserver_waitstats' AS [measurement] + ,REPLACE(@@SERVERNAME,'\',':') AS [sql_instance] + ,DB_NAME() as [database_name] + ,ws.[wait_type] + ,[wait_time_ms] + ,[wait_time_ms] - [signal_wait_time_ms] AS [resource_wait_ms] + ,[signal_wait_time_ms] + ,[max_wait_time_ms] + ,[waiting_tasks_count] + ,CASE + WHEN ws.[wait_type] LIKE 'SOS_SCHEDULER_YIELD' then 'CPU' + WHEN ws.[wait_type] = 'THREADPOOL' THEN 'Worker Thread' + WHEN ws.[wait_type] LIKE 'LCK[_]%' THEN 'Lock' + WHEN ws.[wait_type] LIKE 'LATCH[_]%' THEN 'Latch' + WHEN ws.[wait_type] LIKE 'PAGELATCH[_]%' THEN 'Buffer Latch' + WHEN ws.[wait_type] LIKE 'PAGEIOLATCH[_]%' THEN 'Buffer IO' + WHEN ws.[wait_type] LIKE 'RESOURCE_SEMAPHORE_QUERY_COMPILE%' THEN 'Compilation' + WHEN ws.[wait_type] LIKE 'CLR[_]%' or ws.[wait_type] like 'SQLCLR%' THEN 'SQL CLR' + WHEN ws.[wait_type] LIKE 'DBMIRROR_%' THEN 'Mirroring' + WHEN ws.[wait_type] LIKE 'DTC[_]%' or ws.[wait_type] LIKE 'DTCNEW%' or ws.[wait_type] LIKE 'TRAN_%' + or ws.[wait_type] LIKE 'XACT%' or ws.[wait_type] like 'MSQL_XACT%' THEN 'Transaction' + WHEN ws.[wait_type] LIKE 'SLEEP[_]%' + or ws.[wait_type] IN ( + 'LAZYWRITER_SLEEP', 'SQLTRACE_BUFFER_FLUSH', 'SQLTRACE_INCREMENTAL_FLUSH_SLEEP', + 'SQLTRACE_WAIT_ENTRIES', 'FT_IFTS_SCHEDULER_IDLE_WAIT', 'XE_DISPATCHER_WAIT', + 'REQUEST_FOR_DEADLOCK_SEARCH', 'LOGMGR_QUEUE', 'ONDEMAND_TASK_QUEUE', + 'CHECKPOINT_QUEUE', 'XE_TIMER_EVENT') THEN 'Idle' + WHEN ws.[wait_type] IN( + 'ASYNC_IO_COMPLETION','BACKUPIO','CHKPT','WRITE_COMPLETION', + 'IO_QUEUE_LIMIT', 'IO_RETRY') THEN 'Other Disk IO' + WHEN ws.[wait_type] LIKE 'PREEMPTIVE_%' THEN 'Preemptive' + WHEN ws.[wait_type] LIKE 'BROKER[_]%' THEN 'Service Broker' + 
WHEN ws.[wait_type] IN ( + 'WRITELOG','LOGBUFFER','LOGMGR_RESERVE_APPEND', + 'LOGMGR_FLUSH', 'LOGMGR_PMM_LOG') THEN 'Tran Log IO' + WHEN ws.[wait_type] LIKE 'LOG_RATE%' then 'Log Rate Governor' + WHEN ws.[wait_type] LIKE 'HADR_THROTTLE[_]%' + or ws.[wait_type] = 'THROTTLE_LOG_RATE_LOG_STORAGE' THEN 'HADR Log Rate Governor' + WHEN ws.[wait_type] LIKE 'RBIO_RG%' or ws.[wait_type] like 'WAIT_RBIO_RG%' then 'VLDB Log Rate Governor' + WHEN ws.[wait_type] LIKE 'RBIO[_]%' or ws.[wait_type] like 'WAIT_RBIO[_]%' then 'VLDB RBIO' + WHEN ws.[wait_type] IN( + 'ASYNC_NETWORK_IO','EXTERNAL_SCRIPT_NETWORK_IOF', + 'NET_WAITFOR_PACKET','PROXY_NETWORK_IO') THEN 'Network IO' + WHEN ws.[wait_type] IN ( 'CXPACKET', 'CXCONSUMER') + or ws.[wait_type] like 'HT%' or ws.[wait_type] like 'BMP%' + or ws.[wait_type] like 'BP%' THEN 'Parallelism' + WHEN ws.[wait_type] IN( + 'CMEMTHREAD','CMEMPARTITIONED','EE_PMOLOCK','EXCHANGE', + 'RESOURCE_SEMAPHORE','MEMORY_ALLOCATION_EXT', + 'RESERVED_MEMORY_ALLOCATION_EXT', 'MEMORY_GRANT_UPDATE') THEN 'Memory' + WHEN ws.[wait_type] IN ('WAITFOR','WAIT_FOR_RESULTS') THEN 'User Wait' + WHEN ws.[wait_type] LIKE 'HADR[_]%' or ws.[wait_type] LIKE 'PWAIT_HADR%' + or ws.[wait_type] LIKE 'REPLICA[_]%' or ws.[wait_type] LIKE 'REPL_%' + or ws.[wait_type] LIKE 'SE_REPL[_]%' + or ws.[wait_type] LIKE 'FCB_REPLICA%' THEN 'Replication' + WHEN ws.[wait_type] LIKE 'SQLTRACE[_]%' + or ws.[wait_type] IN ( + 'TRACEWRITE', 'SQLTRACE_LOCK', 'SQLTRACE_FILE_BUFFER', 'SQLTRACE_FILE_WRITE_IO_COMPLETION', + 'SQLTRACE_FILE_READ_IO_COMPLETION', 'SQLTRACE_PENDING_BUFFER_WRITERS', 'SQLTRACE_SHUTDOWN', + 'QUERY_TRACEOUT', 'TRACE_EVTNOTIF') THEN 'Tracing' + WHEN ws.[wait_type] IN ( + 'FT_RESTART_CRAWL', 'FULLTEXT GATHERER', 'MSSEARCH', 'FT_METADATA_MUTEX', + 'FT_IFTSHC_MUTEX', 'FT_IFTSISM_MUTEX', 'FT_IFTS_RWLOCK', 'FT_COMPROWSET_RWLOCK', + 'FT_MASTER_MERGE', 'FT_PROPERTYLIST_CACHE', 'FT_MASTER_MERGE_COORDINATOR', + 'PWAIT_RESOURCE_SEMAPHORE_FT_PARALLEL_QUERY_SYNC') THEN 'Full Text Search' + ELSE 'Other' + END as [wait_category] +FROM sys.dm_os_wait_stats AS ws WITH (NOLOCK) WHERE -ws.wait_type NOT IN ( + ws.[wait_type] NOT IN ( N'BROKER_EVENTHANDLER', N'BROKER_RECEIVE_WAITFOR', N'BROKER_TASK_STOP', N'BROKER_TO_FLUSH', N'BROKER_TRANSMITTER', N'CHECKPOINT_QUEUE', N'CHKPT', N'CLR_AUTO_EVENT', N'CLR_MANUAL_EVENT', N'CLR_SEMAPHORE', - N'DBMIRROR_DBM_EVENT', N'DBMIRROR_EVENTS_QUEUE', N'DBMIRROR_ - _QUEUE', + N'DBMIRROR_DBM_EVENT', N'DBMIRROR_EVENTS_QUEUE', N'DBMIRROR_QUEUE', N'DBMIRRORING_CMD', N'DIRTY_PAGE_POLL', N'DISPATCHER_QUEUE_SEMAPHORE', N'EXECSYNC', N'FSAGENT', N'FT_IFTS_SCHEDULER_IDLE_WAIT', N'FT_IFTSHC_MUTEX', N'HADR_CLUSAPI_CALL', N'HADR_FILESTREAM_IOMGR_IOCOMPLETION', N'HADR_LOGCAPTURE_WAIT', @@ -294,21 +349,25 @@ ws.wait_type NOT IN ( N'SLEEP_DCOMSTARTUP', N'SLEEP_MASTERDBREADY', N'SLEEP_MASTERMDREADY', N'SLEEP_MASTERUPGRADED', N'SLEEP_MSDBSTARTUP', N'SLEEP_SYSTEMTASK', N'SLEEP_TASK', N'SLEEP_TEMPDBSTARTUP', N'SNI_HTTP_ACCEPT', N'SP_SERVER_DIAGNOSTICS_SLEEP', - N'SQLTRACE_BUFFER_FLUSH', N'SQLTRACE_INCREMENTAL_FLUSH_SLEEP', + N'SQLTRACE_BUFFER_FLUSH', N'SQLTRACE_INCREMENTAL_FLUSH_SLEEP', N'SQLTRACE_WAIT_ENTRIES', N'WAIT_FOR_RESULTS', N'WAITFOR', N'WAITFOR_TASKSHUTDOWN', N'WAIT_XTP_HOST_WAIT', N'WAIT_XTP_OFFLINE_CKPT_NEW_LOG', N'WAIT_XTP_CKPT_CLOSE', N'XE_BUFFERMGR_ALLPROCESSED_EVENT', N'XE_DISPATCHER_JOIN', N'XE_DISPATCHER_WAIT', N'XE_LIVE_TARGET_TVF', N'XE_TIMER_EVENT', - N'SOS_WORK_DISPATCHER','RESERVED_MEMORY_ALLOCATION_EXT','SQLTRACE_WAIT_ENTRIES' - , 'RBIO_COMM_RETRY') -AND waiting_tasks_count > 10 -AND 
wait_time_ms > 100; + N'SOS_WORK_DISPATCHER','RESERVED_MEMORY_ALLOCATION_EXT','SQLTRACE_WAIT_ENTRIES', + N'RBIO_COMM_RETRY') +AND [waiting_tasks_count] > 10 +AND [wait_time_ms] > 100; ` const sqlAzureDBMemoryClerks = ` -SET DEADLOCK_PRIORITY -10; -IF SERVERPROPERTY('EngineEdition') = 5 -- Is this Azure SQL DB? +IF SERVERPROPERTY('EngineEdition') <> 5 BEGIN /*not Azure SQL DB*/ + DECLARE @ErrorMessage AS nvarchar(500) = 'Telegraf - Connection string Server:'+ @@SERVERNAME + ',Database:' + DB_NAME() +' is not an Azure SQL DB. Check the database_type parameter in the telegraf configuration.'; + RAISERROR (@ErrorMessage,11,1) + RETURN +END + SELECT 'sqlserver_memory_clerks' AS [measurement] ,REPLACE(@@SERVERNAME, '\', ':') AS [sql_instance] @@ -316,421 +375,498 @@ SELECT ,mc.[type] AS [clerk_type] ,SUM(mc.[pages_kb]) AS [size_kb] FROM sys.[dm_os_memory_clerks] AS mc WITH (NOLOCK) -GROUP BY - mc.[type] +GROUP BY + mc.[type] HAVING - SUM(mc.[pages_kb]) >= 1024 + SUM(mc.[pages_kb]) >= 1024 OPTION(RECOMPILE); ` const sqlAzureDBPerformanceCounters = ` -IF SERVERPROPERTY('EngineEdition') = 5 -- Is this Azure SQL DB? -BEGIN +SET DEADLOCK_PRIORITY -10; +IF SERVERPROPERTY('EngineEdition') <> 5 BEGIN /*not Azure SQL DB*/ + DECLARE @ErrorMessage AS nvarchar(500) = 'Telegraf - Connection string Server:'+ @@SERVERNAME + ',Database:' + DB_NAME() +' is not an Azure SQL DB. Check the database_type parameter in the telegraf configuration.'; + RAISERROR (@ErrorMessage,11,1) + RETURN +END + DECLARE @PCounters TABLE ( - object_name nvarchar(128), - counter_name nvarchar(128), - instance_name nvarchar(128), - cntr_value bigint, - cntr_type INT , - Primary Key(object_name, counter_name,instance_name) + [object_name] nvarchar(128), + [counter_name] nvarchar(128), + [instance_name] nvarchar(128), + [cntr_value] bigint, + [cntr_type] INT , + Primary Key([object_name],[counter_name],[instance_name]) ); -WITH PerfCounters AS - ( - SELECT DISTINCT - RTrim(spi.object_name) object_name, - RTrim(spi.counter_name) counter_name, - CASE WHEN ( - RTRIM(spi.object_name) LIKE '%:Databases' - OR RTRIM(spi.object_name) LIKE '%:Database Replica' - OR RTRIM(spi.object_name) LIKE '%:Catalog Metadata' - OR RTRIM(spi.object_name) LIKE '%:Query Store' - OR RTRIM(spi.object_name) LIKE '%:Columnstore' - OR RTRIM(spi.object_name) LIKE '%:Advanced Analytics') - AND TRY_CONVERT(uniqueidentifier, spi.instance_name) - IS NOT NULL -- for cloud only - THEN ISNULL(d.name,RTRIM(spi.instance_name)) -- Elastic Pools counters exist for all databases but sys.databases only has current DB value - WHEN RTRIM(object_name) LIKE '%:Availability Replica' - AND TRY_CONVERT(uniqueidentifier, spi.instance_name) IS NOT NULL -- for cloud only - THEN ISNULL(d.name,RTRIM(spi.instance_name)) + RTRIM(SUBSTRING(spi.instance_name, 37, LEN(spi.instance_name))) - ELSE RTRIM(spi.instance_name) - END AS instance_name, - CAST(spi.cntr_value AS BIGINT) AS cntr_value, - spi.cntr_type - FROM sys.dm_os_performance_counters AS spi - LEFT JOIN sys.databases AS d - ON LEFT(spi.instance_name, 36) -- some instance_name values have an additional identifier appended after the GUID - =CASE WHEN -- in SQL DB standalone, physical_database_name for master is the GUID of the user database - d.name = 'master' AND TRY_CONVERT(uniqueidentifier, d.physical_database_name) IS NOT NULL - THEN d.name - ELSE d.physical_database_name - END - WHERE ( - counter_name IN ( - 'SQL Compilations/sec', - 'SQL Re-Compilations/sec', - 'User Connections', - 'Batch Requests/sec', - 'Logouts/sec', - 
'Logins/sec', - 'Processes blocked', - 'Latch Waits/sec', - 'Full Scans/sec', - 'Index Searches/sec', - 'Page Splits/sec', - 'Page lookups/sec', - 'Page reads/sec', - 'Page writes/sec', - 'Readahead pages/sec', - 'Lazy writes/sec', - 'Checkpoint pages/sec', - 'Page life expectancy', - 'Log File(s) Size (KB)', - 'Log File(s) Used Size (KB)', - 'Data File(s) Size (KB)', - 'Transactions/sec', - 'Write Transactions/sec', - 'Active Temp Tables', - 'Temp Tables Creation Rate', - 'Temp Tables For Destruction', - 'Free Space in tempdb (KB)', - 'Version Store Size (KB)', - 'Memory Grants Pending', - 'Memory Grants Outstanding', - 'Free list stalls/sec', - 'Buffer cache hit ratio', - 'Buffer cache hit ratio base', - 'RBPEX cache hit ratio', - 'RBPEX cache hit ratio base', - 'Backup/Restore Throughput/sec', - 'Total Server Memory (KB)', - 'Target Server Memory (KB)', - 'Log Flushes/sec', - 'Log Flush Wait Time', - 'Memory broker clerk size', - 'Log Bytes Flushed/sec', - 'Bytes Sent to Replica/sec', - 'Log Send Queue', - 'Bytes Sent to Transport/sec', - 'Sends to Replica/sec', - 'Bytes Sent to Transport/sec', - 'Sends to Transport/sec', - 'Bytes Received from Replica/sec', - 'Receives from Replica/sec', - 'Flow Control Time (ms/sec)', - 'Flow Control/sec', - 'Resent Messages/sec', - 'Redone Bytes/sec', - 'XTP Memory Used (KB)', - 'Transaction Delay', - 'Log Bytes Received/sec', - 'Log Apply Pending Queue', - 'Redone Bytes/sec', - 'Recovery Queue', - 'Log Apply Ready Queue', - 'CPU usage %', - 'CPU usage % base', - 'Queued requests', - 'Requests completed/sec', - 'Blocked tasks', - 'Active memory grant amount (KB)', - 'Disk Read Bytes/sec', - 'Disk Read IO Throttled/sec', - 'Disk Read IO/sec', - 'Disk Write Bytes/sec', - 'Disk Write IO Throttled/sec', - 'Disk Write IO/sec', - 'Used memory (KB)', - 'Forwarded Records/sec', - 'Background Writer pages/sec', - 'Percent Log Used', - 'Log Send Queue KB', - 'Redo Queue KB', - 'Mirrored Write Transactions/sec', - 'Group Commit Time', - 'Group Commits/Sec' - ) - ) OR ( - object_name LIKE '%User Settable%' - OR object_name LIKE '%SQL Errors%' +WITH PerfCounters AS ( + SELECT DISTINCT + RTrim(spi.[object_name]) [object_name] + ,RTrim(spi.[counter_name]) [counter_name] + ,CASE WHEN ( + RTRIM(spi.[object_name]) LIKE '%:Databases' + OR RTRIM(spi.[object_name]) LIKE '%:Database Replica' + OR RTRIM(spi.[object_name]) LIKE '%:Catalog Metadata' + OR RTRIM(spi.[object_name]) LIKE '%:Query Store' + OR RTRIM(spi.[object_name]) LIKE '%:Columnstore' + OR RTRIM(spi.[object_name]) LIKE '%:Advanced Analytics') + AND TRY_CONVERT([uniqueidentifier], spi.[instance_name]) IS NOT NULL -- for cloud only + THEN ISNULL(d.[name],RTRIM(spi.instance_name)) -- Elastic Pools counters exist for all databases but sys.databases only has current DB value + WHEN + RTRIM([object_name]) LIKE '%:Availability Replica' + AND TRY_CONVERT([uniqueidentifier], spi.[instance_name]) IS NOT NULL -- for cloud only + THEN ISNULL(d.[name],RTRIM(spi.[instance_name])) + RTRIM(SUBSTRING(spi.[instance_name], 37, LEN(spi.[instance_name]))) + ELSE RTRIM(spi.instance_name) + END AS [instance_name] + ,CAST(spi.[cntr_value] AS BIGINT) AS [cntr_value] + ,spi.[cntr_type] + FROM sys.dm_os_performance_counters AS spi + LEFT JOIN sys.databases AS d + ON LEFT(spi.[instance_name], 36) -- some instance_name values have an additional identifier appended after the GUID + = CASE + /*in SQL DB standalone, physical_database_name for master is the GUID of the user database*/ + WHEN d.[name] = 'master' AND 
TRY_CONVERT([uniqueidentifier], d.[physical_database_name]) IS NOT NULL + THEN d.[name] + ELSE d.[physical_database_name] + END + WHERE + counter_name IN ( + 'SQL Compilations/sec' + ,'SQL Re-Compilations/sec' + ,'User Connections' + ,'Batch Requests/sec' + ,'Logouts/sec' + ,'Logins/sec' + ,'Processes blocked' + ,'Latch Waits/sec' + ,'Full Scans/sec' + ,'Index Searches/sec' + ,'Page Splits/sec' + ,'Page lookups/sec' + ,'Page reads/sec' + ,'Page writes/sec' + ,'Readahead pages/sec' + ,'Lazy writes/sec' + ,'Checkpoint pages/sec' + ,'Page life expectancy' + ,'Log File(s) Size (KB)' + ,'Log File(s) Used Size (KB)' + ,'Data File(s) Size (KB)' + ,'Transactions/sec' + ,'Write Transactions/sec' + ,'Active Temp Tables' + ,'Temp Tables Creation Rate' + ,'Temp Tables For Destruction' + ,'Free Space in tempdb (KB)' + ,'Version Store Size (KB)' + ,'Memory Grants Pending' + ,'Memory Grants Outstanding' + ,'Free list stalls/sec' + ,'Buffer cache hit ratio' + ,'Buffer cache hit ratio base' + ,'Backup/Restore Throughput/sec' + ,'Total Server Memory (KB)' + ,'Target Server Memory (KB)' + ,'Log Flushes/sec' + ,'Log Flush Wait Time' + ,'Memory broker clerk size' + ,'Log Bytes Flushed/sec' + ,'Bytes Sent to Replica/sec' + ,'Log Send Queue' + ,'Bytes Sent to Transport/sec' + ,'Sends to Replica/sec' + ,'Bytes Sent to Transport/sec' + ,'Sends to Transport/sec' + ,'Bytes Received from Replica/sec' + ,'Receives from Replica/sec' + ,'Flow Control Time (ms/sec)' + ,'Flow Control/sec' + ,'Resent Messages/sec' + ,'Redone Bytes/sec' + ,'XTP Memory Used (KB)' + ,'Transaction Delay' + ,'Log Bytes Received/sec' + ,'Log Apply Pending Queue' + ,'Redone Bytes/sec' + ,'Recovery Queue' + ,'Log Apply Ready Queue' + ,'CPU usage %' + ,'CPU usage % base' + ,'Queued requests' + ,'Requests completed/sec' + ,'Blocked tasks' + ,'Active memory grant amount (KB)' + ,'Disk Read Bytes/sec' + ,'Disk Read IO Throttled/sec' + ,'Disk Read IO/sec' + ,'Disk Write Bytes/sec' + ,'Disk Write IO Throttled/sec' + ,'Disk Write IO/sec' + ,'Used memory (KB)' + ,'Forwarded Records/sec' + ,'Background Writer pages/sec' + ,'Percent Log Used' + ,'Log Send Queue KB' + ,'Redo Queue KB' + ,'Mirrored Write Transactions/sec' + ,'Group Commit Time' + ,'Group Commits/Sec' ) OR ( + spi.[object_name] LIKE '%User Settable%' + OR spi.[object_name] LIKE '%SQL Errors%' + OR spi.[object_name] LIKE '%Batch Resp Statistics%' ) OR ( + spi.[instance_name] IN ('_Total') + AND spi.[counter_name] IN ( + 'Lock Timeouts/sec' + ,'Lock Timeouts (timeout > 0)/sec' + ,'Number of Deadlocks/sec' + ,'Lock Waits/sec' + ,'Latch Waits/sec' ) ) - ) + INSERT INTO @PCounters select * from PerfCounters
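+-- Why the self-join below: cntr_type 537003264 marks a raw-fraction counter whose value
+-- is only meaningful relative to its companion '... base' row, so pc is joined to pc1 and
+-- reported as fraction/base * 100. As an illustrative example, a 'Buffer cache hit ratio'
+-- of 980 with a base of 1000 is emitted as value = 98.0; non-fraction counters pass
+-- through unchanged.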
-select - 'sqlserver_performance' AS [measurement], - REPLACE(@@SERVERNAME,'\',':') AS [sql_instance], - DB_NAME() as [database_name], - pc.object_name AS [object], - pc.counter_name AS [counter], - CASE pc.instance_name - WHEN '_Total' THEN 'Total' - ELSE ISNULL(pc.instance_name,'') - END AS [instance], - CAST(CASE WHEN pc.cntr_type = 537003264 AND pc1.cntr_value > 0 THEN (pc.cntr_value * 1.0) / (pc1.cntr_value * 1.0) * 100 ELSE pc.cntr_value END AS float(10)) AS [value], - -- cast to string as TAG - cast(pc.cntr_type as varchar(25)) as [counter_type] +SELECT + 'sqlserver_performance' AS [measurement] + ,REPLACE(@@SERVERNAME,'\',':') AS [sql_instance] + ,DB_NAME() as [database_name] + ,pc.[object_name] AS [object] + ,pc.[counter_name] AS [counter] + ,CASE pc.[instance_name] + WHEN '_Total' THEN 'Total' + ELSE ISNULL(pc.[instance_name],'') + END AS [instance] + ,CAST(CASE WHEN pc.[cntr_type] = 537003264 AND pc1.[cntr_value] > 0 THEN (pc.[cntr_value] * 1.0) / (pc1.[cntr_value] * 1.0) * 100 ELSE pc.[cntr_value] END AS float(10)) AS [value] + ,cast(pc.[cntr_type] as varchar(25)) as [counter_type] from @PCounters pc - LEFT OUTER JOIN @PCounters AS pc1 - ON ( - pc.counter_name = REPLACE(pc1.counter_name,' base','') - OR pc.counter_name = REPLACE(pc1.counter_name,' base',' (ms)') - ) - AND pc.object_name = pc1.object_name - AND pc.instance_name = pc1.instance_name - AND pc1.counter_name LIKE '%base' -WHERE pc.counter_name NOT LIKE '% base' -OPTION (RECOMPILE) -END +LEFT OUTER JOIN @PCounters AS pc1 + ON ( + pc.[counter_name] = REPLACE(pc1.[counter_name],' base','') + OR pc.[counter_name] = REPLACE(pc1.[counter_name],' base',' (ms)') + ) + AND pc.[object_name] = pc1.[object_name] + AND pc.[instance_name] = pc1.[instance_name] + AND pc1.[counter_name] LIKE '%base' +WHERE + pc.[counter_name] NOT LIKE '% base' +OPTION (RECOMPILE); ` const sqlAzureDBRequests string = ` -SET NOCOUNT ON; -IF SERVERPROPERTY('EngineEdition') = 5 -- Is this Azure SQL DB? -BEGIN - SELECT blocking_session_id into #blockingSessions FROM sys.dm_exec_requests WHERE blocking_session_id != 0 - create index ix_blockingSessions_1 on #blockingSessions (blocking_session_id) - SELECT - 'sqlserver_requests' AS [measurement] - , REPLACE(@@SERVERNAME,'\',':') AS [sql_instance] - , DB_NAME() as [database_name] - , s.session_id - , ISNULL(r.request_id,0) as request_id - , DB_NAME(s.database_id) as session_db_name - , COALESCE(r.status,s.status) AS status - , COALESCE(r.cpu_time,s.cpu_time) AS cpu_time_ms - , COALESCE(r.total_elapsed_time,s.total_elapsed_time) AS total_elapsed_time_ms - , COALESCE(r.logical_reads,s.logical_reads) AS logical_reads - , COALESCE(r.writes,s.writes) AS writes - , r.command - , r.wait_time as wait_time_ms - , r.wait_type - , r.wait_resource - , r.blocking_session_id - , s.program_name - , s.host_name - , s.nt_user_name - , s.open_transaction_count AS open_transaction - , LEFT (CASE COALESCE(r.transaction_isolation_level, s.transaction_isolation_level) - WHEN 0 THEN '0-Read Committed' - WHEN 1 THEN '1-Read Uncommitted (NOLOCK)' - WHEN 2 THEN '2-Read Committed' - WHEN 3 THEN '3-Repeatable Read' - WHEN 4 THEN '4-Serializable' - WHEN 5 THEN '5-Snapshot' - ELSE CONVERT (varchar(30), r.transaction_isolation_level) + '-UNKNOWN' - END, 30) AS transaction_isolation_level - , r.granted_query_memory as granted_query_memory_pages - , r.percent_complete - , SUBSTRING( - qt.text, - r.statement_start_offset / 2 + 1, - (CASE WHEN r.statement_end_offset = -1 - THEN DATALENGTH(qt.text) - ELSE r.statement_end_offset - END - r.statement_start_offset) / 2 + 1 - ) AS statement_text - , qt.objectid - , QUOTENAME(OBJECT_SCHEMA_NAME(qt.objectid,qt.dbid)) + '.'
+ QUOTENAME(OBJECT_NAME(qt.objectid,qt.dbid)) as stmt_object_name - , DB_NAME(qt.dbid) stmt_db_name - , CONVERT(varchar(20),[query_hash],1) as [query_hash] - , CONVERT(varchar(20),[query_plan_hash],1) as [query_plan_hash] - FROM sys.dm_exec_sessions AS s - LEFT OUTER JOIN sys.dm_exec_requests AS r - ON s.session_id = r.session_id - OUTER APPLY sys.dm_exec_sql_text(r.sql_handle) AS qt - WHERE 1 = 1 - AND (r.session_id IS NOT NULL AND (s.is_user_process = 1 OR r.status COLLATE Latin1_General_BIN NOT IN ('background', 'sleeping'))) - OR (s.session_id IN (SELECT blocking_session_id FROM #blockingSessions)) - OPTION(MAXDOP 1) +IF SERVERPROPERTY('EngineEdition') <> 5 BEGIN /*not Azure SQL DB*/ + DECLARE @ErrorMessage AS nvarchar(500) = 'Telegraf - Connection string Server:'+ @@SERVERNAME + ',Database:' + DB_NAME() +' is not an Azure SQL DB. Check the database_type parameter in the telegraf configuration.'; + RAISERROR (@ErrorMessage,11,1) + RETURN +END + +SELECT [blocking_session_id] INTO #blockingSessions FROM sys.dm_exec_requests WHERE [blocking_session_id] != 0 +CREATE INDEX ix_blockingSessions_1 on #blockingSessions ([blocking_session_id]) + +SELECT + 'sqlserver_requests' AS [measurement] + ,REPLACE(@@SERVERNAME,'\',':') AS [sql_instance] + ,DB_NAME() as [database_name] + ,s.[session_id] + ,ISNULL(r.[request_id], 0) as [request_id] + ,DB_NAME(s.[database_id]) as [session_db_name] + ,COALESCE(r.[status], s.[status]) AS [status] + ,COALESCE(r.[cpu_time], s.[cpu_time]) AS [cpu_time_ms] + ,COALESCE(r.[total_elapsed_time], s.[total_elapsed_time]) AS [total_elapsed_time_ms] + ,COALESCE(r.[logical_reads], s.[logical_reads]) AS [logical_reads] + ,COALESCE(r.[writes], s.[writes]) AS [writes] + ,r.[command] + ,r.[wait_time] as [wait_time_ms] + ,r.[wait_type] + ,r.[wait_resource] + ,r.[blocking_session_id] + ,s.[program_name] + ,s.[host_name] + ,s.[nt_user_name] + ,COALESCE(r.[open_transaction_count], s.[open_transaction_count]) AS [open_transaction] + ,LEFT (CASE COALESCE(r.[transaction_isolation_level], s.[transaction_isolation_level]) + WHEN 0 THEN '0-Read Committed' + WHEN 1 THEN '1-Read Uncommitted (NOLOCK)' + WHEN 2 THEN '2-Read Committed' + WHEN 3 THEN '3-Repeatable Read' + WHEN 4 THEN '4-Serializable' + WHEN 5 THEN '5-Snapshot' + ELSE CONVERT (varchar(30), r.[transaction_isolation_level]) + '-UNKNOWN' + END, 30) AS [transaction_isolation_level] + ,r.[granted_query_memory] as [granted_query_memory_pages] + ,r.[percent_complete] + ,SUBSTRING( + qt.[text], + r.[statement_start_offset] / 2 + 1, + (CASE WHEN r.[statement_end_offset] = -1 + THEN DATALENGTH(qt.text) + ELSE r.[statement_end_offset] + END - r.[statement_start_offset]) / 2 + 1 + ) AS [statement_text] + ,qt.[objectid] + ,QUOTENAME(OBJECT_SCHEMA_NAME(qt.[objectid], qt.[dbid])) + '.' 
+ QUOTENAME(OBJECT_NAME(qt.[objectid], qt.[dbid])) as [stmt_object_name] + ,DB_NAME(qt.[dbid]) [stmt_db_name] + ,CONVERT(varchar(20),[query_hash],1) as [query_hash] + ,CONVERT(varchar(20),[query_plan_hash],1) as [query_plan_hash] +FROM sys.dm_exec_sessions AS s +LEFT OUTER JOIN sys.dm_exec_requests AS r + ON s.[session_id] = r.[session_id] +OUTER APPLY sys.dm_exec_sql_text(r.sql_handle) AS qt +WHERE + (s.session_id IN (SELECT blocking_session_id FROM #blockingSessions)) + OR ( + r.session_id IS NOT NULL + AND ( + s.is_user_process = 1 + OR r.status COLLATE Latin1_General_BIN NOT IN ('background', 'sleeping') + ) + ) +OPTION(MAXDOP 1); +` + +const sqlAzureDBSchedulers string = ` +IF SERVERPROPERTY('EngineEdition') <> 5 BEGIN /*not Azure SQL DB*/ + DECLARE @ErrorMessage AS nvarchar(500) = 'Telegraf - Connection string Server:'+ @@SERVERNAME + ',Database:' + DB_NAME() +' is not an Azure SQL DB. Check the database_type parameter in the telegraf configuration.'; + RAISERROR (@ErrorMessage,11,1) + RETURN END + +SELECT + 'sqlserver_schedulers' AS [measurement] + ,REPLACE(@@SERVERNAME, '\', ':') AS [sql_instance] + ,CAST(s.[scheduler_id] AS VARCHAR(4)) AS [scheduler_id] + ,CAST(s.[cpu_id] AS VARCHAR(4)) AS [cpu_id] + ,s.[is_online] + ,s.[is_idle] + ,s.[preemptive_switches_count] + ,s.[context_switches_count] + ,s.[current_tasks_count] + ,s.[runnable_tasks_count] + ,s.[current_workers_count] + ,s.[active_workers_count] + ,s.[work_queue_count] + ,s.[pending_disk_io_count] + ,s.[load_factor] + ,s.[yield_count] + ,s.[total_cpu_usage_ms] + ,s.[total_scheduler_delay_ms] +FROM sys.dm_os_schedulers AS s ` +//------------------------------------------------------------------------------------------------ +//------------------ Azure Managed Instance ------------------------------------------------------ +//------------------------------------------------------------------------------------------------ const sqlAzureMIProperties = ` -DECLARE @EngineEdition AS tinyint = CAST(SERVERPROPERTY('EngineEdition') AS int) -IF @EngineEdition = 8 /*Managed Instance*/ - SELECT TOP 1 'sqlserver_server_properties' AS [measurement], - REPLACE(@@SERVERNAME,'\',':') AS [sql_instance], - virtual_core_count AS cpu_count, - (SELECT process_memory_limit_mb FROM sys.dm_os_job_object) AS server_memory, - sku, - @EngineEdition AS engine_edition, - hardware_generation AS hardware_type, - cast(reserved_storage_mb as bigint) AS total_storage_mb, - cast((reserved_storage_mb - storage_space_used_mb) as bigint) AS available_storage_mb, - (select DATEDIFF(MINUTE,sqlserver_start_time,GETDATE()) from sys.dm_os_sys_info) as uptime, - SERVERPROPERTY('ProductVersion') AS sql_version, - db_online, - db_restoring, - db_recovering, - db_recoveryPending, - db_suspect - FROM sys.server_resource_stats - CROSS APPLY - (SELECT SUM( CASE WHEN state = 0 THEN 1 ELSE 0 END ) AS db_online, - SUM( CASE WHEN state = 1 THEN 1 ELSE 0 END ) AS db_restoring, - SUM( CASE WHEN state = 2 THEN 1 ELSE 0 END ) AS db_recovering, - SUM( CASE WHEN state = 3 THEN 1 ELSE 0 END ) AS db_recoveryPending, - SUM( CASE WHEN state = 4 THEN 1 ELSE 0 END ) AS db_suspect, - SUM( CASE WHEN state = 6 or state = 10 THEN 1 ELSE 0 END ) AS db_offline - FROM sys.databases - ) AS dbs - ORDER BY start_time DESC; +IF SERVERPROPERTY('EngineEdition') <> 8 BEGIN /*not Azure Managed Instance*/ + DECLARE @ErrorMessage AS nvarchar(500) = 'Telegraf - Connection string Server:'+ @@SERVERNAME + ',Database:' + DB_NAME() +' is not an Azure Managed Instance. 
Check the database_type parameter in the telegraf configuration.'; + RAISERROR (@ErrorMessage,11,1) + RETURN +END + +SELECT TOP 1 + 'sqlserver_server_properties' AS [measurement] + ,REPLACE(@@SERVERNAME,'\',':') AS [sql_instance] + ,[virtual_core_count] AS [cpu_count] + ,(SELECT [process_memory_limit_mb] FROM sys.dm_os_job_object) AS [server_memory] + ,[sku] + ,SERVERPROPERTY('EngineEdition') AS [engine_edition] + ,[hardware_generation] AS [hardware_type] + ,cast([reserved_storage_mb] as bigint) AS [total_storage_mb] + ,cast(([reserved_storage_mb] - [storage_space_used_mb]) as bigint) AS [available_storage_mb] + ,(SELECT DATEDIFF(MINUTE,[sqlserver_start_time],GETDATE()) from sys.dm_os_sys_info) as [uptime] + ,SERVERPROPERTY('ProductVersion') AS [sql_version] + ,[db_online] + ,[db_restoring] + ,[db_recovering] + ,[db_recoveryPending] + ,[db_suspect] +FROM sys.server_resource_stats +CROSS APPLY ( + SELECT + SUM( CASE WHEN [state] = 0 THEN 1 ELSE 0 END ) AS [db_online] + ,SUM( CASE WHEN [state] = 1 THEN 1 ELSE 0 END ) AS [db_restoring] + ,SUM( CASE WHEN [state] = 2 THEN 1 ELSE 0 END ) AS [db_recovering] + ,SUM( CASE WHEN [state] = 3 THEN 1 ELSE 0 END ) AS [db_recoveryPending] + ,SUM( CASE WHEN [state] = 4 THEN 1 ELSE 0 END ) AS [db_suspect] + ,SUM( CASE WHEN [state] IN (6,10) THEN 1 ELSE 0 END ) AS [db_offline] + FROM sys.databases +) AS dbs +ORDER BY + [start_time] DESC; ` const sqlAzureMIResourceStats = ` -IF SERVERPROPERTY('EngineEdition') = 8 /*Managed Instance*/ - SELECT TOP(1) - 'sqlserver_azure_db_resource_stats' AS [measurement], - REPLACE(@@SERVERNAME,'\',':') AS [sql_instance], - cast(avg_cpu_percent as float) as avg_cpu_percent - FROM - sys.server_resource_stats; +IF SERVERPROPERTY('EngineEdition') <> 8 BEGIN /*not Azure Managed Instance*/ + DECLARE @ErrorMessage AS nvarchar(500) = 'Telegraf - Connection string Server:'+ @@SERVERNAME + ',Database:' + DB_NAME() +' is not an Azure Managed Instance. Check the database_type parameter in the telegraf configuration.'; + RAISERROR (@ErrorMessage,11,1) + RETURN +END + +SELECT TOP(1) + 'sqlserver_azure_db_resource_stats' AS [measurement] + ,REPLACE(@@SERVERNAME,'\',':') AS [sql_instance] + ,cast([avg_cpu_percent] as float) as [avg_cpu_percent] +FROM + sys.server_resource_stats; ` const sqlAzureMIResourceGovernance string = ` -IF SERVERPROPERTY('EngineEdition') = 8 -- Is this Azure SQL Managed Instance? - SELECT - 'sqlserver_instance_resource_governance' AS [measurement], - REPLACE(@@SERVERNAME,'\',':') AS [sql_instance], - instance_cap_cpu, - instance_max_log_rate, - instance_max_worker_threads, - tempdb_log_file_number, - volume_local_iops, - volume_external_xstore_iops, - volume_managed_xstore_iops, - volume_type_local_iops as voltype_local_iops, - volume_type_managed_xstore_iops as voltype_man_xtore_iops, - volume_type_external_xstore_iops as voltype_ext_xtore_iops, - volume_external_xstore_iops as vol_ext_xtore_iops - from - sys.dm_instance_resource_governance; +IF SERVERPROPERTY('EngineEdition') <> 8 BEGIN /*not Azure Managed Instance*/ + DECLARE @ErrorMessage AS nvarchar(500) = 'Telegraf - Connection string Server:'+ @@SERVERNAME + ',Database:' + DB_NAME() +' is not an Azure Managed Instance. 
Check the database_type parameter in the telegraf configuration.'; + RAISERROR (@ErrorMessage,11,1) + RETURN +END + +SELECT + 'sqlserver_instance_resource_governance' AS [measurement] + ,REPLACE(@@SERVERNAME,'\',':') AS [sql_instance] + ,[instance_cap_cpu] + ,[instance_max_log_rate] + ,[instance_max_worker_threads] + ,[tempdb_log_file_number] + ,[volume_local_iops] + ,[volume_external_xstore_iops] + ,[volume_managed_xstore_iops] + ,[volume_type_local_iops] as [voltype_local_iops] + ,[volume_type_managed_xstore_iops] as [voltype_man_xtore_iops] + ,[volume_type_external_xstore_iops] as [voltype_ext_xtore_iops] + ,[volume_external_xstore_iops] as [vol_ext_xtore_iops] +FROM sys.dm_instance_resource_governance; ` const sqlAzureMIDatabaseIO = ` SET DEADLOCK_PRIORITY -10; -IF SERVERPROPERTY('EngineEdition') = 8 /*Managed Instance*/ - SELECT - 'sqlserver_database_io' AS [measurement] - ,REPLACE(@@SERVERNAME,'\',':') AS [sql_instance] - ,COALESCE(mf.[physical_name],'RBPEX') AS [physical_filename] --RPBEX = Resilient Buffer Pool Extension - ,COALESCE(mf.[name],'RBPEX') AS [logical_filename] --RPBEX = Resilient Buffer Pool Extension - ,mf.[type_desc] AS [file_type] - ,vfs.[io_stall_read_ms] AS [read_latency_ms] - ,vfs.[num_of_reads] AS [reads] - ,vfs.[num_of_bytes_read] AS [read_bytes] - ,vfs.[io_stall_write_ms] AS [write_latency_ms] - ,vfs.[num_of_writes] AS [writes] - ,vfs.[num_of_bytes_written] AS [write_bytes] - ,vfs.io_stall_queued_read_ms AS [rg_read_stall_ms] - ,vfs.io_stall_queued_write_ms AS [rg_write_stall_ms] - FROM sys.dm_io_virtual_file_stats(NULL, NULL) AS vfs - LEFT OUTER JOIN sys.master_files AS mf WITH (NOLOCK) - ON vfs.[database_id] = mf.[database_id] AND vfs.[file_id] = mf.[file_id] - where vfs.[database_id] < 32760 +IF SERVERPROPERTY('EngineEdition') <> 8 BEGIN /*not Azure Managed Instance*/ + DECLARE @ErrorMessage AS nvarchar(500) = 'Telegraf - Connection string Server:'+ @@SERVERNAME + ',Database:' + DB_NAME() +' is not an Azure Managed Instance. Check the database_type parameter in the telegraf configuration.'; + RAISERROR (@ErrorMessage,11,1) + RETURN +END + +SELECT + 'sqlserver_database_io' AS [measurement] + ,REPLACE(@@SERVERNAME,'\',':') AS [sql_instance] + ,COALESCE(mf.[physical_name],'RBPEX') AS [physical_filename] --RPBEX = Resilient Buffer Pool Extension + ,COALESCE(mf.[name],'RBPEX') AS [logical_filename] --RPBEX = Resilient Buffer Pool Extension + ,mf.[type_desc] AS [file_type] + ,vfs.[io_stall_read_ms] AS [read_latency_ms] + ,vfs.[num_of_reads] AS [reads] + ,vfs.[num_of_bytes_read] AS [read_bytes] + ,vfs.[io_stall_write_ms] AS [write_latency_ms] + ,vfs.[num_of_writes] AS [writes] + ,vfs.[num_of_bytes_written] AS [write_bytes] + ,vfs.io_stall_queued_read_ms AS [rg_read_stall_ms] + ,vfs.io_stall_queued_write_ms AS [rg_write_stall_ms] +FROM sys.dm_io_virtual_file_stats(NULL, NULL) AS vfs +LEFT OUTER JOIN sys.master_files AS mf WITH (NOLOCK) + ON vfs.[database_id] = mf.[database_id] + AND vfs.[file_id] = mf.[file_id] +WHERE + vfs.[database_id] < 32760 ` const sqlAzureMIMemoryClerks = ` -SET DEADLOCK_PRIORITY -10; -IF SERVERPROPERTY('EngineEdition') = 8 /*Managed Instance*/ +IF SERVERPROPERTY('EngineEdition') <> 8 BEGIN /*not Azure Managed Instance*/ + DECLARE @ErrorMessage AS nvarchar(500) = 'Telegraf - Connection string Server:'+ @@SERVERNAME + ',Database:' + DB_NAME() +' is not an Azure Managed Instance. 
Check the database_type parameter in the telegraf configuration.'; + RAISERROR (@ErrorMessage,11,1) + RETURN +END + SELECT 'sqlserver_memory_clerks' AS [measurement] ,REPLACE(@@SERVERNAME, '\', ':') AS [sql_instance] ,mc.[type] AS [clerk_type] ,SUM(mc.[pages_kb]) AS [size_kb] FROM sys.[dm_os_memory_clerks] AS mc WITH (NOLOCK) -GROUP BY +GROUP BY mc.[type] -HAVING - SUM(mc.[pages_kb]) >= 1024 +HAVING + SUM(mc.[pages_kb]) >= 1024 OPTION(RECOMPILE); ` const sqlAzureMIOsWaitStats = ` -SET DEADLOCK_PRIORITY -10; -IF SERVERPROPERTY('EngineEdition') = 8 /*Managed Instance*/ +IF SERVERPROPERTY('EngineEdition') <> 8 BEGIN /*not Azure Managed Instance*/ + DECLARE @ErrorMessage AS nvarchar(500) = 'Telegraf - Connection string Server:'+ @@SERVERNAME + ',Database:' + DB_NAME() +' is not an Azure Managed Instance. Check the database_type parameter in the telegraf configuration.'; + RAISERROR (@ErrorMessage,11,1) + RETURN +END + SELECT -'sqlserver_waitstats' AS [measurement], -REPLACE(@@SERVERNAME,'\',':') AS [sql_instance], -ws.wait_type, -wait_time_ms, -wait_time_ms - signal_wait_time_ms AS [resource_wait_ms], -signal_wait_time_ms, -max_wait_time_ms, -waiting_tasks_count, -CASE - WHEN ws.wait_type LIKE 'SOS_SCHEDULER_YIELD' then 'CPU' - WHEN ws.wait_type = 'THREADPOOL' THEN 'Worker Thread' - WHEN ws.wait_type LIKE 'LCK[_]%' THEN 'Lock' - WHEN ws.wait_type LIKE 'LATCH[_]%' THEN 'Latch' - WHEN ws.wait_type LIKE 'PAGELATCH[_]%' THEN 'Buffer Latch' - WHEN ws.wait_type LIKE 'PAGEIOLATCH[_]%' THEN 'Buffer IO' - WHEN ws.wait_type LIKE 'RESOURCE_SEMAPHORE_QUERY_COMPILE%' THEN 'Compilation' - WHEN ws.wait_type LIKE 'CLR[_]%' or ws.wait_type like 'SQLCLR%' THEN 'SQL CLR' - WHEN ws.wait_type LIKE 'DBMIRROR_%' THEN 'Mirroring' - WHEN ws.wait_type LIKE 'DTC[_]%' or ws.wait_type LIKE 'DTCNEW%' or ws.wait_type LIKE 'TRAN_%' - or ws.wait_type LIKE 'XACT%' or ws.wait_type like 'MSQL_XACT%' THEN 'Transaction' - WHEN ws.wait_type LIKE 'SLEEP[_]%' or - ws.wait_type IN ('LAZYWRITER_SLEEP', 'SQLTRACE_BUFFER_FLUSH', 'SQLTRACE_INCREMENTAL_FLUSH_SLEEP' - , 'SQLTRACE_WAIT_ENTRIES', 'FT_IFTS_SCHEDULER_IDLE_WAIT', 'XE_DISPATCHER_WAIT' - , 'REQUEST_FOR_DEADLOCK_SEARCH', 'LOGMGR_QUEUE', 'ONDEMAND_TASK_QUEUE' - , 'CHECKPOINT_QUEUE', 'XE_TIMER_EVENT') THEN 'Idle' - WHEN ws.wait_type IN('ASYNC_IO_COMPLETION','BACKUPIO','CHKPT','WRITE_COMPLETION' - ,'IO_QUEUE_LIMIT', 'IO_RETRY') THEN 'Other Disk IO' - WHEN ws.wait_type LIKE 'PREEMPTIVE_%' THEN 'Preemptive' - WHEN ws.wait_type LIKE 'BROKER[_]%' THEN 'Service Broker' - WHEN ws.wait_type IN ('WRITELOG','LOGBUFFER','LOGMGR_RESERVE_APPEND' - , 'LOGMGR_FLUSH', 'LOGMGR_PMM_LOG') THEN 'Tran Log IO' - WHEN ws.wait_type LIKE 'LOG_RATE%' then 'Log Rate Governor' - WHEN ws.wait_type LIKE 'HADR_THROTTLE[_]%' - or ws.wait_type = 'THROTTLE_LOG_RATE_LOG_STORAGE' THEN 'HADR Log Rate Governor' - WHEN ws.wait_type LIKE 'RBIO_RG%' or ws.wait_type like 'WAIT_RBIO_RG%' then 'VLDB Log Rate Governor' - WHEN ws.wait_type LIKE 'RBIO[_]%' or ws.wait_type like 'WAIT_RBIO[_]%' then 'VLDB RBIO' - WHEN ws.wait_type IN('ASYNC_NETWORK_IO','EXTERNAL_SCRIPT_NETWORK_IOF' - ,'NET_WAITFOR_PACKET','PROXY_NETWORK_IO') THEN 'Network IO' - WHEN ws.wait_type IN ( 'CXPACKET', 'CXCONSUMER') - or ws.wait_type like 'HT%' or ws.wait_type like 'BMP%' - or ws.wait_type like 'BP%' THEN 'Parallelism' -WHEN ws.wait_type IN('CMEMTHREAD','CMEMPARTITIONED','EE_PMOLOCK','EXCHANGE' - ,'RESOURCE_SEMAPHORE','MEMORY_ALLOCATION_EXT' - ,'RESERVED_MEMORY_ALLOCATION_EXT', 'MEMORY_GRANT_UPDATE') THEN 'Memory' - WHEN ws.wait_type IN 
('WAITFOR','WAIT_FOR_RESULTS') THEN 'User Wait' - WHEN ws.wait_type LIKE 'HADR[_]%' or ws.wait_type LIKE 'PWAIT_HADR%' - or ws.wait_type LIKE 'REPLICA[_]%' or ws.wait_type LIKE 'REPL_%' - or ws.wait_type LIKE 'SE_REPL[_]%' - or ws.wait_type LIKE 'FCB_REPLICA%' THEN 'Replication' - WHEN ws.wait_type LIKE 'SQLTRACE[_]%' or ws.wait_type - IN ('TRACEWRITE', 'SQLTRACE_LOCK', 'SQLTRACE_FILE_BUFFER', 'SQLTRACE_FILE_WRITE_IO_COMPLETION' - , 'SQLTRACE_FILE_READ_IO_COMPLETION', 'SQLTRACE_PENDING_BUFFER_WRITERS', 'SQLTRACE_SHUTDOWN' - , 'QUERY_TRACEOUT', 'TRACE_EVTNOTIF') THEN 'Tracing' - WHEN ws.wait_type IN ('FT_RESTART_CRAWL', 'FULLTEXT GATHERER', 'MSSEARCH', 'FT_METADATA_MUTEX', - 'FT_IFTSHC_MUTEX', 'FT_IFTSISM_MUTEX', 'FT_IFTS_RWLOCK', 'FT_COMPROWSET_RWLOCK' - , 'FT_MASTER_MERGE', 'FT_PROPERTYLIST_CACHE', 'FT_MASTER_MERGE_COORDINATOR' - , 'PWAIT_RESOURCE_SEMAPHORE_FT_PARALLEL_QUERY_SYNC') THEN 'Full Text Search' - ELSE 'Other' -END as wait_category -FROM -sys.dm_os_wait_stats AS ws WITH (NOLOCK) + 'sqlserver_waitstats' AS [measurement] + ,REPLACE(@@SERVERNAME,'\',':') AS [sql_instance] + ,ws.[wait_type] + ,[wait_time_ms] + ,[wait_time_ms] - [signal_wait_time_ms] AS [resource_wait_ms] + ,[signal_wait_time_ms] + ,[max_wait_time_ms] + ,[waiting_tasks_count] + ,CASE + WHEN ws.[wait_type] LIKE 'SOS_SCHEDULER_YIELD' then 'CPU' + WHEN ws.[wait_type] = 'THREADPOOL' THEN 'Worker Thread' + WHEN ws.[wait_type] LIKE 'LCK[_]%' THEN 'Lock' + WHEN ws.[wait_type] LIKE 'LATCH[_]%' THEN 'Latch' + WHEN ws.[wait_type] LIKE 'PAGELATCH[_]%' THEN 'Buffer Latch' + WHEN ws.[wait_type] LIKE 'PAGEIOLATCH[_]%' THEN 'Buffer IO' + WHEN ws.[wait_type] LIKE 'RESOURCE_SEMAPHORE_QUERY_COMPILE%' THEN 'Compilation' + WHEN ws.[wait_type] LIKE 'CLR[_]%' or ws.[wait_type] like 'SQLCLR%' THEN 'SQL CLR' + WHEN ws.[wait_type] LIKE 'DBMIRROR_%' THEN 'Mirroring' + WHEN ws.[wait_type] LIKE 'DTC[_]%' or ws.[wait_type] LIKE 'DTCNEW%' or ws.[wait_type] LIKE 'TRAN_%' + or ws.[wait_type] LIKE 'XACT%' or ws.[wait_type] like 'MSQL_XACT%' THEN 'Transaction' + WHEN ws.[wait_type] LIKE 'SLEEP[_]%' + or ws.[wait_type] IN ( + 'LAZYWRITER_SLEEP', 'SQLTRACE_BUFFER_FLUSH', 'SQLTRACE_INCREMENTAL_FLUSH_SLEEP', + 'SQLTRACE_WAIT_ENTRIES', 'FT_IFTS_SCHEDULER_IDLE_WAIT', 'XE_DISPATCHER_WAIT', + 'REQUEST_FOR_DEADLOCK_SEARCH', 'LOGMGR_QUEUE', 'ONDEMAND_TASK_QUEUE', + 'CHECKPOINT_QUEUE', 'XE_TIMER_EVENT') THEN 'Idle' + WHEN ws.[wait_type] IN( + 'ASYNC_IO_COMPLETION','BACKUPIO','CHKPT','WRITE_COMPLETION', + 'IO_QUEUE_LIMIT', 'IO_RETRY') THEN 'Other Disk IO' + WHEN ws.[wait_type] LIKE 'PREEMPTIVE_%' THEN 'Preemptive' + WHEN ws.[wait_type] LIKE 'BROKER[_]%' THEN 'Service Broker' + WHEN ws.[wait_type] IN ( + 'WRITELOG','LOGBUFFER','LOGMGR_RESERVE_APPEND', + 'LOGMGR_FLUSH', 'LOGMGR_PMM_LOG') THEN 'Tran Log IO' + WHEN ws.[wait_type] LIKE 'LOG_RATE%' then 'Log Rate Governor' + WHEN ws.[wait_type] LIKE 'HADR_THROTTLE[_]%' + or ws.[wait_type] = 'THROTTLE_LOG_RATE_LOG_STORAGE' THEN 'HADR Log Rate Governor' + WHEN ws.[wait_type] LIKE 'RBIO_RG%' or ws.[wait_type] like 'WAIT_RBIO_RG%' then 'VLDB Log Rate Governor' + WHEN ws.[wait_type] LIKE 'RBIO[_]%' or ws.[wait_type] like 'WAIT_RBIO[_]%' then 'VLDB RBIO' + WHEN ws.[wait_type] IN( + 'ASYNC_NETWORK_IO','EXTERNAL_SCRIPT_NETWORK_IOF', + 'NET_WAITFOR_PACKET','PROXY_NETWORK_IO') THEN 'Network IO' + WHEN ws.[wait_type] IN ( 'CXPACKET', 'CXCONSUMER') + or ws.[wait_type] like 'HT%' or ws.[wait_type] like 'BMP%' + or ws.[wait_type] like 'BP%' THEN 'Parallelism' + WHEN ws.[wait_type] IN( + 
'CMEMTHREAD','CMEMPARTITIONED','EE_PMOLOCK','EXCHANGE', + 'RESOURCE_SEMAPHORE','MEMORY_ALLOCATION_EXT', + 'RESERVED_MEMORY_ALLOCATION_EXT', 'MEMORY_GRANT_UPDATE') THEN 'Memory' + WHEN ws.[wait_type] IN ('WAITFOR','WAIT_FOR_RESULTS') THEN 'User Wait' + WHEN ws.[wait_type] LIKE 'HADR[_]%' or ws.[wait_type] LIKE 'PWAIT_HADR%' + or ws.[wait_type] LIKE 'REPLICA[_]%' or ws.[wait_type] LIKE 'REPL_%' + or ws.[wait_type] LIKE 'SE_REPL[_]%' + or ws.[wait_type] LIKE 'FCB_REPLICA%' THEN 'Replication' + WHEN ws.[wait_type] LIKE 'SQLTRACE[_]%' + or ws.[wait_type] IN ( + 'TRACEWRITE', 'SQLTRACE_LOCK', 'SQLTRACE_FILE_BUFFER', 'SQLTRACE_FILE_WRITE_IO_COMPLETION', + 'SQLTRACE_FILE_READ_IO_COMPLETION', 'SQLTRACE_PENDING_BUFFER_WRITERS', 'SQLTRACE_SHUTDOWN', + 'QUERY_TRACEOUT', 'TRACE_EVTNOTIF') THEN 'Tracing' + WHEN ws.[wait_type] IN ( + 'FT_RESTART_CRAWL', 'FULLTEXT GATHERER', 'MSSEARCH', 'FT_METADATA_MUTEX', + 'FT_IFTSHC_MUTEX', 'FT_IFTSISM_MUTEX', 'FT_IFTS_RWLOCK', 'FT_COMPROWSET_RWLOCK', + 'FT_MASTER_MERGE', 'FT_PROPERTYLIST_CACHE', 'FT_MASTER_MERGE_COORDINATOR', + 'PWAIT_RESOURCE_SEMAPHORE_FT_PARALLEL_QUERY_SYNC') THEN 'Full Text Search' + ELSE 'Other' + END as [wait_category] +FROM sys.dm_os_wait_stats AS ws WITH (NOLOCK) WHERE -ws.wait_type NOT IN ( + ws.[wait_type] NOT IN ( N'BROKER_EVENTHANDLER', N'BROKER_RECEIVE_WAITFOR', N'BROKER_TASK_STOP', N'BROKER_TO_FLUSH', N'BROKER_TRANSMITTER', N'CHECKPOINT_QUEUE', N'CHKPT', N'CLR_AUTO_EVENT', N'CLR_MANUAL_EVENT', N'CLR_SEMAPHORE', - N'DBMIRROR_DBM_EVENT', N'DBMIRROR_EVENTS_QUEUE', N'DBMIRROR_ - _QUEUE', + N'DBMIRROR_DBM_EVENT', N'DBMIRROR_EVENTS_QUEUE', N'DBMIRROR_QUEUE', N'DBMIRRORING_CMD', N'DIRTY_PAGE_POLL', N'DISPATCHER_QUEUE_SEMAPHORE', N'EXECSYNC', N'FSAGENT', N'FT_IFTS_SCHEDULER_IDLE_WAIT', N'FT_IFTSHC_MUTEX', N'HADR_CLUSAPI_CALL', N'HADR_FILESTREAM_IOMGR_IOCOMPLETION', N'HADR_LOGCAPTURE_WAIT', @@ -753,246 +889,287 @@ ws.wait_type NOT IN ( N'SLEEP_DCOMSTARTUP', N'SLEEP_MASTERDBREADY', N'SLEEP_MASTERMDREADY', N'SLEEP_MASTERUPGRADED', N'SLEEP_MSDBSTARTUP', N'SLEEP_SYSTEMTASK', N'SLEEP_TASK', N'SLEEP_TEMPDBSTARTUP', N'SNI_HTTP_ACCEPT', N'SP_SERVER_DIAGNOSTICS_SLEEP', - N'SQLTRACE_BUFFER_FLUSH', N'SQLTRACE_INCREMENTAL_FLUSH_SLEEP', + N'SQLTRACE_BUFFER_FLUSH', N'SQLTRACE_INCREMENTAL_FLUSH_SLEEP', N'SQLTRACE_WAIT_ENTRIES', N'WAIT_FOR_RESULTS', N'WAITFOR', N'WAITFOR_TASKSHUTDOWN', N'WAIT_XTP_HOST_WAIT', N'WAIT_XTP_OFFLINE_CKPT_NEW_LOG', N'WAIT_XTP_CKPT_CLOSE', N'XE_BUFFERMGR_ALLPROCESSED_EVENT', N'XE_DISPATCHER_JOIN', N'XE_DISPATCHER_WAIT', N'XE_LIVE_TARGET_TVF', N'XE_TIMER_EVENT', - N'SOS_WORK_DISPATCHER','RESERVED_MEMORY_ALLOCATION_EXT','SQLTRACE_WAIT_ENTRIES' - , 'RBIO_COMM_RETRY') -AND waiting_tasks_count > 10 -AND wait_time_ms > 100; + N'SOS_WORK_DISPATCHER','RESERVED_MEMORY_ALLOCATION_EXT','SQLTRACE_WAIT_ENTRIES', + N'RBIO_COMM_RETRY') +AND [waiting_tasks_count] > 10 +AND [wait_time_ms] > 100; ` const sqlAzureMIPerformanceCounters = ` SET DEADLOCK_PRIORITY -10; -IF SERVERPROPERTY('EngineEdition') = 8 /*Managed Instance*/ +IF SERVERPROPERTY('EngineEdition') <> 8 BEGIN /*not Azure Managed Instance*/ + DECLARE @ErrorMessage AS nvarchar(500) = 'Telegraf - Connection string Server:'+ @@SERVERNAME + ',Database:' + DB_NAME() +' is not an Azure Managed Instance. 
Check the database_type parameter in the telegraf configuration.'; + RAISERROR (@ErrorMessage,11,1) + RETURN +END + DECLARE @PCounters TABLE ( - object_name nvarchar(128), - counter_name nvarchar(128), - instance_name nvarchar(128), - cntr_value bigint, - cntr_type INT , - Primary Key(object_name, counter_name,instance_name) + [object_name] nvarchar(128), + [counter_name] nvarchar(128), + [instance_name] nvarchar(128), + [cntr_value] bigint, + [cntr_type] INT , + Primary Key([object_name],[counter_name],[instance_name]) ); -WITH PerfCounters AS - ( - SELECT DISTINCT - RTrim(spi.object_name) object_name, - RTrim(spi.counter_name) counter_name, - CASE WHEN ( - RTRIM(spi.object_name) LIKE '%:Databases' - OR RTRIM(spi.object_name) LIKE '%:Database Replica' - OR RTRIM(spi.object_name) LIKE '%:Catalog Metadata' - OR RTRIM(spi.object_name) LIKE '%:Query Store' - OR RTRIM(spi.object_name) LIKE '%:Columnstore' - OR RTRIM(spi.object_name) LIKE '%:Advanced Analytics') - AND TRY_CONVERT(uniqueidentifier, spi.instance_name) - IS NOT NULL -- for cloud only - THEN ISNULL(d.name,RTRIM(spi.instance_name)) -- Elastic Pools counters exist for all databases but sys.databases only has current DB value - WHEN RTRIM(object_name) LIKE '%:Availability Replica' - AND TRY_CONVERT(uniqueidentifier, spi.instance_name) IS NOT NULL -- for cloud only - THEN ISNULL(d.name,RTRIM(spi.instance_name)) + RTRIM(SUBSTRING(spi.instance_name, 37, LEN(spi.instance_name))) - ELSE RTRIM(spi.instance_name) - END AS instance_name, - CAST(spi.cntr_value AS BIGINT) AS cntr_value, - spi.cntr_type - FROM sys.dm_os_performance_counters AS spi - LEFT JOIN sys.databases AS d - ON LEFT(spi.instance_name, 36) -- some instance_name values have an additional identifier appended after the GUID - =CASE WHEN -- in SQL DB standalone, physical_database_name for master is the GUID of the user database - d.name = 'master' AND TRY_CONVERT(uniqueidentifier, d.physical_database_name) IS NOT NULL - THEN d.name - ELSE d.physical_database_name - END - WHERE ( - counter_name IN ( - 'SQL Compilations/sec', - 'SQL Re-Compilations/sec', - 'User Connections', - 'Batch Requests/sec', - 'Logouts/sec', - 'Logins/sec', - 'Processes blocked', - 'Latch Waits/sec', - 'Full Scans/sec', - 'Index Searches/sec', - 'Page Splits/sec', - 'Page lookups/sec', - 'Page reads/sec', - 'Page writes/sec', - 'Readahead pages/sec', - 'Lazy writes/sec', - 'Checkpoint pages/sec', - 'Page life expectancy', - 'Log File(s) Size (KB)', - 'Log File(s) Used Size (KB)', - 'Data File(s) Size (KB)', - 'Transactions/sec', - 'Write Transactions/sec', - 'Active Temp Tables', - 'Temp Tables Creation Rate', - 'Temp Tables For Destruction', - 'Free Space in tempdb (KB)', - 'Version Store Size (KB)', - 'Memory Grants Pending', - 'Memory Grants Outstanding', - 'Free list stalls/sec', - 'Buffer cache hit ratio', - 'Buffer cache hit ratio base', - 'RBPEX cache hit ratio', - 'RBPEX cache hit ratio base', - 'Backup/Restore Throughput/sec', - 'Total Server Memory (KB)', - 'Target Server Memory (KB)', - 'Log Flushes/sec', - 'Log Flush Wait Time', - 'Memory broker clerk size', - 'Log Bytes Flushed/sec', - 'Bytes Sent to Replica/sec', - 'Log Send Queue', - 'Bytes Sent to Transport/sec', - 'Sends to Replica/sec', - 'Bytes Sent to Transport/sec', - 'Sends to Transport/sec', - 'Bytes Received from Replica/sec', - 'Receives from Replica/sec', - 'Flow Control Time (ms/sec)', - 'Flow Control/sec', - 'Resent Messages/sec', - 'Redone Bytes/sec', - 'XTP Memory Used (KB)', - 'Transaction Delay', - 'Log Bytes Received/sec', 
-	'Log Apply Pending Queue',
-	'Redone Bytes/sec',
-	'Recovery Queue',
-	'Log Apply Ready Queue',
-	'CPU usage %',
-	'CPU usage % base',
-	'Queued requests',
-	'Requests completed/sec',
-	'Blocked tasks',
-	'Active memory grant amount (KB)',
-	'Disk Read Bytes/sec',
-	'Disk Read IO Throttled/sec',
-	'Disk Read IO/sec',
-	'Disk Write Bytes/sec',
-	'Disk Write IO Throttled/sec',
-	'Disk Write IO/sec',
-	'Used memory (KB)',
-	'Forwarded Records/sec',
-	'Background Writer pages/sec',
-	'Percent Log Used',
-	'Log Send Queue KB',
-	'Redo Queue KB',
-	'Mirrored Write Transactions/sec',
-	'Group Commit Time',
-	'Group Commits/Sec'
-	)
-	) OR (
-	object_name LIKE '%User Settable%'
-	OR object_name LIKE '%SQL Errors%'
+WITH PerfCounters AS (
+	SELECT DISTINCT
+		 RTrim(spi.[object_name]) [object_name]
+		,RTrim(spi.[counter_name]) [counter_name]
+		,CASE WHEN (
+			RTRIM(spi.[object_name]) LIKE '%:Databases'
+			OR RTRIM(spi.[object_name]) LIKE '%:Database Replica'
+			OR RTRIM(spi.[object_name]) LIKE '%:Catalog Metadata'
+			OR RTRIM(spi.[object_name]) LIKE '%:Query Store'
+			OR RTRIM(spi.[object_name]) LIKE '%:Columnstore'
+			OR RTRIM(spi.[object_name]) LIKE '%:Advanced Analytics')
+			AND TRY_CONVERT([uniqueidentifier], spi.[instance_name]) IS NOT NULL -- for cloud only
+			THEN ISNULL(d.[name],RTRIM(spi.instance_name)) -- Elastic Pools counters exist for all databases but sys.databases only has current DB value
+		WHEN
+			RTRIM([object_name]) LIKE '%:Availability Replica'
+			AND TRY_CONVERT([uniqueidentifier], spi.[instance_name]) IS NOT NULL -- for cloud only
+			THEN ISNULL(d.[name],RTRIM(spi.[instance_name])) + RTRIM(SUBSTRING(spi.[instance_name], 37, LEN(spi.[instance_name])))
+		ELSE RTRIM(spi.instance_name)
+		END AS [instance_name]
+		,CAST(spi.[cntr_value] AS BIGINT) AS [cntr_value]
+		,spi.[cntr_type]
+	FROM sys.dm_os_performance_counters AS spi
+	LEFT JOIN sys.databases AS d
+		ON LEFT(spi.[instance_name], 36) -- some instance_name values have an additional identifier appended after the GUID
+		= CASE
+			/*in SQL DB standalone, physical_database_name for master is the GUID of the user database*/
+			WHEN d.[name] = 'master' AND TRY_CONVERT([uniqueidentifier], d.[physical_database_name]) IS NOT NULL
+				THEN d.[name]
+			ELSE d.[physical_database_name]
+		END
+	WHERE
+		counter_name IN (
+			 'SQL Compilations/sec'
+			,'SQL Re-Compilations/sec'
+			,'User Connections'
+			,'Batch Requests/sec'
+			,'Logouts/sec'
+			,'Logins/sec'
+			,'Processes blocked'
+			,'Latch Waits/sec'
+			,'Full Scans/sec'
+			,'Index Searches/sec'
+			,'Page Splits/sec'
+			,'Page lookups/sec'
+			,'Page reads/sec'
+			,'Page writes/sec'
+			,'Readahead pages/sec'
+			,'Lazy writes/sec'
+			,'Checkpoint pages/sec'
+			,'Page life expectancy'
+			,'Log File(s) Size (KB)'
+			,'Log File(s) Used Size (KB)'
+			,'Data File(s) Size (KB)'
+			,'Transactions/sec'
+			,'Write Transactions/sec'
+			,'Active Temp Tables'
+			,'Temp Tables Creation Rate'
+			,'Temp Tables For Destruction'
+			,'Free Space in tempdb (KB)'
+			,'Version Store Size (KB)'
+			,'Memory Grants Pending'
+			,'Memory Grants Outstanding'
+			,'Free list stalls/sec'
+			,'Buffer cache hit ratio'
+			,'Buffer cache hit ratio base'
+			,'Backup/Restore Throughput/sec'
+			,'Total Server Memory (KB)'
+			,'Target Server Memory (KB)'
+			,'Log Flushes/sec'
+			,'Log Flush Wait Time'
+			,'Memory broker clerk size'
+			,'Log Bytes Flushed/sec'
+			,'Bytes Sent to Replica/sec'
+			,'Log Send Queue'
+			,'Bytes Sent to Transport/sec'
+			,'Sends to Replica/sec'
+			,'Bytes Sent to Transport/sec'
+			,'Sends to Transport/sec'
+			,'Bytes Received from Replica/sec'
+			,'Receives from Replica/sec'
+			,'Flow Control Time (ms/sec)'
+			,'Flow Control/sec'
+			,'Resent Messages/sec'
+			,'Redone Bytes/sec'
+			,'XTP Memory Used (KB)'
+			,'Transaction Delay'
+			,'Log Bytes Received/sec'
+			,'Log Apply Pending Queue'
+			,'Redone Bytes/sec'
+			,'Recovery Queue'
+			,'Log Apply Ready Queue'
+			,'CPU usage %'
+			,'CPU usage % base'
+			,'Queued requests'
+			,'Requests completed/sec'
+			,'Blocked tasks'
+			,'Active memory grant amount (KB)'
+			,'Disk Read Bytes/sec'
+			,'Disk Read IO Throttled/sec'
+			,'Disk Read IO/sec'
+			,'Disk Write Bytes/sec'
+			,'Disk Write IO Throttled/sec'
+			,'Disk Write IO/sec'
+			,'Used memory (KB)'
+			,'Forwarded Records/sec'
+			,'Background Writer pages/sec'
+			,'Percent Log Used'
+			,'Log Send Queue KB'
+			,'Redo Queue KB'
+			,'Mirrored Write Transactions/sec'
+			,'Group Commit Time'
+			,'Group Commits/Sec'
 	) OR (
-	object_name LIKE '%Batch Resp Statistics%'
+		spi.[object_name] LIKE '%User Settable%'
+		OR spi.[object_name] LIKE '%SQL Errors%'
+		OR spi.[object_name] LIKE '%Batch Resp Statistics%'
 	) OR (
-	instance_name IN ('_Total')
-	AND counter_name IN (
-	'Lock Timeouts/sec',
-	'Number of Deadlocks/sec',
-	'Lock Waits/sec',
-	'Latch Waits/sec'
+		spi.[instance_name] IN ('_Total')
+		AND spi.[counter_name] IN (
+			 'Lock Timeouts/sec'
+			,'Lock Timeouts (timeout > 0)/sec'
+			,'Number of Deadlocks/sec'
+			,'Lock Waits/sec'
+			,'Latch Waits/sec'
 	)
 	)
-	)
+
 INSERT INTO @PCounters select * from PerfCounters
 
-select
-	'sqlserver_performance' AS [measurement],
-	REPLACE(@@SERVERNAME,'\',':') AS [sql_instance],
-	pc.object_name AS [object],
-	pc.counter_name AS [counter],
-	CASE pc.instance_name
-		WHEN '_Total' THEN 'Total'
-		ELSE ISNULL(pc.instance_name,'')
-	END AS [instance],
-	CAST(CASE WHEN pc.cntr_type = 537003264 AND pc1.cntr_value > 0 THEN (pc.cntr_value * 1.0) / (pc1.cntr_value * 1.0) * 100 ELSE pc.cntr_value END AS float(10)) AS [value],
-	-- cast to string as TAG
-	cast(pc.cntr_type as varchar(25)) as [counter_type]
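+-- cntr_type 537003264 is PERF_LARGE_RAW_FRACTION: the raw value is only
+-- meaningful relative to its matching '% base' counter (PERF_LARGE_RAW_BASE),
+-- so the SELECT below joins each fraction counter to its base counter and
+-- reports the result as a percentage.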
+SELECT
+	 'sqlserver_performance' AS [measurement]
+	,REPLACE(@@SERVERNAME,'\',':') AS [sql_instance]
+	,pc.[object_name] AS [object]
+	,pc.[counter_name] AS [counter]
+	,CASE pc.[instance_name]
+		WHEN '_Total' THEN 'Total'
+		ELSE ISNULL(pc.[instance_name],'')
+	END AS [instance]
+	,CAST(CASE WHEN pc.[cntr_type] = 537003264 AND pc1.[cntr_value] > 0 THEN (pc.[cntr_value] * 1.0) / (pc1.[cntr_value] * 1.0) * 100 ELSE pc.[cntr_value] END AS float(10)) AS [value]
+	,cast(pc.[cntr_type] as varchar(25)) as [counter_type] -- cast to string so it is collected as a tag
 from @PCounters pc
-	LEFT OUTER JOIN @PCounters AS pc1
-		ON (
-			pc.counter_name = REPLACE(pc1.counter_name,' base','')
-			OR pc.counter_name = REPLACE(pc1.counter_name,' base',' (ms)')
-		)
-		AND pc.object_name = pc1.object_name
-		AND pc.instance_name = pc1.instance_name
-		AND pc1.counter_name LIKE '%base'
-WHERE pc.counter_name NOT LIKE '% base'
-OPTION (RECOMPILE)
+LEFT OUTER JOIN @PCounters AS pc1
+	ON (
+		pc.[counter_name] = REPLACE(pc1.[counter_name],' base','')
+		OR pc.[counter_name] = REPLACE(pc1.[counter_name],' base',' (ms)')
+	)
+	AND pc.[object_name] = pc1.[object_name]
+	AND pc.[instance_name] = pc1.[instance_name]
+	AND pc1.[counter_name] LIKE '%base'
+WHERE
+	pc.[counter_name] NOT LIKE '% base'
+OPTION (RECOMPILE);
 `
 
 const sqlAzureMIRequests string = `
-SET NOCOUNT ON;
-IF SERVERPROPERTY('EngineEdition') = 8 -- Is this Azure SQL DB?
-BEGIN - SELECT blocking_session_id into #blockingSessions FROM sys.dm_exec_requests WHERE blocking_session_id != 0 - create index ix_blockingSessions_1 on #blockingSessions (blocking_session_id) - SELECT - 'sqlserver_requests' AS [measurement] - , REPLACE(@@SERVERNAME,'\',':') AS [sql_instance] - , DB_NAME() as [database_name] - , s.session_id - , ISNULL(r.request_id,0) as request_id - , DB_NAME(s.database_id) as session_db_name - , COALESCE(r.status,s.status) AS status - , COALESCE(r.cpu_time,s.cpu_time) AS cpu_time_ms - , COALESCE(r.total_elapsed_time,s.total_elapsed_time) AS total_elapsed_time_ms - , COALESCE(r.logical_reads,s.logical_reads) AS logical_reads - , COALESCE(r.writes,s.writes) AS writes - , r.command - , r.wait_time as wait_time_ms - , r.wait_type - , r.wait_resource - , r.blocking_session_id - , s.program_name - , s.host_name - , s.nt_user_name - , s.open_transaction_count AS open_transaction - , LEFT (CASE COALESCE(r.transaction_isolation_level, s.transaction_isolation_level) - WHEN 0 THEN '0-Read Committed' - WHEN 1 THEN '1-Read Uncommitted (NOLOCK)' - WHEN 2 THEN '2-Read Committed' - WHEN 3 THEN '3-Repeatable Read' - WHEN 4 THEN '4-Serializable' - WHEN 5 THEN '5-Snapshot' - ELSE CONVERT (varchar(30), r.transaction_isolation_level) + '-UNKNOWN' - END, 30) AS transaction_isolation_level - , r.granted_query_memory as granted_query_memory_pages - , r.percent_complete - , SUBSTRING( - qt.text, - r.statement_start_offset / 2 + 1, - (CASE WHEN r.statement_end_offset = -1 - THEN DATALENGTH(qt.text) - ELSE r.statement_end_offset - END - r.statement_start_offset) / 2 + 1 - ) AS statement_text - , qt.objectid - , QUOTENAME(OBJECT_SCHEMA_NAME(qt.objectid,qt.dbid)) + '.' + QUOTENAME(OBJECT_NAME(qt.objectid,qt.dbid)) as stmt_object_name - , DB_NAME(qt.dbid) stmt_db_name - , CONVERT(varchar(20),[query_hash],1) as [query_hash] - , CONVERT(varchar(20),[query_plan_hash],1) as [query_plan_hash] - FROM sys.dm_exec_sessions AS s - LEFT OUTER JOIN sys.dm_exec_requests AS r - ON s.session_id = r.session_id - OUTER APPLY sys.dm_exec_sql_text(r.sql_handle) AS qt - WHERE 1 = 1 - AND (r.session_id IS NOT NULL AND (s.is_user_process = 1 OR r.status COLLATE Latin1_General_BIN NOT IN ('background', 'sleeping'))) - OR (s.session_id IN (SELECT blocking_session_id FROM #blockingSessions)) - OPTION(MAXDOP 1) +IF SERVERPROPERTY('EngineEdition') <> 8 BEGIN /*not Azure Managed Instance*/ + DECLARE @ErrorMessage AS nvarchar(500) = 'Telegraf - Connection string Server:'+ @@SERVERNAME + ',Database:' + DB_NAME() +' is not an Azure Managed Instance. 
Check the database_type parameter in the telegraf configuration.';
+	RAISERROR (@ErrorMessage,11,1)
+	RETURN
+END
+
+SELECT [blocking_session_id] INTO #blockingSessions FROM sys.dm_exec_requests WHERE [blocking_session_id] != 0
+CREATE INDEX ix_blockingSessions_1 on #blockingSessions ([blocking_session_id])
+
+SELECT
+	 'sqlserver_requests' AS [measurement]
+	,REPLACE(@@SERVERNAME,'\',':') AS [sql_instance]
+	,DB_NAME() as [database_name]
+	,s.[session_id]
+	,ISNULL(r.[request_id], 0) as [request_id]
+	,DB_NAME(COALESCE(r.[database_id], s.[database_id])) AS [session_db_name]
+	,COALESCE(r.[status], s.[status]) AS [status]
+	,COALESCE(r.[cpu_time], s.[cpu_time]) AS [cpu_time_ms]
+	,COALESCE(r.[total_elapsed_time], s.[total_elapsed_time]) AS [total_elapsed_time_ms]
+	,COALESCE(r.[logical_reads], s.[logical_reads]) AS [logical_reads]
+	,COALESCE(r.[writes], s.[writes]) AS [writes]
+	,r.[command]
+	,r.[wait_time] as [wait_time_ms]
+	,r.[wait_type]
+	,r.[wait_resource]
+	,r.[blocking_session_id]
+	,s.[program_name]
+	,s.[host_name]
+	,s.[nt_user_name]
+	,COALESCE(r.[open_transaction_count], s.[open_transaction_count]) AS [open_transaction]
+	,LEFT (CASE COALESCE(r.[transaction_isolation_level], s.[transaction_isolation_level])
+		WHEN 0 THEN '0-Read Committed'
+		WHEN 1 THEN '1-Read Uncommitted (NOLOCK)'
+		WHEN 2 THEN '2-Read Committed'
+		WHEN 3 THEN '3-Repeatable Read'
+		WHEN 4 THEN '4-Serializable'
+		WHEN 5 THEN '5-Snapshot'
+		ELSE CONVERT (varchar(30), r.[transaction_isolation_level]) + '-UNKNOWN'
+	END, 30) AS [transaction_isolation_level]
+	,r.[granted_query_memory] as [granted_query_memory_pages]
+	,r.[percent_complete]
+	,SUBSTRING(
+		qt.[text],
+		r.[statement_start_offset] / 2 + 1,
+		(CASE WHEN r.[statement_end_offset] = -1
+			THEN DATALENGTH(qt.text)
+			ELSE r.[statement_end_offset]
+		END - r.[statement_start_offset]) / 2 + 1
+	) AS [statement_text]
+	,qt.[objectid]
+	,QUOTENAME(OBJECT_SCHEMA_NAME(qt.[objectid], qt.[dbid])) + '.' + QUOTENAME(OBJECT_NAME(qt.[objectid], qt.[dbid])) as [stmt_object_name]
+	,DB_NAME(qt.[dbid]) [stmt_db_name]
+	,CONVERT(varchar(20),[query_hash],1) as [query_hash]
+	,CONVERT(varchar(20),[query_plan_hash],1) as [query_plan_hash]
+FROM sys.dm_exec_sessions AS s
+LEFT OUTER JOIN sys.dm_exec_requests AS r
+	ON s.[session_id] = r.[session_id]
+OUTER APPLY sys.dm_exec_sql_text(r.sql_handle) AS qt
+WHERE
+	(s.session_id IN (SELECT blocking_session_id FROM #blockingSessions))
+	OR (
+		r.session_id IS NOT NULL
+		AND (
+			s.is_user_process = 1
+			OR r.status COLLATE Latin1_General_BIN NOT IN ('background', 'sleeping')
+		)
+	)
+OPTION(MAXDOP 1);
+`
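+
+// sqlAzureMISchedulers, like the other Azure SQL Managed Instance queries
+// above, opens with an EngineEdition guard: if the connection does not point
+// at a Managed Instance (EngineEdition 8), it raises a descriptive error
+// instead of silently returning no rows, so a wrong database_type setting in
+// the Telegraf configuration surfaces immediately.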
+const sqlAzureMISchedulers string = `
+IF SERVERPROPERTY('EngineEdition') <> 8 BEGIN /*not Azure Managed Instance*/
+	DECLARE @ErrorMessage AS nvarchar(500) = 'Telegraf - Connection string Server:'+ @@SERVERNAME + ',Database:' + DB_NAME() +' is not an Azure Managed Instance. Check the database_type parameter in the telegraf configuration.';
+	RAISERROR (@ErrorMessage,11,1)
+	RETURN
+END
+
+SELECT
+	 'sqlserver_schedulers' AS [measurement]
+	,REPLACE(@@SERVERNAME, '\', ':') AS [sql_instance]
+	,CAST(s.[scheduler_id] AS VARCHAR(4)) AS [scheduler_id]
+	,CAST(s.[cpu_id] AS VARCHAR(4)) AS [cpu_id]
+	,s.[is_online]
+	,s.[is_idle]
+	,s.[preemptive_switches_count]
+	,s.[context_switches_count]
+	,s.[current_tasks_count]
+	,s.[runnable_tasks_count]
+	,s.[current_workers_count]
+	,s.[active_workers_count]
+	,s.[work_queue_count]
+	,s.[pending_disk_io_count]
+	,s.[load_factor]
+	,s.[yield_count]
+	,s.[total_cpu_usage_ms]
+	,s.[total_scheduler_delay_ms]
+FROM sys.dm_os_schedulers AS s
+`
diff --git a/plugins/inputs/sqlserver/sqlserver.go b/plugins/inputs/sqlserver/sqlserver.go
index 942c152258cb3..75035b722e7d1 100644
--- a/plugins/inputs/sqlserver/sqlserver.go
+++ b/plugins/inputs/sqlserver/sqlserver.go
@@ -147,7 +147,7 @@ func initQueries(s *SQLServer) error {
 		queries["AzureSQLMIOsWaitstats"] = Query{ScriptName: "AzureSQLMIOsWaitstats", Script: sqlAzureMIOsWaitStats, ResultByRow: false}
 		queries["AzureSQLMIMemoryClerks"] = Query{ScriptName: "AzureSQLMIMemoryClerks", Script: sqlAzureMIMemoryClerks, ResultByRow: false}
 		queries["AzureSQLMIPerformanceCounters"] = Query{ScriptName: "AzureSQLMIPerformanceCounters", Script: sqlAzureMIPerformanceCounters, ResultByRow: false}
-		queries["AzureSQLMISqlRequests"] = Query{ScriptName: "AzureSQLMISqlRequests", Script: sqlAzureMIRequests, ResultByRow: false}
+		queries["AzureSQLMIRequests"] = Query{ScriptName: "AzureSQLMIRequests", Script: sqlAzureMIRequests, ResultByRow: false}
 		queries["AzureSQLMISchedulers"] = Query{ScriptName: "AzureSQLMISchedulers", Script: sqlServerSchedulers, ResultByRow: false}
 	} else if s.DatabaseType == "SQLServer" { //These are still V2 queries and have not been refactored yet.
 		queries["SQLServerPerformanceCounters"] = Query{ScriptName: "SQLServerPerformanceCounters", Script: sqlServerPerformanceCounters, ResultByRow: false}
diff --git a/plugins/inputs/tail/README.md b/plugins/inputs/tail/README.md
index 1be8a5e93a42b..7f5315038a2ea 100644
--- a/plugins/inputs/tail/README.md
+++ b/plugins/inputs/tail/README.md
@@ -62,6 +62,23 @@ The plugin expects messages in one of the
   ## more about them here:
   ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
   data_format = "influx"
+
+  ## multiline parser/codec
+  ## https://www.elastic.co/guide/en/logstash/2.4/plugins-filters-multiline.html
+  #[inputs.tail.multiline]
+  ## The pattern should be a regexp which matches what you believe to be an indicator that the line is part of an event consisting of multiple lines of log data.
+  #pattern = "^\s"
+
+  ## The match_which_line value must be "previous" or "next" and indicates the
+  ## relation of a matched line to the multi-line event.
+  #match_which_line = "previous"
+
+  ## The invert_match value can be true or false (defaults to false).
+  ## If true, a line NOT matching the pattern counts as a match for the multiline
+  ## filter, and match_which_line is applied to it (and vice versa).
+  #invert_match = false
+
+  ## After the specified timeout, this plugin sends the multiline event even if no new pattern is found to start a new event. The default is 5s.
+ #timeout = 5s ``` ### Metrics diff --git a/plugins/inputs/tail/multiline.go b/plugins/inputs/tail/multiline.go new file mode 100644 index 0000000000000..7a254c1bf9676 --- /dev/null +++ b/plugins/inputs/tail/multiline.go @@ -0,0 +1,135 @@ +package tail + +import ( + "bytes" + "fmt" + "regexp" + "strings" + "time" + + "github.com/influxdata/telegraf/internal" +) + +// Indicates relation to the multiline event: previous or next +type MultilineMatchWhichLine int + +type Multiline struct { + config *MultilineConfig + enabled bool + patternRegexp *regexp.Regexp +} + +type MultilineConfig struct { + Pattern string + MatchWhichLine MultilineMatchWhichLine `toml:"match_which_line"` + InvertMatch bool + Timeout *internal.Duration +} + +const ( + // Previous => Append current line to previous line + Previous MultilineMatchWhichLine = iota + // Next => Next line will be appended to current line + Next +) + +func (m *MultilineConfig) NewMultiline() (*Multiline, error) { + enabled := false + var r *regexp.Regexp + var err error + + if m.Pattern != "" { + enabled = true + if r, err = regexp.Compile(m.Pattern); err != nil { + return nil, err + } + if m.Timeout == nil || m.Timeout.Duration.Nanoseconds() == int64(0) { + m.Timeout = &internal.Duration{Duration: 5 * time.Second} + } + } + + return &Multiline{ + config: m, + enabled: enabled, + patternRegexp: r}, nil +} + +func (m *Multiline) IsEnabled() bool { + return m.enabled +} + +func (m *Multiline) ProcessLine(text string, buffer *bytes.Buffer) string { + if m.matchString(text) { + buffer.WriteString(text) + return "" + } + + if m.config.MatchWhichLine == Previous { + previousText := buffer.String() + buffer.Reset() + buffer.WriteString(text) + text = previousText + } else { + // Next + if buffer.Len() > 0 { + buffer.WriteString(text) + text = buffer.String() + buffer.Reset() + } + } + + return text +} + +func (m *Multiline) Flush(buffer *bytes.Buffer) string { + if buffer.Len() == 0 { + return "" + } + text := buffer.String() + buffer.Reset() + return text +} + +func (m *Multiline) matchString(text string) bool { + return m.patternRegexp.MatchString(text) != m.config.InvertMatch +} + +func (w MultilineMatchWhichLine) String() string { + switch w { + case Previous: + return "previous" + case Next: + return "next" + } + return "" +} + +// UnmarshalTOML implements ability to unmarshal MultilineMatchWhichLine from TOML files. +func (w *MultilineMatchWhichLine) UnmarshalTOML(data []byte) (err error) { + return w.UnmarshalText(data) +} + +// UnmarshalText implements encoding.TextUnmarshaler +func (w *MultilineMatchWhichLine) UnmarshalText(data []byte) (err error) { + s := string(data) + switch strings.ToUpper(s) { + case `PREVIOUS`, `"PREVIOUS"`, `'PREVIOUS'`: + *w = Previous + return + + case `NEXT`, `"NEXT"`, `'NEXT'`: + *w = Next + return + } + *w = -1 + return fmt.Errorf("E! [inputs.tail] unknown multiline MatchWhichLine") +} + +// MarshalText implements encoding.TextMarshaler +func (w MultilineMatchWhichLine) MarshalText() ([]byte, error) { + s := w.String() + if s != "" { + return []byte(s), nil + } + return nil, fmt.Errorf("E! 
[inputs.tail] unknown multiline MatchWhichLine") +} diff --git a/plugins/inputs/tail/multiline_test.go b/plugins/inputs/tail/multiline_test.go new file mode 100644 index 0000000000000..6db50dc048b99 --- /dev/null +++ b/plugins/inputs/tail/multiline_test.go @@ -0,0 +1,235 @@ +package tail + +import ( + "bytes" + "testing" + "time" + + "github.com/influxdata/telegraf/internal" + "github.com/stretchr/testify/assert" +) + +func TestMultilineConfigOK(t *testing.T) { + c := &MultilineConfig{ + Pattern: ".*", + MatchWhichLine: Previous, + } + + _, err := c.NewMultiline() + + assert.NoError(t, err, "Configuration was OK.") +} + +func TestMultilineConfigError(t *testing.T) { + c := &MultilineConfig{ + Pattern: "\xA0", + MatchWhichLine: Previous, + } + + _, err := c.NewMultiline() + + assert.Error(t, err, "The pattern was invalid") +} + +func TestMultilineConfigTimeoutSpecified(t *testing.T) { + duration, _ := time.ParseDuration("10s") + c := &MultilineConfig{ + Pattern: ".*", + MatchWhichLine: Previous, + Timeout: &internal.Duration{Duration: duration}, + } + m, err := c.NewMultiline() + assert.NoError(t, err, "Configuration was OK.") + + assert.Equal(t, duration, m.config.Timeout.Duration) +} + +func TestMultilineConfigDefaultTimeout(t *testing.T) { + duration, _ := time.ParseDuration("5s") + c := &MultilineConfig{ + Pattern: ".*", + MatchWhichLine: Previous, + } + m, err := c.NewMultiline() + assert.NoError(t, err, "Configuration was OK.") + + assert.Equal(t, duration, m.config.Timeout.Duration) +} + +func TestMultilineIsEnabled(t *testing.T) { + c := &MultilineConfig{ + Pattern: ".*", + MatchWhichLine: Previous, + } + m, err := c.NewMultiline() + assert.NoError(t, err, "Configuration was OK.") + + isEnabled := m.IsEnabled() + + assert.True(t, isEnabled, "Should have been enabled") +} + +func TestMultilineIsDisabled(t *testing.T) { + c := &MultilineConfig{ + MatchWhichLine: Previous, + } + m, err := c.NewMultiline() + assert.NoError(t, err, "Configuration was OK.") + + isEnabled := m.IsEnabled() + + assert.False(t, isEnabled, "Should have been disabled") +} + +func TestMultilineFlushEmpty(t *testing.T) { + c := &MultilineConfig{ + Pattern: "^=>", + MatchWhichLine: Previous, + } + m, err := c.NewMultiline() + assert.NoError(t, err, "Configuration was OK.") + var buffer bytes.Buffer + + text := m.Flush(&buffer) + + assert.Empty(t, text) +} + +func TestMultilineFlush(t *testing.T) { + c := &MultilineConfig{ + Pattern: "^=>", + MatchWhichLine: Previous, + } + m, err := c.NewMultiline() + assert.NoError(t, err, "Configuration was OK.") + var buffer bytes.Buffer + buffer.WriteString("foo") + + text := m.Flush(&buffer) + + assert.Equal(t, "foo", text) + assert.Zero(t, buffer.Len()) +} + +func TestMultiLineProcessLinePrevious(t *testing.T) { + c := &MultilineConfig{ + Pattern: "^=>", + MatchWhichLine: Previous, + } + m, err := c.NewMultiline() + assert.NoError(t, err, "Configuration was OK.") + var buffer bytes.Buffer + + text := m.ProcessLine("1", &buffer) + assert.Empty(t, text) + assert.NotZero(t, buffer.Len()) + + text = m.ProcessLine("=>2", &buffer) + assert.Empty(t, text) + assert.NotZero(t, buffer.Len()) + + text = m.ProcessLine("=>3", &buffer) + assert.Empty(t, text) + assert.NotZero(t, buffer.Len()) + + text = m.ProcessLine("4", &buffer) + assert.Equal(t, "1=>2=>3", text) + assert.NotZero(t, buffer.Len()) + + text = m.ProcessLine("5", &buffer) + assert.Equal(t, "4", text) + assert.Equal(t, "5", buffer.String()) +} + +func TestMultiLineProcessLineNext(t *testing.T) { + c := &MultilineConfig{ + 
Pattern: "=>$", + MatchWhichLine: Next, + } + m, err := c.NewMultiline() + assert.NoError(t, err, "Configuration was OK.") + var buffer bytes.Buffer + + text := m.ProcessLine("1=>", &buffer) + assert.Empty(t, text) + assert.NotZero(t, buffer.Len()) + + text = m.ProcessLine("2=>", &buffer) + assert.Empty(t, text) + assert.NotZero(t, buffer.Len()) + + text = m.ProcessLine("3=>", &buffer) + assert.Empty(t, text) + assert.NotZero(t, buffer.Len()) + + text = m.ProcessLine("4", &buffer) + assert.Equal(t, "1=>2=>3=>4", text) + assert.Zero(t, buffer.Len()) + + text = m.ProcessLine("5", &buffer) + assert.Equal(t, "5", text) + assert.Zero(t, buffer.Len()) +} + +func TestMultiLineMatchStringWithInvertMatchFalse(t *testing.T) { + c := &MultilineConfig{ + Pattern: "=>$", + MatchWhichLine: Next, + InvertMatch: false, + } + m, err := c.NewMultiline() + assert.NoError(t, err, "Configuration was OK.") + + matches1 := m.matchString("t=>") + matches2 := m.matchString("t") + + assert.True(t, matches1) + assert.False(t, matches2) +} + +func TestMultiLineMatchStringWithInvertTrue(t *testing.T) { + c := &MultilineConfig{ + Pattern: "=>$", + MatchWhichLine: Next, + InvertMatch: true, + } + m, err := c.NewMultiline() + assert.NoError(t, err, "Configuration was OK.") + + matches1 := m.matchString("t=>") + matches2 := m.matchString("t") + + assert.False(t, matches1) + assert.True(t, matches2) +} + +func TestMultilineWhat(t *testing.T) { + var w1 MultilineMatchWhichLine + w1.UnmarshalTOML([]byte(`"previous"`)) + assert.Equal(t, Previous, w1) + + var w2 MultilineMatchWhichLine + w2.UnmarshalTOML([]byte(`previous`)) + assert.Equal(t, Previous, w2) + + var w3 MultilineMatchWhichLine + w3.UnmarshalTOML([]byte(`'previous'`)) + assert.Equal(t, Previous, w3) + + var w4 MultilineMatchWhichLine + w4.UnmarshalTOML([]byte(`"next"`)) + assert.Equal(t, Next, w4) + + var w5 MultilineMatchWhichLine + w5.UnmarshalTOML([]byte(`next`)) + assert.Equal(t, Next, w5) + + var w6 MultilineMatchWhichLine + w6.UnmarshalTOML([]byte(`'next'`)) + assert.Equal(t, Next, w6) + + var w7 MultilineMatchWhichLine + err := w7.UnmarshalTOML([]byte(`nope`)) + assert.Equal(t, MultilineMatchWhichLine(-1), w7) + assert.Error(t, err) +} diff --git a/plugins/inputs/tail/tail.go b/plugins/inputs/tail/tail.go index 70dc09e980e03..fdb5b40cc3abd 100644 --- a/plugins/inputs/tail/tail.go +++ b/plugins/inputs/tail/tail.go @@ -3,11 +3,13 @@ package tail import ( + "bytes" "context" "errors" "io" "strings" "sync" + "time" "github.com/dimchansky/utfbom" "github.com/influxdata/tail" @@ -45,11 +47,16 @@ type Tail struct { offsets map[string]int64 parserFunc parsers.ParserFunc wg sync.WaitGroup - ctx context.Context - cancel context.CancelFunc - acc telegraf.TrackingAccumulator - sem semaphore - decoder *encoding.Decoder + + acc telegraf.TrackingAccumulator + + MultilineConfig MultilineConfig `toml:"multiline"` + multiline *Multiline + + ctx context.Context + cancel context.CancelFunc + sem semaphore + decoder *encoding.Decoder } func NewTail() *Tail { @@ -107,6 +114,27 @@ const sampleConfig = ` ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md data_format = "influx" + + ## multiline parser/codec + ## https://www.elastic.co/guide/en/logstash/2.4/plugins-filters-multiline.html + #[inputs.tail.multiline] + ## The pattern should be a regexp which matches what you believe to be an + ## indicator that the field is part of an event consisting of multiple lines of log data. 
+  #pattern = "^\s"
+
+  ## This field must be either "previous" or "next".
+  ## If a line matches the pattern, "previous" indicates that it belongs to the previous line,
+  ## whereas "next" indicates that the line belongs to the next one.
+  #match_which_line = "previous"
+
+  ## The invert_match field can be true or false (defaults to false).
+  ## If true, a line NOT matching the pattern counts as a match for the multiline
+  ## filter, and match_which_line is applied to it (and vice versa).
+  #invert_match = false
+
+  ## After the specified timeout, this plugin sends a multiline event even if no new pattern
+  ## is found to start a new event. The default timeout is 5s.
+  #timeout = 5s
 `
 
 func (t *Tail) SampleConfig() string {
@@ -150,9 +178,16 @@ func (t *Tail) Start(acc telegraf.Accumulator) error {
 		}
 	}()
 
+	var err error
+	t.multiline, err = t.MultilineConfig.NewMultiline()
+
+	if err != nil {
+		return err
+	}
+
 	t.tailers = make(map[string]*tail.Tail)
 
-	err := t.tailNewFiles(t.FromBeginning)
+	err = t.tailNewFiles(t.FromBeginning)
 
 	// clear offsets
 	t.offsets = make(map[string]int64)
@@ -212,6 +247,7 @@ func (t *Tail) tailNewFiles(fromBeginning bool) error {
 				return r
 			},
 		})
+
 		if err != nil {
 			t.Log.Debugf("Failed to open file (%s): %v", file, err)
 			continue
@@ -227,6 +263,7 @@ func (t *Tail) tailNewFiles(fromBeginning bool) error {
 
 		// create a goroutine for each "tailer"
 		t.wg.Add(1)
+
 		go func() {
 			defer t.wg.Done()
 			t.receiver(parser, tailer)
@@ -237,6 +274,7 @@ func (t *Tail) tailNewFiles(fromBeginning bool) error {
 				t.Log.Errorf("Tailing %q: %s", tailer.Filename, err.Error())
 			}
 		}()
+
 		t.tailers[tailer.Filename] = tailer
 	}
 }
@@ -272,18 +310,72 @@ func parseLine(parser parsers.Parser, line string, firstLine bool) ([]telegraf.Metric, error) {
 // for changes, parse any incoming msgs, and add to the accumulator.
 func (t *Tail) receiver(parser parsers.Parser, tailer *tail.Tail) {
 	var firstLine = true
-	for line := range tailer.Lines {
-		if line.Err != nil {
+
+	// holds the individual lines of multi-line log entries.
+	var buffer bytes.Buffer
+
+	var timer *time.Timer
+	var timeout <-chan time.Time
+
+	// The multiline mode requires a timer in order to flush the multiline buffer
+	// if no new lines are incoming.
+	if t.multiline.IsEnabled() {
+		timer = time.NewTimer(t.MultilineConfig.Timeout.Duration)
+		timeout = timer.C
+	}
+
+	channelOpen := true
+	tailerOpen := true
+	var line *tail.Line
+
+	for {
+		line = nil
+
+		if timer != nil {
+			timer.Reset(t.MultilineConfig.Timeout.Duration)
+		}
+
+		// Wait for the next line, a shutdown signal, or the multiline timeout,
+		// whichever comes first.
+		select {
+		case <-t.ctx.Done():
+			channelOpen = false
+		case line, tailerOpen = <-tailer.Lines:
+			if !tailerOpen {
+				channelOpen = false
+			}
+		case <-timeout:
+		}
+
+		var text string
+
+		if line != nil {
+			// Fix up files with Windows line endings.
+			text = strings.TrimRight(line.Text, "\r")
+
+			if t.multiline.IsEnabled() {
+				// An empty result means the line was buffered as part of a
+				// multi-line event that is not yet complete.
+				if text = t.multiline.ProcessLine(text, &buffer); text == "" {
+					continue
+				}
+			}
+		}
+		if line == nil || !channelOpen || !tailerOpen {
+			// No complete line arrived (timeout or shutdown): flush whatever
+			// is left in the multiline buffer before continuing or returning.
+			if text += t.multiline.Flush(&buffer); text == "" {
+				if !channelOpen {
+					return
+				}
+
+				continue
+			}
+		}
+
+		if line != nil && line.Err != nil {
 			t.Log.Errorf("Tailing %q: %s", tailer.Filename, line.Err.Error())
 			continue
 		}
-		// Fix up files with Windows line endings.
-		text := strings.TrimRight(line.Text, "\r")
 
 		metrics, err := parseLine(parser, text, firstLine)
 		if err != nil {
 			t.Log.Errorf("Malformed log line in %q: [%q]: %s",
-				tailer.Filename, line.Text, err.Error())
+				tailer.Filename, text, err.Error())
 			continue
 		}
 		firstLine = false
@@ -292,6 +384,18 @@ func (t *Tail) receiver(parser parsers.Parser, tailer *tail.Tail) {
 			metric.AddTag("path", tailer.Filename)
 		}
 
+		// try writing out metric first without blocking
+		select {
+		case t.sem <- empty{}:
+			t.acc.AddTrackingMetricGroup(metrics)
+			if t.ctx.Err() != nil {
+				return // exit!
+			}
+			continue // next loop
+		default:
+			// no room. switch to blocking write.
+		}
+
 		// Block until plugin is stopping or room is available to add metrics.
 		select {
 		case <-t.ctx.Done():
diff --git a/plugins/inputs/tail/tail_test.go b/plugins/inputs/tail/tail_test.go
index 38a7f22780a52..5669fbf2e6ea7 100644
--- a/plugins/inputs/tail/tail_test.go
+++ b/plugins/inputs/tail/tail_test.go
@@ -5,10 +5,13 @@ import (
 	"io/ioutil"
 	"log"
 	"os"
+	"runtime"
+	"strings"
 	"testing"
 	"time"
 
 	"github.com/influxdata/telegraf"
+	"github.com/influxdata/telegraf/internal"
 	"github.com/influxdata/telegraf/plugins/parsers"
 	"github.com/influxdata/telegraf/plugins/parsers/csv"
 	"github.com/influxdata/telegraf/plugins/parsers/influx"
@@ -88,6 +91,173 @@ func TestTailDosLineendings(t *testing.T) {
 	})
 }
 
+func TestGrokParseLogFilesWithMultiline(t *testing.T) {
+	thisdir := getCurrentDir()
+	// we make sure the timeout won't kick in
+	duration, _ := time.ParseDuration("100s")
+
+	tt := NewTail()
+	tt.Log = testutil.Logger{}
+	tt.FromBeginning = true
+	tt.Files = []string{thisdir + "testdata/test_multiline.log"}
+	tt.MultilineConfig = MultilineConfig{
+		Pattern:        `^[^\[]`,
+		MatchWhichLine: Previous,
+		InvertMatch:    false,
+		Timeout:        &internal.Duration{Duration: duration},
+	}
+	tt.SetParserFunc(createGrokParser)
+
+	err := tt.Init()
+	require.NoError(t, err)
+
+	acc := testutil.Accumulator{}
+	assert.NoError(t, tt.Start(&acc))
+	defer tt.Stop()
+
+	acc.Wait(3)
+
+	expectedPath := thisdir + "testdata/test_multiline.log"
+	acc.AssertContainsTaggedFields(t, "tail_grok",
+		map[string]interface{}{
+			"message": "HelloExample: This is debug",
+		},
+		map[string]string{
+			"path":     expectedPath,
+			"loglevel": "DEBUG",
+		})
+	acc.AssertContainsTaggedFields(t, "tail_grok",
+		map[string]interface{}{
+			"message": "HelloExample: This is info",
+		},
+		map[string]string{
+			"path":     expectedPath,
+			"loglevel": "INFO",
+		})
+	acc.AssertContainsTaggedFields(t, "tail_grok",
+		map[string]interface{}{
+			"message": "HelloExample: Sorry, something wrong! java.lang.ArithmeticException: / by zero\tat com.foo.HelloExample2.divide(HelloExample2.java:24)\tat com.foo.HelloExample2.main(HelloExample2.java:14)",
+		},
+		map[string]string{
+			"path":     expectedPath,
+			"loglevel": "ERROR",
+		})
+
+	assert.Equal(t, uint64(3), acc.NMetrics())
+}
+
+func TestGrokParseLogFilesWithMultilineTimeout(t *testing.T) {
+	tmpfile, err := ioutil.TempFile("", "")
+	require.NoError(t, err)
+	defer os.Remove(tmpfile.Name())
+
+	// This seems necessary in order to get the test to read the following lines.
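+	// (most likely because the tailer has to observe at least one complete,
+	// newline-terminated line before follow mode starts delivering new writes)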
+ _, err = tmpfile.WriteString("[04/Jun/2016:12:41:48 +0100] INFO HelloExample: This is fluff\r\n") + require.NoError(t, err) + require.NoError(t, tmpfile.Sync()) + + // set tight timeout for tests + duration := 10 * time.Millisecond + + tt := NewTail() + tt.Log = testutil.Logger{} + tt.FromBeginning = true + tt.Files = []string{tmpfile.Name()} + tt.MultilineConfig = MultilineConfig{ + Pattern: `^[^\[]`, + MatchWhichLine: Previous, + InvertMatch: false, + Timeout: &internal.Duration{Duration: duration}, + } + tt.SetParserFunc(createGrokParser) + + err = tt.Init() + require.NoError(t, err) + + acc := testutil.Accumulator{} + assert.NoError(t, tt.Start(&acc)) + time.Sleep(11 * time.Millisecond) // will force timeout + _, err = tmpfile.WriteString("[04/Jun/2016:12:41:48 +0100] INFO HelloExample: This is info\r\n") + require.NoError(t, err) + require.NoError(t, tmpfile.Sync()) + acc.Wait(2) + time.Sleep(11 * time.Millisecond) // will force timeout + _, err = tmpfile.WriteString("[04/Jun/2016:12:41:48 +0100] WARN HelloExample: This is warn\r\n") + require.NoError(t, err) + require.NoError(t, tmpfile.Sync()) + acc.Wait(3) + tt.Stop() + assert.Equal(t, uint64(3), acc.NMetrics()) + expectedPath := tmpfile.Name() + + acc.AssertContainsTaggedFields(t, "tail_grok", + map[string]interface{}{ + "message": "HelloExample: This is info", + }, + map[string]string{ + "path": expectedPath, + "loglevel": "INFO", + }) + acc.AssertContainsTaggedFields(t, "tail_grok", + map[string]interface{}{ + "message": "HelloExample: This is warn", + }, + map[string]string{ + "path": expectedPath, + "loglevel": "WARN", + }) +} + +func TestGrokParseLogFilesWithMultilineTailerCloseFlushesMultilineBuffer(t *testing.T) { + thisdir := getCurrentDir() + //we make sure the timeout won't kick in + duration := 100 * time.Second + + tt := NewTail() + tt.Log = testutil.Logger{} + tt.FromBeginning = true + tt.Files = []string{thisdir + "testdata/test_multiline.log"} + tt.MultilineConfig = MultilineConfig{ + Pattern: `^[^\[]`, + MatchWhichLine: Previous, + InvertMatch: false, + Timeout: &internal.Duration{Duration: duration}, + } + tt.SetParserFunc(createGrokParser) + + err := tt.Init() + require.NoError(t, err) + + acc := testutil.Accumulator{} + assert.NoError(t, tt.Start(&acc)) + acc.Wait(3) + assert.Equal(t, uint64(3), acc.NMetrics()) + // Close tailer, so multiline buffer is flushed + tt.Stop() + acc.Wait(4) + + expectedPath := thisdir + "testdata/test_multiline.log" + acc.AssertContainsTaggedFields(t, "tail_grok", + map[string]interface{}{ + "message": "HelloExample: This is warn", + }, + map[string]string{ + "path": expectedPath, + "loglevel": "WARN", + }) +} + +func createGrokParser() (parsers.Parser, error) { + grokConfig := &parsers.Config{ + MetricName: "tail_grok", + GrokPatterns: []string{"%{TEST_LOG_MULTILINE}"}, + GrokCustomPatternFiles: []string{getCurrentDir() + "testdata/test-patterns"}, + DataFormat: "grok", + } + parser, err := parsers.NewParser(grokConfig) + return parser, err +} + // The csv parser should only parse the header line once per file. 
func TestCSVHeadersParsedOnce(t *testing.T) {
 	tmpfile, err := ioutil.TempFile("", "")
@@ -204,6 +374,11 @@ func TestMultipleMetricsOnFirstLine(t *testing.T) {
 		testutil.IgnoreTime())
 }
 
+func getCurrentDir() string {
+	_, filename, _, _ := runtime.Caller(1)
+	return strings.Replace(filename, "tail_test.go", "", 1)
+}
+
 func TestCharacterEncoding(t *testing.T) {
 	full := []telegraf.Metric{
 		testutil.MustMetric("cpu",
diff --git a/plugins/inputs/tail/testdata/test-patterns b/plugins/inputs/tail/testdata/test-patterns
new file mode 100644
index 0000000000000..a9ea98c72967f
--- /dev/null
+++ b/plugins/inputs/tail/testdata/test-patterns
@@ -0,0 +1,3 @@
+# Test multiline
+# [04/Jun/2016:12:41:45 +0100] DEBUG HelloExample: This is debug
+TEST_LOG_MULTILINE \[%{HTTPDATE:timestamp:ts-httpd}\] %{WORD:loglevel:tag} %{GREEDYDATA:message}
diff --git a/plugins/inputs/tail/testdata/test_multiline.log b/plugins/inputs/tail/testdata/test_multiline.log
new file mode 100644
index 0000000000000..0cabca5dc4362
--- /dev/null
+++ b/plugins/inputs/tail/testdata/test_multiline.log
@@ -0,0 +1,7 @@
+[04/Jun/2016:12:41:45 +0100] DEBUG HelloExample: This is debug
+[04/Jun/2016:12:41:48 +0100] INFO HelloExample: This is info
+[04/Jun/2016:12:41:46 +0100] ERROR HelloExample: Sorry, something wrong!
+java.lang.ArithmeticException: / by zero
+	at com.foo.HelloExample2.divide(HelloExample2.java:24)
+	at com.foo.HelloExample2.main(HelloExample2.java:14)
+[04/Jun/2016:12:41:48 +0100] WARN HelloExample: This is warn
diff --git a/plugins/inputs/win_eventlog/README.md b/plugins/inputs/win_eventlog/README.md
new file mode 100644
index 0000000000000..e3c48656f79c9
--- /dev/null
+++ b/plugins/inputs/win_eventlog/README.md
@@ -0,0 +1,212 @@
+# Windows Eventlog Input Plugin
+
+## Collect Windows Event Log messages
+
+Supports Windows Vista and higher.
+
+Telegraf should have Administrator permissions to subscribe to some of the Windows Event Log channels, such as the System log.
+
+Telegraf minimum version: Telegraf 1.16.0
+
+### Configuration
+
+```toml
+[[inputs.win_eventlog]]
+  ## Telegraf should have Administrator permissions to subscribe to some Windows Events channels
+  ## (System log, for example)
+
+  ## LCID (Locale ID) for event rendering
+  ## 1033 to force English language
+  ## 0 to use default Windows locale
+  # locale = 0
+
+  ## Name of eventlog, used only if xpath_query is empty
+  ## Example: "Application"
+  # eventlog_name = ""
+
+  ## xpath_query can be defined in short form like "Event/System[EventID=999]"
+  ## or you can form an XML Query. Refer to the Consuming Events article:
+  ## https://docs.microsoft.com/en-us/windows/win32/wes/consuming-events
+  ## XML query is the recommended form, because it is the most flexible.
+  ## You can create or debug an XML Query by creating a Custom View in Windows Event Viewer
+  ## and then copying the resulting XML here
+  xpath_query = '''<QueryList>
+    <Query Id="0" Path="Security">
+      <Select Path="Security">*[System[( (EventID &gt;= 5152 and EventID &lt;= 5158) or EventID=5379 or EventID=4672)]]</Select>
+    </Query>
+  </QueryList>
+  '''
+
+  ## System field names:
+  ##   "Source", "EventID", "Version", "Level", "Task", "Opcode", "Keywords", "TimeCreated",
+  ##   "EventRecordID", "ActivityID", "RelatedActivityID", "ProcessID", "ThreadID", "ProcessName",
+  ##   "Channel", "Computer", "UserID", "UserName", "Message", "LevelText", "TaskText", "OpcodeText"
+
+  ## In addition to System, Data fields can be unrolled from additional XML nodes in the event.
+  ## A human-readable representation of those nodes is formatted into the event Message field,
+  ## but the XML is more machine-parsable.
+
+  # Process UserData XML to fields, if this node exists in Event XML
+  process_userdata = true
+
+  # Process EventData XML to fields, if this node exists in Event XML
+  process_eventdata = true
+
+  ## Separator character to use for unrolled XML Data field names
+  separator = "_"
+
+  ## Get only the first line of the Message field. For most events the first line is usually more than enough
+  only_first_line_of_message = true
+
+  ## Fields to include as tags. Globbing supported ("Level*" for both "Level" and "LevelText")
+  event_tags = ["Source", "EventID", "Level", "LevelText", "Task", "TaskText", "Opcode", "OpcodeText", "Keywords", "Channel", "Computer"]
+
+  ## Default list of fields to send. All fields are sent by default. Globbing supported
+  event_fields = ["*"]
+
+  ## Fields to exclude. Also applied to data fields. Globbing supported
+  exclude_fields = ["Binary", "Data_Address*"]
+
+  ## Skip those tags or fields if their value is empty or equals to zero. Globbing supported
+  exclude_empty = ["*ActivityID", "UserID"]
+```
+
+### Filtering
+
+There are three types of filtering: **Event Log** name, **XPath Query** and **XML Query**.
+
+**Event Log** name filtering is simple:
+
+```toml
+  eventlog_name = "Application"
+  xpath_query = ''
+```
+
+For **XPath Query** filtering set the `xpath_query` value, and `eventlog_name` will be ignored:
+
+```toml
+  eventlog_name = ""
+  xpath_query = "Event/System[EventID=999]"
+```
+
+**XML Query** is the most flexible form: you can Select or Suppress any values and give ranges for other values, which is why it is the recommended one. You can create or debug an XML Query by creating a Custom View in Windows Event Viewer and then copying the resulting XML into the config file.
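+
+For example, a minimal XML Query that selects only error-level events (`Level=2`) from the System log could look like this:
+
+```xml
+<QueryList>
+  <Query Id="0" Path="System">
+    <Select Path="System">*[System[Level=2]]</Select>
+  </Query>
+</QueryList>
+```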
+
+XML Query documentation: <https://docs.microsoft.com/en-us/windows/win32/wes/consuming-events>
+
+### Metrics
+
+You can send any field, whether *System*, *Computed* or *XML*, as a tag. The list of those fields is in the `event_tags` config array. Globbing is supported in this array, e.g. `Level*` for all fields beginning with `Level`, or `L?vel` for all fields whose name is `Level`, `L3vel`, `L@vel` and so on. Tag fields are converted to strings automatically.
+
+By default, all other fields are sent, but you can limit that either by listing them in the `event_fields` config array (globbing supported), or by adding field name masks to the `exclude_fields` config array.
+
+You can skip sending fields with empty values by adding masks of the names of such fields to the `exclude_empty` config array. A value is considered empty if a System field of type `int` or `uint32` equals zero, or if any field of type `string` is an empty string.
+
+List of System fields:
+
+- Source (string)
+- EventID (int)
+- Version (int)
+- Level (int)
+- LevelText (string)
+- Opcode (int)
+- OpcodeText (string)
+- Task (int)
+- TaskText (string)
+- Keywords (string): comma-separated in case of multiple values
+- TimeCreated (string)
+- EventRecordID (string)
+- ActivityID (string)
+- RelatedActivityID (string)
+- ProcessID (int)
+- ThreadID (int)
+- ProcessName (string): derived from ProcessID
+- Channel (string)
+- Computer (string): useful if consumed from Forwarded Events
+- UserID (string): SID
+- UserName (string): derived from UserID, presented in form of DOMAIN\Username
+- Message (string)
+
+### Computed fields
+
+Fields `Level`, `Opcode` and `Task` are converted to text and saved as computed `*Text` fields.
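+
+For example, an event with `Level = 4` is reported with an additional computed field `LevelText = "Information"`, as in the example output below.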
+
+`Keywords` field is converted from its hex uint64 value by the `_EvtFormatMessage` WINAPI function. There can be more than one value; in that case they are comma-separated. If keywords can't be converted (bad device driver, or an event forwarded from another computer with an unknown Event Channel), the hex uint64 is saved as is.
+
+`ProcessName` field is found by looking up the ProcessID. It can be empty if telegraf doesn't have enough permissions.
+
+`UserName` field is found by looking up the SID from UserID.
+
+`Message` field is rendered from the event data and can be several kilobytes of text with line breaks. For most events the first line of this text is more than enough, and additional info is more useful when parsed as XML fields. So, for brevity, the plugin takes only the first line. You can set the `only_first_line_of_message` parameter to `false` to take the full message text.
+
+### Additional Fields
+
+The content of **Event Data** and **User Data** XML Nodes can be added as additional fields, and is added by default. You can disable that by setting the `process_userdata` or `process_eventdata` parameters to `false`.
+
+For the fields from additional XML Nodes the `Name` attribute is taken as the name, and the inner text is the value. The type of those fields is always string.
+
+The name of the field is formed from the XML path by adding `_` in between levels. For example, if the UserData XML looks like this:
+
+```xml
+<UserData>
+ <CbsPackageChangeState>
+  <PackageIdentifier>KB4566782</PackageIdentifier>
+  <IntendedPackageState>5112</IntendedPackageState>
+  <IntendedPackageStateTextized>Installed</IntendedPackageStateTextized>
+  <ErrorCode>0x0</ErrorCode>
+  <Client>UpdateAgentLCU</Client>
+ </CbsPackageChangeState>
+</UserData>
+```
+
+It will be converted to the following fields:
+
+```text
+CbsPackageChangeState_PackageIdentifier = "KB4566782"
+CbsPackageChangeState_IntendedPackageState = "5112"
+CbsPackageChangeState_IntendedPackageStateTextized = "Installed"
+CbsPackageChangeState_ErrorCode = "0x0"
+CbsPackageChangeState_Client = "UpdateAgentLCU"
+```
+
+If there is more than one field with the same name, those fields are given a numeric suffix: `_1`, `_2` and so on.
+
+### Localization
+
+The human-readable Event Description is in the Message field. But it is often better to skip it in favour of the Event XML values, because they are more machine-readable.
+
+Keywords, LevelText, TaskText, OpcodeText and Message are saved with the current Windows locale by default. You can override this, for example to the English locale, by setting the `locale` config parameter to `1033`. Unfortunately, **Event Data** and **User Data** XML Nodes are in the default Windows locale only.
+
+The locale should be present on the computer. The English locale is usually available on all localized versions of modern Windows.
+
+### Example Output
+
+Some values are changed for anonymity.
+
+```text
+win_eventlog,Channel=System,Computer=PC,EventID=105,Keywords=0x8000000000000000,Level=4,LevelText=Information,Opcode=10,OpcodeText=General,Source=WudfUsbccidDriver,Task=1,TaskText=Driver,host=PC ProcessName="WUDFHost.exe",UserName="NT AUTHORITY\\LOCAL SERVICE",Data_dwMaxCCIDMessageLength="271",Data_bPINSupport="0x0",Data_bMaxCCIDBusySlots="1",EventRecordID=1914688i,UserID="S-1-5-19",Version=0i,Data_bClassGetEnvelope="0x0",Data_wLcdLayout="0x0",Data_bClassGetResponse="0x0",TimeCreated="2020-08-21T08:43:26.7481077Z",Message="The Smartcard reader reported the following class descriptor (part 2)."
1597999410000000000 + +win_eventlog,Channel=Security,Computer=PC,EventID=4798,Keywords=Audit\ Success,Level=0,LevelText=Information,Opcode=0,OpcodeText=Info,Source=Microsoft-Windows-Security-Auditing,Task=13824,TaskText=User\ Account\ Management,host=PC Data_TargetDomainName="PC",Data_SubjectUserName="User",Data_CallerProcessId="0x3d5c",Data_SubjectLogonId="0x46d14f8d",Version=0i,EventRecordID=223157i,Message="A user's local group membership was enumerated.",Data_TargetUserName="User",Data_TargetSid="S-1-5-21-.-.-.-1001",Data_SubjectUserSid="S-1-5-21-.-.-.-1001",Data_CallerProcessName="C:\\Windows\\explorer.exe",ActivityID="{0d4cc11d-7099-0002-4dc1-4c0d9970d601}",UserID="",Data_SubjectDomainName="PC",TimeCreated="2020-08-21T08:43:27.3036771Z",ProcessName="lsass.exe" 1597999410000000000 + +win_eventlog,Channel=Microsoft-Windows-Dhcp-Client/Admin,Computer=PC,EventID=1002,Keywords=0x4000000000000001,Level=2,LevelText=Error,Opcode=76,OpcodeText=IpLeaseDenied,Source=Microsoft-Windows-Dhcp-Client,Task=3,TaskText=Address\ Configuration\ State\ Event,host=PC Version=0i,Message="The IP address lease 10.20.30.40 for the Network Card with network address 0xaabbccddeeff has been denied by the DHCP server 10.20.30.1 (The DHCP Server sent a DHCPNACK message).",UserID="S-1-5-19",Data_HWLength="6",Data_HWAddress="545595B7EA01",TimeCreated="2020-08-21T08:43:42.8265853Z",EventRecordID=34i,ProcessName="svchost.exe",UserName="NT AUTHORITY\\LOCAL SERVICE" 1597999430000000000 + +win_eventlog,Channel=System,Computer=PC,EventID=10016,Keywords=Classic,Level=3,LevelText=Warning,Opcode=0,OpcodeText=Info,Source=Microsoft-Windows-DistributedCOM,Task=0,host=PC Data_param3="Активация",Data_param6="PC",Data_param8="S-1-5-21-2007059868-50816014-3139024325-1001",Version=0i,UserName="PC\\User",Data_param1="по умолчанию для компьютера",Data_param2="Локально",Data_param7="User",Data_param9="LocalHost (с использованием LRPC)",Data_param10="Microsoft.Windows.ShellExperienceHost_10.0.19041.423_neutral_neutral_cw5n1h2txyewy",ActivityID="{839cac9e-73a1-4559-a847-62f3a5e73e44}",ProcessName="svchost.exe",Message="The по умолчанию для компьютера permission settings do not grant Локально Активация permission for the COM Server application with CLSID ",Data_param5="{316CDED5-E4AE-4B15-9113-7055D84DCC97}",Data_param11="S-1-15-2-.-.-.-.-.-.-2861478708",TimeCreated="2020-08-21T08:43:45.5233759Z",EventRecordID=1914689i,UserID="S-1-5-21-.-.-.-1001",Data_param4="{C2F03A33-21F5-47FA-B4BB-156362A2F239}" 1597999430000000000 + +``` diff --git a/plugins/inputs/win_eventlog/event.go b/plugins/inputs/win_eventlog/event.go new file mode 100644 index 0000000000000..2169ce8b490b3 --- /dev/null +++ b/plugins/inputs/win_eventlog/event.go @@ -0,0 +1,70 @@ +//+build windows + +//revive:disable-next-line:var-naming +// Package win_eventlog Input plugin to collect Windows Event Log messages +package win_eventlog + +// Event is the event entry representation +// Only the most common elements are processed, human-readable data is rendered in Message +// More info on schema, if there will be need to add more: +// https://docs.microsoft.com/en-us/windows/win32/wes/eventschema-elements +type Event struct { + Source Provider `xml:"System>Provider"` + EventID int `xml:"System>EventID"` + Version int `xml:"System>Version"` + Level int `xml:"System>Level"` + Task int `xml:"System>Task"` + Opcode int `xml:"System>Opcode"` + Keywords string `xml:"System>Keywords"` + TimeCreated TimeCreated `xml:"System>TimeCreated"` + EventRecordID int `xml:"System>EventRecordID"` + 
Correlation Correlation `xml:"System>Correlation"` + Execution Execution `xml:"System>Execution"` + Channel string `xml:"System>Channel"` + Computer string `xml:"System>Computer"` + Security Security `xml:"System>Security"` + UserData UserData `xml:"UserData"` + EventData EventData `xml:"EventData"` + Message string + LevelText string + TaskText string + OpcodeText string +} + +// UserData Application-provided XML data +type UserData struct { + InnerXML []byte `xml:",innerxml"` +} + +// EventData Application-provided XML data +type EventData struct { + InnerXML []byte `xml:",innerxml"` +} + +// Provider is the Event provider information +type Provider struct { + Name string `xml:"Name,attr"` +} + +// Correlation is used for the event grouping +type Correlation struct { + ActivityID string `xml:"ActivityID,attr"` + RelatedActivityID string `xml:"RelatedActivityID,attr"` +} + +// Execution Info for Event +type Execution struct { + ProcessID uint32 `xml:"ProcessID,attr"` + ThreadID uint32 `xml:"ThreadID,attr"` + ProcessName string +} + +// Security Data for Event +type Security struct { + UserID string `xml:"UserID,attr"` +} + +// TimeCreated field for Event +type TimeCreated struct { + SystemTime string `xml:"SystemTime,attr"` +} diff --git a/plugins/inputs/win_eventlog/syscall_windows.go b/plugins/inputs/win_eventlog/syscall_windows.go new file mode 100644 index 0000000000000..df02913eee2af --- /dev/null +++ b/plugins/inputs/win_eventlog/syscall_windows.go @@ -0,0 +1,39 @@ +//+build windows + +//revive:disable-next-line:var-naming +// Package win_eventlog Input plugin to collect Windows Event Log messages +package win_eventlog + +import "syscall" + +// Event log error codes. +// https://msdn.microsoft.com/en-us/library/windows/desktop/ms681382(v=vs.85).aspx +const ( + //revive:disable:var-naming + ERROR_INSUFFICIENT_BUFFER syscall.Errno = 122 + ERROR_NO_MORE_ITEMS syscall.Errno = 259 + ERROR_INVALID_OPERATION syscall.Errno = 4317 + //revive:enable:var-naming +) + +// EvtSubscribeFlag defines the possible values that specify when to start subscribing to events. +type EvtSubscribeFlag uint32 + +// EVT_SUBSCRIBE_FLAGS enumeration +// https://msdn.microsoft.com/en-us/library/windows/desktop/aa385588(v=vs.85).aspx +const ( + EvtSubscribeToFutureEvents EvtSubscribeFlag = 1 +) + +// EvtRenderFlag uint32 +type EvtRenderFlag uint32 + +// EVT_RENDER_FLAGS enumeration +// https://msdn.microsoft.com/en-us/library/windows/desktop/aa385563(v=vs.85).aspx +const ( + //revive:disable:var-naming + // Render the event as an XML string. For details on the contents of the + // XML string, see the Event schema. 
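+	// (EVT_RENDER_FLAGS also defines EvtRenderEventValues = 0 and
+	// EvtRenderBookmark = 2, but this plugin only renders events as XML.)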
+ EvtRenderEventXml EvtRenderFlag = 1 + //revive:enable:var-naming +) diff --git a/plugins/inputs/win_eventlog/util.go b/plugins/inputs/win_eventlog/util.go new file mode 100644 index 0000000000000..f085c3c055f5c --- /dev/null +++ b/plugins/inputs/win_eventlog/util.go @@ -0,0 +1,155 @@ +//+build windows + +//revive:disable-next-line:var-naming +// Package win_eventlog Input plugin to collect Windows Event Log messages +package win_eventlog + +import ( + "bytes" + "encoding/xml" + "fmt" + "io" + "strings" + "unicode/utf16" + "unicode/utf8" + "unsafe" + + "golang.org/x/sys/windows" +) + +// DecodeUTF16 to UTF8 bytes +func DecodeUTF16(b []byte) ([]byte, error) { + + if len(b)%2 != 0 { + return nil, fmt.Errorf("must have even length byte slice") + } + + u16s := make([]uint16, 1) + + ret := &bytes.Buffer{} + + b8buf := make([]byte, 4) + + lb := len(b) + for i := 0; i < lb; i += 2 { + u16s[0] = uint16(b[i]) + (uint16(b[i+1]) << 8) + r := utf16.Decode(u16s) + n := utf8.EncodeRune(b8buf, r[0]) + ret.Write(b8buf[:n]) + } + + return ret.Bytes(), nil +} + +// GetFromSnapProcess finds information about process by the given pid +// Returns process parent pid, threads info handle and process name +func GetFromSnapProcess(pid uint32) (uint32, uint32, string, error) { + snap, err := windows.CreateToolhelp32Snapshot(windows.TH32CS_SNAPPROCESS, uint32(pid)) + if err != nil { + return 0, 0, "", err + } + defer windows.CloseHandle(snap) + var pe32 windows.ProcessEntry32 + pe32.Size = uint32(unsafe.Sizeof(pe32)) + if err = windows.Process32First(snap, &pe32); err != nil { + return 0, 0, "", err + } + for { + if pe32.ProcessID == uint32(pid) { + szexe := windows.UTF16ToString(pe32.ExeFile[:]) + return uint32(pe32.ParentProcessID), uint32(pe32.Threads), szexe, nil + } + if err = windows.Process32Next(snap, &pe32); err != nil { + break + } + } + return 0, 0, "", fmt.Errorf("couldn't find pid: %d", pid) +} + +type xmlnode struct { + XMLName xml.Name + Attrs []xml.Attr `xml:"-"` + Content []byte `xml:",innerxml"` + Text string `xml:",chardata"` + Nodes []xmlnode `xml:",any"` +} + +// EventField for unique rendering +type EventField struct { + Name string + Value string +} + +// UnmarshalXML redefined for xml elements walk +func (n *xmlnode) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + n.Attrs = start.Attr + type node xmlnode + + return d.DecodeElement((*node)(n), &start) +} + +// UnrollXMLFields extracts fields from xml data +func UnrollXMLFields(data []byte, fieldsUsage map[string]int, separator string) ([]EventField, map[string]int) { + buf := bytes.NewBuffer(data) + dec := xml.NewDecoder(buf) + var fields []EventField + for { + var node xmlnode + err := dec.Decode(&node) + if err == io.EOF { + break + } + if err != nil { + // log.Fatal(err) + break + } + var parents []string + walkXML([]xmlnode{node}, parents, separator, func(node xmlnode, parents []string, separator string) bool { + innerText := strings.TrimSpace(node.Text) + if len(innerText) > 0 { + valueName := strings.Join(parents, separator) + fieldsUsage[valueName]++ + field := EventField{Name: valueName, Value: innerText} + fields = append(fields, field) + } + return true + }) + } + return fields, fieldsUsage +} + +func walkXML(nodes []xmlnode, parents []string, separator string, f func(xmlnode, []string, string) bool) { + for _, node := range nodes { + parentName := node.XMLName.Local + for _, attr := range node.Attrs { + attrName := strings.ToLower(attr.Name.Local) + if attrName == "name" { + // Add Name attribute to parent name + 
parentName = strings.Join([]string{parentName, attr.Value}, separator) + } + } + nodeParents := append(parents, parentName) + if f(node, nodeParents, separator) { + walkXML(node.Nodes, nodeParents, separator, f) + } + } +} + +// UniqueFieldNames forms unique field names +// by adding _ if there are several of them +func UniqueFieldNames(fields []EventField, fieldsUsage map[string]int, separator string) []EventField { + var fieldsCounter = map[string]int{} + var fieldsUnique []EventField + for _, field := range fields { + fieldName := field.Name + if fieldsUsage[field.Name] > 1 { + fieldsCounter[field.Name]++ + fieldName = fmt.Sprint(field.Name, separator, fieldsCounter[field.Name]) + } + fieldsUnique = append(fieldsUnique, EventField{ + Name: fieldName, + Value: field.Value, + }) + } + return fieldsUnique +} diff --git a/plugins/inputs/win_eventlog/util_test.go b/plugins/inputs/win_eventlog/util_test.go new file mode 100644 index 0000000000000..ce7428dd391d2 --- /dev/null +++ b/plugins/inputs/win_eventlog/util_test.go @@ -0,0 +1,200 @@ +//+build windows + +//revive:disable-next-line:var-naming +// Package win_eventlog Input plugin to collect Windows Event Log messages +package win_eventlog + +import ( + "bytes" + "encoding/binary" + "encoding/xml" + "io" + "reflect" + "testing" + "unicode/utf16" +) + +func TestDecodeUTF16(t *testing.T) { + testString := "Test String" + utf16s := utf16.Encode([]rune(testString)) + var bytesUtf16 bytes.Buffer + writer := io.Writer(&bytesUtf16) + lb := len(utf16s) + for i := 0; i < lb; i++ { + word := make([]byte, 2) + binary.LittleEndian.PutUint16(word, utf16s[i]) + _, err := writer.Write(word) + if err != nil { + t.Errorf("error preparing UTF-16 test string") + return + } + } + type args struct { + b []byte + } + tests := []struct { + name string + args args + want []byte + wantErr bool + }{ + { + name: "Wrong UTF-16", + args: args{b: append(bytesUtf16.Bytes(), byte('\x00'))}, + wantErr: true, + }, + { + name: "UTF-16", + args: args{b: bytesUtf16.Bytes()}, + want: []byte(testString), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := DecodeUTF16(tt.args.b) + if (err != nil) != tt.wantErr { + t.Errorf("DecodeUTF16() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("DecodeUTF16() = %v, want %v", got, tt.want) + } + }) + } +} + +var xmlbroken = ` + + qq + +` + +var xmldata = ` + + + + 5111 + 0x0 + + + + 2120-07-26T15:24:25Z + RulesEngine + RulesEngine + + +` + +type testEvent struct { + UserData struct { + InnerXML []byte `xml:",innerxml"` + } `xml:"UserData"` + EventData struct { + InnerXML []byte `xml:",innerxml"` + } `xml:"EventData"` +} + +func TestUnrollXMLFields(t *testing.T) { + container := testEvent{} + err := xml.Unmarshal([]byte(xmldata), &container) + if err != nil { + t.Errorf("couldn't unmarshal precooked xml string xmldata") + return + } + + type args struct { + data []byte + fieldsUsage map[string]int + } + tests := []struct { + name string + args args + want1 []EventField + want2 map[string]int + }{ + { + name: "Broken XML", + args: args{ + data: []byte(xmlbroken), + fieldsUsage: map[string]int{}, + }, + want1: nil, + want2: map[string]int{}, + }, + { + name: "EventData with non-unique names and one Name attr", + args: args{ + data: container.EventData.InnerXML, + fieldsUsage: map[string]int{}, + }, + want1: []EventField{ + {Name: "Data", Value: "2120-07-26T15:24:25Z"}, + {Name: "Data", Value: "RulesEngine"}, + {Name: "Data_Engine", Value: 
"RulesEngine"}, + }, + want2: map[string]int{"Data": 2, "Data_Engine": 1}, + }, + { + name: "UserData with non-unique names and three levels of depth", + args: args{ + data: container.UserData.InnerXML, + fieldsUsage: map[string]int{}, + }, + want1: []EventField{ + {Name: "CbsPackageChangeState_IntendedPackageState", Value: "5111"}, + {Name: "CbsPackageChangeState_ErrorCode_Code", Value: "0x0"}, + }, + want2: map[string]int{ + "CbsPackageChangeState_ErrorCode_Code": 1, + "CbsPackageChangeState_IntendedPackageState": 1, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, got1 := UnrollXMLFields(tt.args.data, tt.args.fieldsUsage, "_") + if !reflect.DeepEqual(got, tt.want1) { + t.Errorf("ExtractFields() got = %v, want %v", got, tt.want1) + } + if !reflect.DeepEqual(got1, tt.want2) { + t.Errorf("ExtractFields() got1 = %v, want %v", got1, tt.want2) + } + }) + } +} + +func TestUniqueFieldNames(t *testing.T) { + type args struct { + fields []EventField + fieldsUsage map[string]int + } + tests := []struct { + name string + args args + want []EventField + }{ + { + name: "Unique values", + args: args{ + fields: []EventField{ + {Name: "Data", Value: "2120-07-26T15:24:25Z"}, + {Name: "Data", Value: "RulesEngine"}, + {Name: "Engine", Value: "RulesEngine"}, + }, + fieldsUsage: map[string]int{"Data": 2, "Engine": 1}, + }, + want: []EventField{ + {Name: "Data_1", Value: "2120-07-26T15:24:25Z"}, + {Name: "Data_2", Value: "RulesEngine"}, + {Name: "Engine", Value: "RulesEngine"}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := UniqueFieldNames(tt.args.fields, tt.args.fieldsUsage, "_"); !reflect.DeepEqual(got, tt.want) { + t.Errorf("PrintFields() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/plugins/inputs/win_eventlog/win_eventlog.go b/plugins/inputs/win_eventlog/win_eventlog.go new file mode 100644 index 0000000000000..376ef4169d902 --- /dev/null +++ b/plugins/inputs/win_eventlog/win_eventlog.go @@ -0,0 +1,518 @@ +//+build windows + +//revive:disable-next-line:var-naming +// Package win_eventlog Input plugin to collect Windows Event Log messages +package win_eventlog + +import ( + "bufio" + "bytes" + "encoding/xml" + "fmt" + "path/filepath" + "reflect" + "strings" + "syscall" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/inputs" + "golang.org/x/sys/windows" +) + +var sampleConfig = ` + ## Telegraf should have Administrator permissions to subscribe for some Windows Events channels + ## (System log, for example) + + ## LCID (Locale ID) for event rendering + ## 1033 to force English language + ## 0 to use default Windows locale + # locale = 0 + + ## Name of eventlog, used only if xpath_query is empty + ## Example: "Application" + # eventlog_name = "" + + ## xpath_query can be in defined short form like "Event/System[EventID=999]" + ## or you can form a XML Query. 
Refer to the Consuming Events article:
+  ## https://docs.microsoft.com/en-us/windows/win32/wes/consuming-events
+  ## XML query is the recommended form, because it is most flexible
+  ## You can create or debug XML Query by creating Custom View in Windows Event Viewer
+  ## and then copying resulting XML here
+  xpath_query = '''
+  <QueryList>
+    <Query Id="0" Path="Security">
+      <Select Path="Security">*</Select>
+      <Suppress Path="Security">*[System[( (EventID &gt;= 5152 and EventID &lt;= 5158) or EventID=5379 or EventID=4672)]]</Suppress>
+    </Query>
+    <Query Id="1" Path="Application">
+      <Select Path="Application">*[System[(Level &lt; 4)]]</Select>
+    </Query>
+    <Query Id="2" Path="Windows PowerShell">
+      <Select Path="Windows PowerShell">*[System[(Level &lt; 4)]]</Select>
+    </Query>
+    <Query Id="3" Path="System">
+      <Select Path="System">*</Select>
+    </Query>
+    <Query Id="4" Path="Setup">
+      <Select Path="Setup">*</Select>
+    </Query>
+  </QueryList>
+  '''
+
+  ## System field names:
+  ## "Source", "EventID", "Version", "Level", "Task", "Opcode", "Keywords", "TimeCreated",
+  ## "EventRecordID", "ActivityID", "RelatedActivityID", "ProcessID", "ThreadID", "ProcessName",
+  ## "Channel", "Computer", "UserID", "UserName", "Message", "LevelText", "TaskText", "OpcodeText"
+
+  ## In addition to System, Data fields can be unrolled from additional XML nodes in event.
+  ## Human-readable representation of those nodes is formatted into event Message field,
+  ## but XML is more machine-parsable
+
+  # Process UserData XML to fields, if this node exists in Event XML
+  process_userdata = true
+
+  # Process EventData XML to fields, if this node exists in Event XML
+  process_eventdata = true
+
+  ## Separator character to use for unrolled XML Data field names
+  separator = "_"
+
+  ## Get only first line of Message field. For most events first line is usually more than enough
+  only_first_line_of_message = true
+
+  ## Fields to include as tags. Globbing supported ("Level*" for both "Level" and "LevelText")
+  event_tags = ["Source", "EventID", "Level", "LevelText", "Task", "TaskText", "Opcode", "OpcodeText", "Keywords", "Channel", "Computer"]
+
+  ## Default list of fields to send. All fields are sent by default. Globbing supported
+  event_fields = ["*"]
+
+  ## Fields to exclude. Also applied to data fields. Globbing supported
+  exclude_fields = ["Binary", "Data_Address*"]
+
+  ## Skip those tags or fields if their value is empty or equals to zero.
Globbing supported + exclude_empty = ["*ActivityID", "UserID"] +` + +// WinEventLog config +type WinEventLog struct { + Locale uint32 `toml:"locale"` + EventlogName string `toml:"eventlog_name"` + Query string `toml:"xpath_query"` + ProcessUserData bool `toml:"process_userdata"` + ProcessEventData bool `toml:"process_eventdata"` + Separator string `toml:"separator"` + OnlyFirstLineOfMessage bool `toml:"only_first_line_of_message"` + EventTags []string `toml:"event_tags"` + EventFields []string `toml:"event_fields"` + ExcludeFields []string `toml:"exclude_fields"` + ExcludeEmpty []string `toml:"exclude_empty"` + subscription EvtHandle + buf []byte + Log telegraf.Logger +} + +var bufferSize = 1 << 14 + +var description = "Input plugin to collect Windows Event Log messages" + +// Description for win_eventlog +func (w *WinEventLog) Description() string { + return description +} + +// SampleConfig for win_eventlog +func (w *WinEventLog) SampleConfig() string { + return sampleConfig +} + +// Gather Windows Event Log entries +func (w *WinEventLog) Gather(acc telegraf.Accumulator) error { + + var err error + if w.subscription == 0 { + w.subscription, err = w.evtSubscribe(w.EventlogName, w.Query) + if err != nil { + return fmt.Errorf("Windows Event Log subscription error: %v", err.Error()) + } + } + w.Log.Debug("Subscription handle id:", w.subscription) + +loop: + for { + events, err := w.fetchEvents(w.subscription) + if err != nil { + switch { + case err == ERROR_NO_MORE_ITEMS: + break loop + case err != nil: + w.Log.Error("Error getting events:", err.Error()) + return err + } + } + + for _, event := range events { + // Prepare fields names usage counter + var fieldsUsage = map[string]int{} + + tags := map[string]string{} + fields := map[string]interface{}{} + evt := reflect.ValueOf(&event).Elem() + // Walk through all fields of Event struct to process System tags or fields + for i := 0; i < evt.NumField(); i++ { + fieldName := evt.Type().Field(i).Name + fieldType := evt.Field(i).Type().String() + fieldValue := evt.Field(i).Interface() + computedValues := map[string]interface{}{} + switch fieldName { + case "Source": + fieldValue = event.Source.Name + fieldType = reflect.TypeOf(fieldValue).String() + case "Execution": + fieldValue := event.Execution.ProcessID + fieldType = reflect.TypeOf(fieldValue).String() + fieldName = "ProcessID" + // Look up Process Name from pid + if should, _ := w.shouldProcessField("ProcessName"); should { + _, _, processName, err := GetFromSnapProcess(fieldValue) + if err == nil { + computedValues["ProcessName"] = processName + } + } + case "TimeCreated": + fieldValue = event.TimeCreated.SystemTime + fieldType = reflect.TypeOf(fieldValue).String() + case "Correlation": + if should, _ := w.shouldProcessField("ActivityID"); should { + activityID := event.Correlation.ActivityID + if len(activityID) > 0 { + computedValues["ActivityID"] = activityID + } + } + if should, _ := w.shouldProcessField("RelatedActivityID"); should { + relatedActivityID := event.Correlation.RelatedActivityID + if len(relatedActivityID) > 0 { + computedValues["RelatedActivityID"] = relatedActivityID + } + } + case "Security": + computedValues["UserID"] = event.Security.UserID + // Look up UserName and Domain from SID + if should, _ := w.shouldProcessField("UserName"); should { + sid := event.Security.UserID + usid, err := syscall.StringToSid(sid) + if err == nil { + username, domain, _, err := usid.LookupAccount("") + if err == nil { + computedValues["UserName"] = fmt.Sprint(domain, "\\", username) + } 
+ } + } + default: + } + if should, where := w.shouldProcessField(fieldName); should { + if where == "tags" { + strValue := fmt.Sprintf("%v", fieldValue) + if !w.shouldExcludeEmptyField(fieldName, "string", strValue) { + tags[fieldName] = strValue + fieldsUsage[fieldName]++ + } + } else if where == "fields" { + if !w.shouldExcludeEmptyField(fieldName, fieldType, fieldValue) { + fields[fieldName] = fieldValue + fieldsUsage[fieldName]++ + } + } + } + + // Insert computed fields + for computedKey, computedValue := range computedValues { + if should, where := w.shouldProcessField(computedKey); should { + if where == "tags" { + tags[computedKey] = fmt.Sprintf("%v", computedValue) + fieldsUsage[computedKey]++ + } else if where == "fields" { + fields[computedKey] = computedValue + fieldsUsage[computedKey]++ + } + } + } + } + + // Unroll additional XML + var xmlFields []EventField + if w.ProcessUserData { + fieldsUserData, xmlFieldsUsage := UnrollXMLFields(event.UserData.InnerXML, fieldsUsage, w.Separator) + xmlFields = append(xmlFields, fieldsUserData...) + fieldsUsage = xmlFieldsUsage + } + if w.ProcessEventData { + fieldsEventData, xmlFieldsUsage := UnrollXMLFields(event.EventData.InnerXML, fieldsUsage, w.Separator) + xmlFields = append(xmlFields, fieldsEventData...) + fieldsUsage = xmlFieldsUsage + } + uniqueXMLFields := UniqueFieldNames(xmlFields, fieldsUsage, w.Separator) + for _, xmlField := range uniqueXMLFields { + if !w.shouldExclude(xmlField.Name) { + fields[xmlField.Name] = xmlField.Value + } + } + + // Pass collected metrics + acc.AddFields("win_eventlog", fields, tags) + } + } + + return nil +} + +func (w *WinEventLog) shouldExclude(field string) (should bool) { + for _, excludePattern := range w.ExcludeFields { + // Check if field name matches excluded list + if matched, _ := filepath.Match(excludePattern, field); matched { + return true + } + } + return false +} + +func (w *WinEventLog) shouldProcessField(field string) (should bool, list string) { + for _, pattern := range w.EventTags { + if matched, _ := filepath.Match(pattern, field); matched { + // Tags are not excluded + return true, "tags" + } + } + + for _, pattern := range w.EventFields { + if matched, _ := filepath.Match(pattern, field); matched { + if w.shouldExclude(field) { + return false, "excluded" + } + return true, "fields" + } + } + return false, "excluded" +} + +func (w *WinEventLog) shouldExcludeEmptyField(field string, fieldType string, fieldValue interface{}) (should bool) { + for _, pattern := range w.ExcludeEmpty { + if matched, _ := filepath.Match(pattern, field); matched { + switch fieldType { + case "string": + return len(fieldValue.(string)) < 1 + case "int": + return fieldValue.(int) == 0 + case "uint32": + return fieldValue.(uint32) == 0 + } + } + } + return false +} + +func (w *WinEventLog) evtSubscribe(logName, xquery string) (EvtHandle, error) { + var logNamePtr, xqueryPtr *uint16 + + sigEvent, err := windows.CreateEvent(nil, 0, 0, nil) + if err != nil { + return 0, err + } + defer windows.CloseHandle(sigEvent) + + logNamePtr, err = syscall.UTF16PtrFromString(logName) + if err != nil { + return 0, err + } + + xqueryPtr, err = syscall.UTF16PtrFromString(xquery) + if err != nil { + return 0, err + } + + subsHandle, err := _EvtSubscribe(0, uintptr(sigEvent), logNamePtr, xqueryPtr, + 0, 0, 0, EvtSubscribeToFutureEvents) + if err != nil { + return 0, err + } + + return subsHandle, nil +} + +func (w *WinEventLog) fetchEventHandles(subsHandle EvtHandle) ([]EvtHandle, error) { + var eventsNumber uint32 + var 
evtReturned uint32
+
+	eventsNumber = 5
+
+	eventHandles := make([]EvtHandle, eventsNumber)
+
+	err := _EvtNext(subsHandle, eventsNumber, &eventHandles[0], 0, 0, &evtReturned)
+	if err != nil {
+		if err == ERROR_INVALID_OPERATION && evtReturned == 0 {
+			return nil, ERROR_NO_MORE_ITEMS
+		}
+		return nil, err
+	}
+
+	return eventHandles[:evtReturned], nil
+}
+
+func (w *WinEventLog) fetchEvents(subsHandle EvtHandle) ([]Event, error) {
+	var events []Event
+
+	eventHandles, err := w.fetchEventHandles(subsHandle)
+	if err != nil {
+		return nil, err
+	}
+
+	for _, eventHandle := range eventHandles {
+		if eventHandle != 0 {
+			event, err := w.renderEvent(eventHandle)
+			if err == nil {
+				// w.Log.Debugf("Got event: %v", event)
+				events = append(events, event)
+			}
+		}
+	}
+
+	for i := 0; i < len(eventHandles); i++ {
+		err := _EvtClose(eventHandles[i])
+		if err != nil {
+			return events, err
+		}
+	}
+	return events, nil
+}
+
+func (w *WinEventLog) renderEvent(eventHandle EvtHandle) (Event, error) {
+	var bufferUsed, propertyCount uint32
+
+	event := Event{}
+	err := _EvtRender(0, eventHandle, EvtRenderEventXml, uint32(len(w.buf)), &w.buf[0], &bufferUsed, &propertyCount)
+	if err != nil {
+		return event, err
+	}
+
+	eventXML, err := DecodeUTF16(w.buf[:bufferUsed])
+	if err != nil {
+		return event, err
+	}
+	err = xml.Unmarshal([]byte(eventXML), &event)
+	if err != nil {
+		// We can return the event without most text values;
+		// that way we will not lose information.
+		// This can happen when processing Forwarded Events.
+		return event, nil
+	}
+
+	publisherHandle, err := openPublisherMetadata(0, event.Source.Name, w.Locale)
+	if err != nil {
+		return event, nil
+	}
+	defer _EvtClose(publisherHandle)
+
+	// Populating text values
+	keywords, err := formatEventString(EvtFormatMessageKeyword, eventHandle, publisherHandle)
+	if err == nil {
+		event.Keywords = keywords
+	}
+	message, err := formatEventString(EvtFormatMessageEvent, eventHandle, publisherHandle)
+	if err == nil {
+		if w.OnlyFirstLineOfMessage {
+			scanner := bufio.NewScanner(strings.NewReader(message))
+			scanner.Scan()
+			message = scanner.Text()
+		}
+		event.Message = message
+	}
+	level, err := formatEventString(EvtFormatMessageLevel, eventHandle, publisherHandle)
+	if err == nil {
+		event.LevelText = level
+	}
+	task, err := formatEventString(EvtFormatMessageTask, eventHandle, publisherHandle)
+	if err == nil {
+		event.TaskText = task
+	}
+	opcode, err := formatEventString(EvtFormatMessageOpcode, eventHandle, publisherHandle)
+	if err == nil {
+		event.OpcodeText = opcode
+	}
+	return event, nil
+}
+
+func formatEventString(
+	messageFlag EvtFormatMessageFlag,
+	eventHandle EvtHandle,
+	publisherHandle EvtHandle,
+) (string, error) {
+	var bufferUsed uint32
+	err := _EvtFormatMessage(publisherHandle, eventHandle, 0, 0, 0, messageFlag,
+		0, nil, &bufferUsed)
+	if err != nil && err != ERROR_INSUFFICIENT_BUFFER {
+		return "", err
+	}
+
+	bufferUsed *= 2
+	buffer := make([]byte, bufferUsed)
+	bufferUsed = 0
+
+	err = _EvtFormatMessage(publisherHandle, eventHandle, 0, 0, 0, messageFlag,
+		uint32(len(buffer)/2), &buffer[0], &bufferUsed)
+	bufferUsed *= 2
+	if err != nil {
+		return "", err
+	}
+
+	result, err := DecodeUTF16(buffer[:bufferUsed])
+	if err != nil {
+		return "", err
+	}
+
+	var out string
+	if messageFlag == EvtFormatMessageKeyword {
+		// Keywords are returned as an array of zero-terminated strings
+		splitZero := func(c rune) bool { return c == '\x00' }
+		eventKeywords := strings.FieldsFunc(string(result), splitZero)
+		// So convert them to
comma-separated string + out = strings.Join(eventKeywords, ",") + } else { + result := bytes.Trim(result, "\x00") + out = string(result) + } + return out, nil +} + +// openPublisherMetadata opens a handle to the publisher's metadata. Close must +// be called on returned EvtHandle when finished with the handle. +func openPublisherMetadata( + session EvtHandle, + publisherName string, + lang uint32, +) (EvtHandle, error) { + p, err := syscall.UTF16PtrFromString(publisherName) + if err != nil { + return 0, err + } + + h, err := _EvtOpenPublisherMetadata(session, p, nil, lang, 0) + if err != nil { + return 0, err + } + + return h, nil +} + +func init() { + inputs.Add("win_eventlog", func() telegraf.Input { + return &WinEventLog{ + buf: make([]byte, bufferSize), + ProcessUserData: true, + ProcessEventData: true, + Separator: "_", + OnlyFirstLineOfMessage: true, + EventTags: []string{"Source", "EventID", "Level", "LevelText", "Keywords", "Channel", "Computer"}, + EventFields: []string{"*"}, + ExcludeEmpty: []string{"Task", "Opcode", "*ActivityID", "UserID"}, + } + }) +} diff --git a/plugins/inputs/win_eventlog/win_eventlog_notwindows.go b/plugins/inputs/win_eventlog/win_eventlog_notwindows.go new file mode 100644 index 0000000000000..005077aa64c7d --- /dev/null +++ b/plugins/inputs/win_eventlog/win_eventlog_notwindows.go @@ -0,0 +1,5 @@ +// +build !windows + +//revive:disable-next-line:var-naming +// Package win_eventlog Input plugin to collect Windows Event Log messages +package win_eventlog diff --git a/plugins/inputs/win_eventlog/win_eventlog_test.go b/plugins/inputs/win_eventlog/win_eventlog_test.go new file mode 100644 index 0000000000000..9f922431ed776 --- /dev/null +++ b/plugins/inputs/win_eventlog/win_eventlog_test.go @@ -0,0 +1,136 @@ +//+build windows + +//revive:disable-next-line:var-naming +// Package win_eventlog Input plugin to collect Windows Event Log messages +package win_eventlog + +import ( + "testing" +) + +func TestWinEventLog_shouldExcludeEmptyField(t *testing.T) { + type args struct { + field string + fieldType string + fieldValue interface{} + } + tests := []struct { + name string + w *WinEventLog + args args + wantShould bool + }{ + { + name: "Not in list", + args: args{field: "qq", fieldType: "string", fieldValue: ""}, + wantShould: false, + w: &WinEventLog{ExcludeEmpty: []string{"te*"}}, + }, + { + name: "Empty string", + args: args{field: "test", fieldType: "string", fieldValue: ""}, + wantShould: true, + w: &WinEventLog{ExcludeEmpty: []string{"te*"}}, + }, + { + name: "Non-empty string", + args: args{field: "test", fieldType: "string", fieldValue: "qq"}, + wantShould: false, + w: &WinEventLog{ExcludeEmpty: []string{"te*"}}, + }, + { + name: "Zero int", + args: args{field: "test", fieldType: "int", fieldValue: int(0)}, + wantShould: true, + w: &WinEventLog{ExcludeEmpty: []string{"te*"}}, + }, + { + name: "Non-zero int", + args: args{field: "test", fieldType: "int", fieldValue: int(-1)}, + wantShould: false, + w: &WinEventLog{ExcludeEmpty: []string{"te*"}}, + }, + { + name: "Zero uint32", + args: args{field: "test", fieldType: "uint32", fieldValue: uint32(0)}, + wantShould: true, + w: &WinEventLog{ExcludeEmpty: []string{"te*"}}, + }, + { + name: "Non-zero uint32", + args: args{field: "test", fieldType: "uint32", fieldValue: uint32(0xc0fefeed)}, + wantShould: false, + w: &WinEventLog{ExcludeEmpty: []string{"te*"}}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if gotShould := tt.w.shouldExcludeEmptyField(tt.args.field, tt.args.fieldType, 
tt.args.fieldValue); gotShould != tt.wantShould {
+				t.Errorf("WinEventLog.shouldExcludeEmptyField() = %v, want %v", gotShould, tt.wantShould)
+			}
+		})
+	}
+}
+
+func TestWinEventLog_shouldProcessField(t *testing.T) {
+	tags := []string{"Source", "Level*"}
+	fields := []string{"EventID", "Message*"}
+	excluded := []string{"Message*"}
+	type args struct {
+		field string
+	}
+	tests := []struct {
+		name       string
+		w          *WinEventLog
+		args       args
+		wantShould bool
+		wantList   string
+	}{
+		{
+			name:       "Not in tags",
+			args:       args{field: "test"},
+			wantShould: false,
+			wantList:   "excluded",
+			w:          &WinEventLog{EventTags: tags, EventFields: fields, ExcludeFields: excluded},
+		},
+		{
+			name:       "In Tags",
+			args:       args{field: "LevelText"},
+			wantShould: true,
+			wantList:   "tags",
+			w:          &WinEventLog{EventTags: tags, EventFields: fields, ExcludeFields: excluded},
+		},
+		{
+			name:       "Not in Fields",
+			args:       args{field: "EventId"},
+			wantShould: false,
+			wantList:   "excluded",
+			w:          &WinEventLog{EventTags: tags, EventFields: fields, ExcludeFields: excluded},
+		},
+		{
+			name:       "In Fields",
+			args:       args{field: "EventID"},
+			wantShould: true,
+			wantList:   "fields",
+			w:          &WinEventLog{EventTags: tags, EventFields: fields, ExcludeFields: excluded},
+		},
+		{
+			name:       "In Fields and Excluded",
+			args:       args{field: "Messages"},
+			wantShould: false,
+			wantList:   "excluded",
+			w:          &WinEventLog{EventTags: tags, EventFields: fields, ExcludeFields: excluded},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			gotShould, gotList := tt.w.shouldProcessField(tt.args.field)
+			if gotShould != tt.wantShould {
+				t.Errorf("WinEventLog.shouldProcessField() gotShould = %v, want %v", gotShould, tt.wantShould)
+			}
+			if gotList != tt.wantList {
+				t.Errorf("WinEventLog.shouldProcessField() gotList = %v, want %v", gotList, tt.wantList)
+			}
+		})
+	}
+}
diff --git a/plugins/inputs/win_eventlog/zsyscall_windows.go b/plugins/inputs/win_eventlog/zsyscall_windows.go
new file mode 100644
index 0000000000000..5c7b0a504b0bf
--- /dev/null
+++ b/plugins/inputs/win_eventlog/zsyscall_windows.go
@@ -0,0 +1,151 @@
+//+build windows
+
+//revive:disable-next-line:var-naming
+// Package win_eventlog Input plugin to collect Windows Event Log messages
+package win_eventlog
+
+import (
+	"syscall"
+	"unsafe"
+
+	"golang.org/x/sys/windows"
+)
+
+var _ unsafe.Pointer
+
+// EvtHandle uintptr
+type EvtHandle uintptr
+
+// Do the interface allocations only once for common
+// Errno values.
+const (
+	//revive:disable-next-line:var-naming
+	errnoERROR_IO_PENDING = 997
+)
+
+var (
+	//revive:disable-next-line:var-naming
+	errERROR_IO_PENDING error = syscall.Errno(errnoERROR_IO_PENDING)
+)
+
+// EvtFormatMessageFlag defines the values that specify the message string from
+// the event to format.
+type EvtFormatMessageFlag uint32
+
+// EVT_FORMAT_MESSAGE_FLAGS enumeration
+// https://msdn.microsoft.com/en-us/library/windows/desktop/aa385525(v=vs.85).aspx
+const (
+	//revive:disable:var-naming
+	// Format the event's message string.
+	EvtFormatMessageEvent EvtFormatMessageFlag = iota + 1
+	// Format the message string of the level specified in the event.
+	EvtFormatMessageLevel
+	// Format the message string of the task specified in the event.
+	EvtFormatMessageTask
+	// Format the message string of the opcode specified in the event.
+	EvtFormatMessageOpcode
+	// Format the message string of the keywords specified in the event. If the
+	// event specifies multiple keywords, the formatted string is a list of
+	// null-terminated strings.
Increment through the strings until your pointer + // points past the end of the used buffer. + EvtFormatMessageKeyword + //revive:enable:var-naming +) + +// errnoErr returns common boxed Errno values, to prevent +// allocations at runtime. +func errnoErr(e syscall.Errno) error { + switch e { + case 0: + return nil + case errnoERROR_IO_PENDING: + return errERROR_IO_PENDING + } + + return e +} + +var ( + modwevtapi = windows.NewLazySystemDLL("wevtapi.dll") + + procEvtSubscribe = modwevtapi.NewProc("EvtSubscribe") + procEvtRender = modwevtapi.NewProc("EvtRender") + procEvtClose = modwevtapi.NewProc("EvtClose") + procEvtNext = modwevtapi.NewProc("EvtNext") + procEvtFormatMessage = modwevtapi.NewProc("EvtFormatMessage") + procEvtOpenPublisherMetadata = modwevtapi.NewProc("EvtOpenPublisherMetadata") +) + +func _EvtSubscribe(session EvtHandle, signalEvent uintptr, channelPath *uint16, query *uint16, bookmark EvtHandle, context uintptr, callback syscall.Handle, flags EvtSubscribeFlag) (handle EvtHandle, err error) { + r0, _, e1 := syscall.Syscall9(procEvtSubscribe.Addr(), 8, uintptr(session), uintptr(signalEvent), uintptr(unsafe.Pointer(channelPath)), uintptr(unsafe.Pointer(query)), uintptr(bookmark), uintptr(context), uintptr(callback), uintptr(flags), 0) + handle = EvtHandle(r0) + if handle == 0 { + if e1 != 0 { + err = errnoErr(e1) + } else { + err = syscall.EINVAL + } + } + return +} + +func _EvtRender(context EvtHandle, fragment EvtHandle, flags EvtRenderFlag, bufferSize uint32, buffer *byte, bufferUsed *uint32, propertyCount *uint32) (err error) { + r1, _, e1 := syscall.Syscall9(procEvtRender.Addr(), 7, uintptr(context), uintptr(fragment), uintptr(flags), uintptr(bufferSize), uintptr(unsafe.Pointer(buffer)), uintptr(unsafe.Pointer(bufferUsed)), uintptr(unsafe.Pointer(propertyCount)), 0, 0) + if r1 == 0 { + if e1 != 0 { + err = errnoErr(e1) + } else { + err = syscall.EINVAL + } + } + return +} + +func _EvtClose(object EvtHandle) (err error) { + r1, _, e1 := syscall.Syscall(procEvtClose.Addr(), 1, uintptr(object), 0, 0) + if r1 == 0 { + if e1 != 0 { + err = errnoErr(e1) + } else { + err = syscall.EINVAL + } + } + return +} + +func _EvtNext(resultSet EvtHandle, eventArraySize uint32, eventArray *EvtHandle, timeout uint32, flags uint32, numReturned *uint32) (err error) { + r1, _, e1 := syscall.Syscall6(procEvtNext.Addr(), 6, uintptr(resultSet), uintptr(eventArraySize), uintptr(unsafe.Pointer(eventArray)), uintptr(timeout), uintptr(flags), uintptr(unsafe.Pointer(numReturned))) + if r1 == 0 { + if e1 != 0 { + err = errnoErr(e1) + } else { + err = syscall.EINVAL + } + } + return +} + +func _EvtFormatMessage(publisherMetadata EvtHandle, event EvtHandle, messageID uint32, valueCount uint32, values uintptr, flags EvtFormatMessageFlag, bufferSize uint32, buffer *byte, bufferUsed *uint32) (err error) { + r1, _, e1 := syscall.Syscall9(procEvtFormatMessage.Addr(), 9, uintptr(publisherMetadata), uintptr(event), uintptr(messageID), uintptr(valueCount), uintptr(values), uintptr(flags), uintptr(bufferSize), uintptr(unsafe.Pointer(buffer)), uintptr(unsafe.Pointer(bufferUsed))) + if r1 == 0 { + if e1 != 0 { + err = errnoErr(e1) + } else { + err = syscall.EINVAL + } + } + return +} + +func _EvtOpenPublisherMetadata(session EvtHandle, publisherIdentity *uint16, logFilePath *uint16, locale uint32, flags uint32) (handle EvtHandle, err error) { + r0, _, e1 := syscall.Syscall6(procEvtOpenPublisherMetadata.Addr(), 5, uintptr(session), uintptr(unsafe.Pointer(publisherIdentity)), uintptr(unsafe.Pointer(logFilePath)), 
uintptr(locale), uintptr(flags), 0) + handle = EvtHandle(r0) + if handle == 0 { + if e1 != 0 { + err = errnoErr(e1) + } else { + err = syscall.EINVAL + } + } + return +} diff --git a/plugins/outputs/dynatrace/README.md b/plugins/outputs/dynatrace/README.md index be8bf7408c5e0..78a7ee4990067 100644 --- a/plugins/outputs/dynatrace/README.md +++ b/plugins/outputs/dynatrace/README.md @@ -2,6 +2,9 @@ This plugin is sending telegraf metrics to [Dynatrace](www.dynatrace.com). It has two operational modes. +Telegraf minimum version: Telegraf 1.16 +Plugin minimum tested version: 1.16 + ## Running alongside Dynatrace OneAgent if you run the Telegraf agent on a host or VM that is monitored by the Dynatrace OneAgent then you only need to enable the plugin but need no further configuration. The Dynatrace telegraf output plugin will send all metrics to the OneAgent which will use its secure and load balanced connection to send the metrics to your Dynatrace SaaS or Managed environment. diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go index 8c8fa984d82a8..596366ae8470f 100644 --- a/plugins/outputs/dynatrace/dynatrace.go +++ b/plugins/outputs/dynatrace/dynatrace.go @@ -195,17 +195,26 @@ func (d *Dynatrace) Write(metrics []telegraf.Metric) error { // write metric id,tags and value switch metric.Type() { case telegraf.Counter: + var delta float64 = 0 + + // Check if LastValue exists if lastvalue, ok := counts[metricID+tagb.String()]; ok { - // only send a counter if a lastvalue is found in the map - // if last value is found we can calc and send the delta value - if v, err := strconv.ParseFloat(lastvalue, 32); err == nil { - if v2, err := strconv.ParseFloat(value, 32); err == nil { - fmt.Fprintf(&buf, "%s%s count,delta=%f\n", metricID, tagb.String(), v2-v) - } + // Convert Strings to Floats + floatLastValue, err := strconv.ParseFloat(lastvalue, 32) + if err != nil { + d.Log.Debugf("Could not parse last value: %s", lastvalue) + } + floatCurrentValue, err := strconv.ParseFloat(value, 32) + if err != nil { + d.Log.Debugf("Could not parse current value: %s", value) + } + if floatCurrentValue > floatLastValue { + delta = floatCurrentValue - floatLastValue + fmt.Fprintf(&buf, "%s%s count,delta=%f\n", metricID, tagb.String(), delta) } } - // put the current value into the map as last value counts[metricID+tagb.String()] = value + default: fmt.Fprintf(&buf, "%s%s %v\n", metricID, tagb.String(), value) } @@ -214,6 +223,7 @@ func (d *Dynatrace) Write(metrics []telegraf.Metric) error { } sent++ // in typical interval of 10s, we will clean the counter state once in 24h which is 8640 iterations + if sent%8640 == 0 { counts = make(map[string]string) } diff --git a/plugins/outputs/influxdb/http.go b/plugins/outputs/influxdb/http.go index 19ae6f31f45c6..57e3e918b8202 100644 --- a/plugins/outputs/influxdb/http.go +++ b/plugins/outputs/influxdb/http.go @@ -202,10 +202,12 @@ func (c *httpClient) Database() string { // Note that some names are not allowed by the server, notably those with // non-printable characters or slashes. 
func (c *httpClient) CreateDatabase(ctx context.Context, database string) error { - query := fmt.Sprintf(`CREATE DATABASE "%s"`, - escapeIdentifier.Replace(database)) + query := fmt.Sprintf(`CREATE DATABASE "%s"`, escapeIdentifier.Replace(database)) req, err := c.makeQueryRequest(query) + if err != nil { + return err + } resp, err := c.client.Do(req.WithContext(ctx)) if err != nil { diff --git a/plugins/outputs/influxdb_v2/README.md b/plugins/outputs/influxdb_v2/README.md index 1605bda0c725a..b176fffcd31e1 100644 --- a/plugins/outputs/influxdb_v2/README.md +++ b/plugins/outputs/influxdb_v2/README.md @@ -12,7 +12,7 @@ The InfluxDB output plugin writes metrics to the [InfluxDB v2.x] HTTP service. ## Multiple URLs can be specified for a single cluster, only ONE of the ## urls will be written to each interval. ## ex: urls = ["https://us-west-2-1.aws.cloud2.influxdata.com"] - urls = ["http://127.0.0.1:9999"] + urls = ["http://127.0.0.1:8086"] ## Token for authentication. token = "" diff --git a/plugins/outputs/influxdb_v2/influxdb.go b/plugins/outputs/influxdb_v2/influxdb.go index ccafbc4bd685a..6076297f8c83a 100644 --- a/plugins/outputs/influxdb_v2/influxdb.go +++ b/plugins/outputs/influxdb_v2/influxdb.go @@ -17,7 +17,7 @@ import ( ) var ( - defaultURL = "http://localhost:9999" + defaultURL = "http://localhost:8086" ErrMissingURL = errors.New("missing URL") ) @@ -28,7 +28,7 @@ var sampleConfig = ` ## Multiple URLs can be specified for a single cluster, only ONE of the ## urls will be written to each interval. ## ex: urls = ["https://us-west-2-1.aws.cloud2.influxdata.com"] - urls = ["http://127.0.0.1:9999"] + urls = ["http://127.0.0.1:8086"] ## Token for authentication. token = "" diff --git a/plugins/outputs/influxdb_v2/influxdb_test.go b/plugins/outputs/influxdb_v2/influxdb_test.go index 25af4fe6a4c6e..90a3823915a5b 100644 --- a/plugins/outputs/influxdb_v2/influxdb_test.go +++ b/plugins/outputs/influxdb_v2/influxdb_test.go @@ -16,7 +16,7 @@ func TestDefaultURL(t *testing.T) { if len(output.URLs) < 1 { t.Fatal("Default URL failed to get set") } - require.Equal(t, "http://localhost:9999", output.URLs[0]) + require.Equal(t, "http://localhost:8086", output.URLs[0]) } func TestConnect(t *testing.T) { tests := []struct { @@ -26,7 +26,7 @@ func TestConnect(t *testing.T) { { out: influxdb.InfluxDB{ URLs: []string{"http://localhost:1234"}, - HTTPProxy: "http://localhost:9999", + HTTPProxy: "http://localhost:8086", HTTPHeaders: map[string]string{ "x": "y", }, @@ -36,7 +36,7 @@ func TestConnect(t *testing.T) { err: true, out: influxdb.InfluxDB{ URLs: []string{"!@#$qwert"}, - HTTPProxy: "http://localhost:9999", + HTTPProxy: "http://localhost:8086", HTTPHeaders: map[string]string{ "x": "y", }, @@ -56,7 +56,7 @@ func TestConnect(t *testing.T) { err: true, out: influxdb.InfluxDB{ URLs: []string{"!@#$%^&*()_+"}, - HTTPProxy: "http://localhost:9999", + HTTPProxy: "http://localhost:8086", HTTPHeaders: map[string]string{ "x": "y", }, @@ -66,7 +66,7 @@ func TestConnect(t *testing.T) { err: true, out: influxdb.InfluxDB{ URLs: []string{":::@#$qwert"}, - HTTPProxy: "http://localhost:9999", + HTTPProxy: "http://localhost:8086", HTTPHeaders: map[string]string{ "x": "y", }, diff --git a/plugins/outputs/kafka/kafka.go b/plugins/outputs/kafka/kafka.go index d7071f257babc..26a0c5bdb9a65 100644 --- a/plugins/outputs/kafka/kafka.go +++ b/plugins/outputs/kafka/kafka.go @@ -78,8 +78,9 @@ type DebugLogger struct { func (*DebugLogger) Print(v ...interface{}) { args := make([]interface{}, 0, len(v)+1) - args = append(args, "D! 
[sarama] ") - log.Print(v...) + args = append(append(args, "D! [sarama] "), v...) + log.Print(args...) + } func (*DebugLogger) Printf(format string, v ...interface{}) { @@ -88,7 +89,7 @@ func (*DebugLogger) Printf(format string, v ...interface{}) { func (*DebugLogger) Println(v ...interface{}) { args := make([]interface{}, 0, len(v)+1) - args = append(args, "D! [sarama] ") + args = append(append(args, "D! [sarama] "), v...) log.Println(args...) } diff --git a/plugins/outputs/sumologic/README.md b/plugins/outputs/sumologic/README.md index 165315121f434..d3a90df373686 100644 --- a/plugins/outputs/sumologic/README.md +++ b/plugins/outputs/sumologic/README.md @@ -3,6 +3,8 @@ This plugin sends metrics to [Sumo Logic HTTP Source](https://help.sumologic.com/03Send-Data/Sources/02Sources-for-Hosted-Collectors/HTTP-Source/Upload-Metrics-to-an-HTTP-Source) in HTTP messages, encoded using one of the output data formats. +Telegraf minimum version: Telegraf 1.16.0 + Currently metrics can be sent using one of the following data formats, supported by Sumologic HTTP Source: diff --git a/plugins/outputs/sumologic/sumologic.go b/plugins/outputs/sumologic/sumologic.go index 42ffc3dd3ef6d..5497da6066478 100644 --- a/plugins/outputs/sumologic/sumologic.go +++ b/plugins/outputs/sumologic/sumologic.go @@ -122,11 +122,19 @@ func (s *SumoLogic) SetSerializer(serializer serializers.Serializer) { s.headers = make(map[string]string) } - switch serializer.(type) { + switch sr := serializer.(type) { case *carbon2.Serializer: s.headers[contentTypeHeader] = carbon2ContentType + + // In case Carbon2 is used and the metrics format was unset, default to + // include field in metric name. + if sr.IsMetricsFormatUnset() { + sr.SetMetricsFormat(carbon2.Carbon2FormatMetricIncludesField) + } + case *graphite.GraphiteSerializer: s.headers[contentTypeHeader] = graphiteContentType + case *prometheus.Serializer: s.headers[contentTypeHeader] = prometheusContentType diff --git a/plugins/outputs/sumologic/sumologic_test.go b/plugins/outputs/sumologic/sumologic_test.go index ff9e39d8f58bf..9c86e0b80549f 100644 --- a/plugins/outputs/sumologic/sumologic_test.go +++ b/plugins/outputs/sumologic/sumologic_test.go @@ -2,6 +2,7 @@ package sumologic import ( "bufio" + "bytes" "compress/gzip" "fmt" "io" @@ -20,7 +21,6 @@ import ( "github.com/influxdata/telegraf/config" "github.com/influxdata/telegraf/internal" "github.com/influxdata/telegraf/metric" - "github.com/influxdata/telegraf/plugins/serializers" "github.com/influxdata/telegraf/plugins/serializers/carbon2" "github.com/influxdata/telegraf/plugins/serializers/graphite" "github.com/influxdata/telegraf/plugins/serializers/prometheus" @@ -135,7 +135,7 @@ func TestMethod(t *testing.T) { w.WriteHeader(http.StatusOK) }) - serializer, err := carbon2.NewSerializer(carbon2.Carbon2FormatFieldSeparate) + serializer, err := carbon2.NewSerializer(string(carbon2.Carbon2FormatFieldSeparate)) require.NoError(t, err) plugin := tt.plugin() @@ -212,7 +212,7 @@ func TestStatusCode(t *testing.T) { w.WriteHeader(tt.statusCode) }) - serializer, err := carbon2.NewSerializer(carbon2.Carbon2FormatFieldSeparate) + serializer, err := carbon2.NewSerializer(string(carbon2.Carbon2FormatFieldSeparate)) require.NoError(t, err) tt.plugin.SetSerializer(serializer) @@ -226,77 +226,88 @@ func TestStatusCode(t *testing.T) { } func TestContentType(t *testing.T) { - ts := httptest.NewServer(http.NotFoundHandler()) - defer ts.Close() - - ts.Config.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - 
w.WriteHeader(http.StatusOK) - }) - - u, err := url.Parse(fmt.Sprintf("http://%s", ts.Listener.Addr().String())) - require.NoError(t, err) - - carbon2Serializer, err := carbon2.NewSerializer(carbon2.Carbon2FormatFieldSeparate) - require.NoError(t, err) - tests := []struct { - name string - plugin func() *SumoLogic - expectedErr bool - serializer serializers.Serializer + name string + plugin func() *SumoLogic + expectedBody []byte }{ { - name: "carbon2 is supported", + name: "carbon2 (data format = field separate) is supported", plugin: func() *SumoLogic { s := Default() - s.URL = u.String() s.headers = map[string]string{ contentTypeHeader: carbon2ContentType, } + sr, err := carbon2.NewSerializer(string(carbon2.Carbon2FormatFieldSeparate)) + require.NoError(t, err) + s.SetSerializer(sr) return s }, - serializer: carbon2Serializer, - expectedErr: false, + expectedBody: []byte("metric=cpu field=value 42 0\n"), + }, + { + name: "carbon2 (data format = metric includes field) is supported", + plugin: func() *SumoLogic { + s := Default() + s.headers = map[string]string{ + contentTypeHeader: carbon2ContentType, + } + sr, err := carbon2.NewSerializer(string(carbon2.Carbon2FormatMetricIncludesField)) + require.NoError(t, err) + s.SetSerializer(sr) + return s + }, + expectedBody: []byte("metric=cpu_value 42 0\n"), }, { name: "graphite is supported", plugin: func() *SumoLogic { s := Default() - s.URL = u.String() s.headers = map[string]string{ contentTypeHeader: graphiteContentType, } + s.SetSerializer(&graphite.GraphiteSerializer{}) return s }, - serializer: &graphite.GraphiteSerializer{}, - expectedErr: false, }, { name: "prometheus is supported", plugin: func() *SumoLogic { s := Default() - s.URL = u.String() s.headers = map[string]string{ contentTypeHeader: prometheusContentType, } + s.SetSerializer(&prometheus.Serializer{}) return s }, - serializer: &prometheus.Serializer{}, - expectedErr: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - plugin := tt.plugin() - - plugin.SetSerializer(tt.serializer) + var body bytes.Buffer + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gz, err := gzip.NewReader(r.Body) + require.NoError(t, err) + io.Copy(&body, gz) + w.WriteHeader(http.StatusOK) + })) + defer ts.Close() - err := plugin.Connect() + u, err := url.Parse(fmt.Sprintf("http://%s", ts.Listener.Addr().String())) require.NoError(t, err) + plugin := tt.plugin() + plugin.URL = u.String() + + require.NoError(t, plugin.Connect()) + err = plugin.Write([]telegraf.Metric{getMetric(t)}) require.NoError(t, err) + + if tt.expectedBody != nil { + require.Equal(t, string(tt.expectedBody), body.String()) + } }) } } @@ -338,7 +349,7 @@ func TestContentEncodingGzip(t *testing.T) { w.WriteHeader(http.StatusNoContent) }) - serializer, err := carbon2.NewSerializer(carbon2.Carbon2FormatFieldSeparate) + serializer, err := carbon2.NewSerializer(string(carbon2.Carbon2FormatFieldSeparate)) require.NoError(t, err) plugin := tt.plugin() @@ -374,7 +385,7 @@ func TestDefaultUserAgent(t *testing.T) { MaxRequstBodySize: Default().MaxRequstBodySize, } - serializer, err := carbon2.NewSerializer(carbon2.Carbon2FormatFieldSeparate) + serializer, err := carbon2.NewSerializer(string(carbon2.Carbon2FormatFieldSeparate)) require.NoError(t, err) plugin.SetSerializer(serializer) @@ -627,7 +638,7 @@ func TestMaxRequestBodySize(t *testing.T) { w.WriteHeader(http.StatusOK) }) - serializer, err := carbon2.NewSerializer(carbon2.Carbon2FormatFieldSeparate) + serializer, err := 
carbon2.NewSerializer(string(carbon2.Carbon2FormatFieldSeparate)) require.NoError(t, err) plugin := tt.plugin() @@ -659,7 +670,7 @@ func TestTryingToSendEmptyMetricsDoesntFail(t *testing.T) { plugin := Default() plugin.URL = u.String() - serializer, err := carbon2.NewSerializer(carbon2.Carbon2FormatFieldSeparate) + serializer, err := carbon2.NewSerializer(string(carbon2.Carbon2FormatFieldSeparate)) require.NoError(t, err) plugin.SetSerializer(serializer) diff --git a/plugins/outputs/warp10/warp10.go b/plugins/outputs/warp10/warp10.go index 73eefbf722deb..b5996f6380a40 100644 --- a/plugins/outputs/warp10/warp10.go +++ b/plugins/outputs/warp10/warp10.go @@ -138,7 +138,12 @@ func (w *Warp10) Write(metrics []telegraf.Metric) error { return nil } - req, err := http.NewRequest("POST", w.WarpURL+"/api/v0/update", bytes.NewBufferString(payload)) + addr := w.WarpURL + "/api/v0/update" + req, err := http.NewRequest("POST", addr, bytes.NewBufferString(payload)) + if err != nil { + return fmt.Errorf("unable to create new request '%s': %s", addr, err) + } + req.Header.Set("X-Warp10-Token", w.Token) req.Header.Set("Content-Type", "text/plain") diff --git a/plugins/parsers/influx/parser.go b/plugins/parsers/influx/parser.go index 620104ac6b93b..f85435ed54644 100644 --- a/plugins/parsers/influx/parser.go +++ b/plugins/parsers/influx/parser.go @@ -33,9 +33,9 @@ type ParseError struct { func (e *ParseError) Error() string { buffer := e.buf[e.LineOffset:] - eol := strings.IndexAny(buffer, "\r\n") + eol := strings.IndexAny(buffer, "\n") if eol >= 0 { - buffer = buffer[:eol] + buffer = strings.TrimSuffix(buffer[:eol], "\r") } if len(buffer) > maxErrorBufferSize { startEllipsis := true diff --git a/plugins/parsers/influx/parser_test.go b/plugins/parsers/influx/parser_test.go index 569eb3a22e7c0..5c780f070fce5 100644 --- a/plugins/parsers/influx/parser_test.go +++ b/plugins/parsers/influx/parser_test.go @@ -751,6 +751,18 @@ func TestSeriesParser(t *testing.T) { buf: "cpu,a=", }, }, + { + name: "error with carriage return in long line", + input: []byte("cpu,a=" + strings.Repeat("x", maxErrorBufferSize) + "\rcd,b"), + metrics: []telegraf.Metric{}, + err: &ParseError{ + Offset: 1031, + LineNumber: 1, + Column: 1032, + msg: "parse error", + buf: "cpu,a=" + strings.Repeat("x", maxErrorBufferSize) + "\rcd,b", + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -762,6 +774,9 @@ func TestSeriesParser(t *testing.T) { metrics, err := parser.Parse(tt.input) require.Equal(t, tt.err, err) + if err != nil { + require.Equal(t, tt.err.Error(), err.Error()) + } require.Equal(t, len(tt.metrics), len(metrics)) for i, expected := range tt.metrics { diff --git a/plugins/processors/starlark/README.md b/plugins/processors/starlark/README.md index 961151a7fc092..1b541c33857ed 100644 --- a/plugins/processors/starlark/README.md +++ b/plugins/processors/starlark/README.md @@ -91,6 +91,14 @@ While Starlark is similar to Python, there are important differences to note: except is yield ``` +### Libraries available + +The ability to load external scripts other than your own is pretty limited. The following libraries are available for loading: + +* json: `load("json.star", "json")` provides the following functions: `json.encode()`, `json.decode()`, `json.indent()`. See [json.star](/plugins/processors/starlark/testdata/json.star) for an example. + +If you would like to see support for something else here, please open an issue. 
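+
+As a quick sketch, condensed from the json.star example linked in the Examples section below, a script can decode a JSON document held in a field and promote its values to tags and fields:
+
+```python
+load("json.star", "json")
+
+def apply(metric):
+    # Decode the JSON document stored in the "value" field
+    j = json.decode(metric.fields.get("value"))
+    metric.fields.pop("value")
+    metric.tags["label"] = j["label"]
+    metric.fields["count"] = j["count"]
+    return metric
+```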
diff --git a/plugins/parsers/influx/parser_test.go b/plugins/parsers/influx/parser_test.go
index 569eb3a22e7c0..5c780f070fce5 100644
--- a/plugins/parsers/influx/parser_test.go
+++ b/plugins/parsers/influx/parser_test.go
@@ -751,6 +751,18 @@ func TestSeriesParser(t *testing.T) {
 				buf:        "cpu,a=",
 			},
 		},
+		{
+			name:    "error with carriage return in long line",
+			input:   []byte("cpu,a=" + strings.Repeat("x", maxErrorBufferSize) + "\rcd,b"),
+			metrics: []telegraf.Metric{},
+			err: &ParseError{
+				Offset:     1031,
+				LineNumber: 1,
+				Column:     1032,
+				msg:        "parse error",
+				buf:        "cpu,a=" + strings.Repeat("x", maxErrorBufferSize) + "\rcd,b",
+			},
+		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
@@ -762,6 +774,9 @@ func TestSeriesParser(t *testing.T) {
 
 			metrics, err := parser.Parse(tt.input)
 			require.Equal(t, tt.err, err)
+			if err != nil {
+				require.Equal(t, tt.err.Error(), err.Error())
+			}
 			require.Equal(t, len(tt.metrics), len(metrics))
 
 			for i, expected := range tt.metrics {
diff --git a/plugins/processors/starlark/README.md b/plugins/processors/starlark/README.md
index 961151a7fc092..1b541c33857ed 100644
--- a/plugins/processors/starlark/README.md
+++ b/plugins/processors/starlark/README.md
@@ -91,6 +91,14 @@ While Starlark is similar to Python, there are important differences to note:
 except is yield
 ```
 
+### Libraries available
+
+The ability to load external scripts other than your own is pretty limited. The following libraries are available for loading:
+
+* json: `load("json.star", "json")` provides the following functions: `json.encode()`, `json.decode()`, `json.indent()`. See [json.star](/plugins/processors/starlark/testdata/json.star) for an example.
+
+If you would like to see support for something else here, please open an issue.
+
 ### Common Questions
 
 **How can I drop/delete a metric?**
@@ -149,11 +157,12 @@ Attempting to modify the global scope will fail with an error.
 
 ### Examples
 
+- [json](/plugins/processors/starlark/testdata/json.star) - an example of processing JSON from a field in a metric
+- [number logic](/plugins/processors/starlark/testdata/number_logic.star) - transform a numerical value to another numerical value
+- [pivot](/plugins/processors/starlark/testdata/pivot.star) - Pivots a key's value to be the key for another key.
 - [ratio](/plugins/processors/starlark/testdata/ratio.star) - Compute the ratio of two integer fields
 - [rename](/plugins/processors/starlark/testdata/rename.star) - Rename tags or fields using a name mapping.
 - [scale](/plugins/processors/starlark/testdata/scale.star) - Multiply any field by a number
-- [number logic](/plugins/processors/starlark/testdata/number_logic.star) - transform a numerical value to another numerical value
-- [pivot](/plugins/processors/starlark/testdata/pivot.star) - Pivots a key's value to be the key for another key.
 - [value filter](/plugins/processors/starlark/testdata/value_filter.star) - remove a metric based on a field value.
 
 [All examples](/plugins/processors/starlark/testdata) are in the testdata folder.
diff --git a/plugins/processors/starlark/starlark.go b/plugins/processors/starlark/starlark.go
index e2002a146aa68..cf791b3f155e3 100644
--- a/plugins/processors/starlark/starlark.go
+++ b/plugins/processors/starlark/starlark.go
@@ -9,6 +9,7 @@ import (
 	"github.com/influxdata/telegraf/plugins/processors"
 	"go.starlark.net/resolve"
 	"go.starlark.net/starlark"
+	"go.starlark.net/starlarkjson"
 )
 
 const (
@@ -51,6 +52,7 @@ func (s *Starlark) Init() error {
 	s.thread = &starlark.Thread{
 		Print: func(_ *starlark.Thread, msg string) { s.Log.Debug(msg) },
+		Load:  loadFunc,
 	}
 
 	builtins := starlark.StringDict{}
@@ -213,3 +215,14 @@ func init() {
 		return &Starlark{}
 	})
 }
+
+func loadFunc(thread *starlark.Thread, module string) (starlark.StringDict, error) {
+	switch module {
+	case "json.star":
+		return starlark.StringDict{
+			"json": starlarkjson.Module,
+		}, nil
+	default:
+		return nil, errors.New("module " + module + " is not available")
+	}
+}
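For context, the `Load` hook added above can be exercised directly with the starlark-go API. A minimal sketch outside of Telegraf, with an illustrative script (`loadJSON` mirrors `loadFunc`; the script source and names are assumptions):

```go
package main

import (
	"fmt"
	"log"

	"go.starlark.net/starlark"
	"go.starlark.net/starlarkjson"
)

func main() {
	// Mirror of loadFunc: only "json.star" resolves to a module.
	loadJSON := func(_ *starlark.Thread, module string) (starlark.StringDict, error) {
		if module == "json.star" {
			return starlark.StringDict{"json": starlarkjson.Module}, nil
		}
		return nil, fmt.Errorf("module %s is not available", module)
	}

	thread := &starlark.Thread{Name: "example", Load: loadJSON}
	src := `
load("json.star", "json")
count = json.decode('{"count": 14}')["count"]
`
	globals, err := starlark.ExecFile(thread, "example.star", src, nil)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(globals["count"]) // 14
}
```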
diff --git a/plugins/processors/starlark/testdata/json.star b/plugins/processors/starlark/testdata/json.star
new file mode 100644
index 0000000000000..78b6a8458b7de
--- /dev/null
+++ b/plugins/processors/starlark/testdata/json.star
@@ -0,0 +1,18 @@
+# Example of parsing json out of a field and modifying the metric with it.
+# This is great to use in conjunction with the value parser.
+#
+# Example Input:
+# json value="{\"label\": \"hero\", \"count\": 14}" 1465839830100400201
+#
+# Example Output:
+# json,label=hero count=14i 1465839830100400201
+
+load("json.star", "json")
+# loads json.encode(), json.decode(), json.indent()
+
+def apply(metric):
+    j = json.decode(metric.fields.get('value'))
+    metric.fields.pop('value')
+    metric.tags["label"] = j["label"]
+    metric.fields["count"] = j["count"]
+    return metric
diff --git a/plugins/processors/streamingprocessor.go b/plugins/processors/streamingprocessor.go
index 95b2e0748d771..95ebae2142b7a 100644
--- a/plugins/processors/streamingprocessor.go
+++ b/plugins/processors/streamingprocessor.go
@@ -2,6 +2,7 @@ package processors
 
 import (
 	"github.com/influxdata/telegraf"
+	"github.com/influxdata/telegraf/models"
 )
 
 // NewStreamingProcessorFromProcessor is a converter that turns a standard
@@ -16,6 +17,7 @@ func NewStreamingProcessorFromProcessor(p telegraf.Processor) telegraf.Streaming
 type streamingProcessor struct {
 	processor telegraf.Processor
 	acc       telegraf.Accumulator
+	Log       telegraf.Logger
 }
 
 func (sp *streamingProcessor) SampleConfig() string {
@@ -46,6 +48,7 @@ func (sp *streamingProcessor) Stop() error {
 // to call the Init method of the wrapped processor if
 // needed
 func (sp *streamingProcessor) Init() error {
+	models.SetLoggerOnPlugin(sp.processor, sp.Log)
 	if p, ok := sp.processor.(telegraf.Initializer); ok {
 		err := p.Init()
 		if err != nil {
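The `streamingProcessor` change injects the framework-assigned logger into the wrapped processor before its `Init` runs. A sketch of what this enables from a plugin author's point of view — the processor type is hypothetical, and `models.SetLoggerOnPlugin` is assumed to populate an exported `Log telegraf.Logger` field:

```go
package example

import "github.com/influxdata/telegraf"

// Hypothetical classic (non-streaming) processor wrapped by streamingProcessor.
type myProcessor struct {
	Log telegraf.Logger // assumed to be filled in by models.SetLoggerOnPlugin
}

func (p *myProcessor) SampleConfig() string { return "" }
func (p *myProcessor) Description() string  { return "demo processor" }

// With the change above, the logger is already set when Init runs;
// previously a wrapped processor saw a nil Log here.
func (p *myProcessor) Init() error {
	p.Log.Debug("initializing wrapped processor")
	return nil
}

func (p *myProcessor) Apply(in ...telegraf.Metric) []telegraf.Metric { return in }
```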
diff --git a/plugins/serializers/carbon2/carbon2.go b/plugins/serializers/carbon2/carbon2.go
index 10611815b8a7e..1b05d4cb2d4c7 100644
--- a/plugins/serializers/carbon2/carbon2.go
+++ b/plugins/serializers/carbon2/carbon2.go
@@ -12,35 +12,35 @@ import (
 type format string
 
 const (
-	Carbon2FormatFieldSeparate       string = "field_separate"
-	Carbon2FormatMetricIncludesField string = "metric_includes_field"
-
-	formatFieldSeparate       = format(Carbon2FormatFieldSeparate)
-	formatMetricIncludesField = format(Carbon2FormatMetricIncludesField)
+	carbon2FormatFieldEmpty          = format("")
+	Carbon2FormatFieldSeparate       = format("field_separate")
+	Carbon2FormatMetricIncludesField = format("metric_includes_field")
 )
 
-var formats = map[string]format{
-	// Field separate is the default when no format specified.
-	"":                               formatFieldSeparate,
-	Carbon2FormatFieldSeparate:       formatFieldSeparate,
-	Carbon2FormatMetricIncludesField: formatMetricIncludesField,
+var formats = map[format]struct{}{
+	carbon2FormatFieldEmpty:          {},
+	Carbon2FormatFieldSeparate:       {},
+	Carbon2FormatMetricIncludesField: {},
 }
 
 type Serializer struct {
 	metricsFormat format
 }
 
-func NewSerializer(f string) (*Serializer, error) {
-	var (
-		ok            bool
-		metricsFormat format
-	)
-	if metricsFormat, ok = formats[f]; !ok {
+func NewSerializer(metricsFormat string) (*Serializer, error) {
+	var f = format(metricsFormat)
+
+	if _, ok := formats[f]; !ok {
 		return nil, fmt.Errorf("unknown carbon2 format: %s", f)
 	}
 
+	// When unset, default to field separate.
+	if f == carbon2FormatFieldEmpty {
+		f = Carbon2FormatFieldSeparate
+	}
+
 	return &Serializer{
-		metricsFormat: metricsFormat,
+		metricsFormat: f,
 	}, nil
 }
 
@@ -58,17 +58,20 @@ func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) {
 func (s *Serializer) createObject(metric telegraf.Metric) []byte {
 	var m bytes.Buffer
 
+	metricsFormat := s.getMetricsFormat()
+
 	for fieldName, fieldValue := range metric.Fields() {
-		if !isNumeric(fieldValue) {
+		if isString(fieldValue) {
 			continue
 		}
 
-		switch s.metricsFormat {
-		case formatFieldSeparate:
+		switch metricsFormat {
+		case Carbon2FormatFieldSeparate:
 			m.WriteString(serializeMetricFieldSeparate(
 				metric.Name(), fieldName,
 			))
-		case formatMetricIncludesField:
+
+		case Carbon2FormatMetricIncludesField:
 			m.WriteString(serializeMetricIncludeField(
 				metric.Name(), fieldName,
 			))
@@ -85,7 +88,7 @@ func (s *Serializer) createObject(metric telegraf.Metric) []byte {
 			m.WriteString(" ")
 		}
 		m.WriteString(" ")
-		m.WriteString(fmt.Sprintf("%v", fieldValue))
+		m.WriteString(formatValue(fieldValue))
 		m.WriteString(" ")
 		m.WriteString(strconv.FormatInt(metric.Time().Unix(), 10))
 		m.WriteString("\n")
@@ -93,6 +96,18 @@ func (s *Serializer) createObject(metric telegraf.Metric) []byte {
 	return m.Bytes()
 }
 
+func (s *Serializer) SetMetricsFormat(f format) {
+	s.metricsFormat = f
+}
+
+func (s *Serializer) getMetricsFormat() format {
+	return s.metricsFormat
+}
+
+func (s *Serializer) IsMetricsFormatUnset() bool {
+	return s.metricsFormat == carbon2FormatFieldEmpty
+}
+
 func serializeMetricFieldSeparate(name, fieldName string) string {
 	return fmt.Sprintf("metric=%s field=%s ",
 		strings.Replace(name, " ", "_", -1),
@@ -107,11 +122,33 @@ func serializeMetricIncludeField(name, fieldName string) string {
 	)
 }
 
-func isNumeric(v interface{}) bool {
+func formatValue(fieldValue interface{}) string {
+	switch v := fieldValue.(type) {
+	case bool:
+		// Print bools as 0s and 1s
+		return fmt.Sprintf("%d", bool2int(v))
+	default:
+		return fmt.Sprintf("%v", v)
+	}
+}
+
+func isString(v interface{}) bool {
 	switch v.(type) {
 	case string:
-		return false
-	default:
 		return true
+	default:
+		return false
+	}
+}
+
+func bool2int(b bool) int {
+	// Slightly more optimized than a usual if ... return ... else return ... .
+	// See: https://0x0f.me/blog/golang-compiler-optimization/
+	var i int
+	if b {
+		i = 1
+	} else {
+		i = 0
 	}
+	return i
 }
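Taken together, the serializer changes above mean both formats go through one constructor, the empty string falls back to `field_separate`, and bool fields are emitted as `0`/`1` instead of `false`/`true`. A rough usage sketch (error handling elided; exact output spacing follows the serializer):

```go
package main

import (
	"fmt"
	"time"

	"github.com/influxdata/telegraf/metric"
	"github.com/influxdata/telegraf/plugins/serializers/carbon2"
)

func main() {
	// metric.New returns (telegraf.Metric, error) in this codebase.
	m, _ := metric.New("cpu",
		map[string]string{},
		map[string]interface{}{"enabled": true, "value": 42},
		time.Unix(0, 0),
	)

	// An empty format string now defaults to field_separate.
	fieldSeparate, _ := carbon2.NewSerializer("")
	buf, _ := fieldSeparate.Serialize(m)
	fmt.Print(string(buf))
	// Emits lines of the shape: metric=cpu field=enabled 1 0
	// (bools print as 0/1; field iteration order may vary)

	includesField, _ := carbon2.NewSerializer(string(carbon2.Carbon2FormatMetricIncludesField))
	buf, _ = includesField.Serialize(m)
	fmt.Print(string(buf))
	// Emits lines of the shape: metric=cpu_enabled 1 0
}
```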
diff --git a/plugins/serializers/carbon2/carbon2_test.go b/plugins/serializers/carbon2/carbon2_test.go
index aadc55f7ede96..7ed98d6e6d6da 100644
--- a/plugins/serializers/carbon2/carbon2_test.go
+++ b/plugins/serializers/carbon2/carbon2_test.go
@@ -31,7 +31,7 @@ func TestSerializeMetricFloat(t *testing.T) {
 	require.NoError(t, err)
 
 	testcases := []struct {
-		format   string
+		format   format
 		expected string
 	}{
 		{
@@ -45,8 +45,8 @@ func TestSerializeMetricFloat(t *testing.T) {
 	}
 
 	for _, tc := range testcases {
-		t.Run(tc.format, func(t *testing.T) {
-			s, err := NewSerializer(tc.format)
+		t.Run(string(tc.format), func(t *testing.T) {
+			s, err := NewSerializer(string(tc.format))
 			require.NoError(t, err)
 
 			buf, err := s.Serialize(m)
@@ -69,7 +69,7 @@ func TestSerializeMetricWithEmptyStringTag(t *testing.T) {
 	require.NoError(t, err)
 
 	testcases := []struct {
-		format   string
+		format   format
 		expected string
 	}{
 		{
@@ -83,8 +83,8 @@ func TestSerializeMetricWithEmptyStringTag(t *testing.T) {
 	}
 
 	for _, tc := range testcases {
-		t.Run(tc.format, func(t *testing.T) {
-			s, err := NewSerializer(tc.format)
+		t.Run(string(tc.format), func(t *testing.T) {
+			s, err := NewSerializer(string(tc.format))
 			require.NoError(t, err)
 
 			buf, err := s.Serialize(m)
@@ -107,7 +107,7 @@ func TestSerializeWithSpaces(t *testing.T) {
 	require.NoError(t, err)
 
 	testcases := []struct {
-		format   string
+		format   format
 		expected string
 	}{
 		{
@@ -121,8 +121,8 @@ func TestSerializeWithSpaces(t *testing.T) {
 	}
 
 	for _, tc := range testcases {
-		t.Run(tc.format, func(t *testing.T) {
-			s, err := NewSerializer(tc.format)
+		t.Run(string(tc.format), func(t *testing.T) {
+			s, err := NewSerializer(string(tc.format))
 			require.NoError(t, err)
 
 			buf, err := s.Serialize(m)
@@ -145,7 +145,7 @@ func TestSerializeMetricInt(t *testing.T) {
 	require.NoError(t, err)
 
 	testcases := []struct {
-		format   string
+		format   format
 		expected string
 	}{
 		{
@@ -159,8 +159,8 @@ func TestSerializeMetricInt(t *testing.T) {
 	}
 
 	for _, tc := range testcases {
-		t.Run(tc.format, func(t *testing.T) {
-			s, err := NewSerializer(tc.format)
+		t.Run(string(tc.format), func(t *testing.T) {
+			s, err := NewSerializer(string(tc.format))
 			require.NoError(t, err)
 
 			buf, err := s.Serialize(m)
@@ -183,7 +183,7 @@ func TestSerializeMetricString(t *testing.T) {
 	assert.NoError(t, err)
 
 	testcases := []struct {
-		format   string
+		format   format
 		expected string
 	}{
 		{
@@ -196,12 +196,69 @@ func TestSerializeMetricString(t *testing.T) {
 		},
 	}
 
+	for _, tc := range testcases {
+		t.Run(string(tc.format), func(t *testing.T) {
+			s, err := NewSerializer(string(tc.format))
+			require.NoError(t, err)
+
+			buf, err := s.Serialize(m)
+			require.NoError(t, err)
+
+			assert.Equal(t, tc.expected, string(buf))
+		})
+	}
+}
+
+func TestSerializeMetricBool(t *testing.T) {
+	requireMetric := func(t *testing.T, tim time.Time, value bool) telegraf.Metric {
+		tags := map[string]string{
+			"tag_name": "tag_value",
+		}
+		fields := map[string]interface{}{
+			"java_lang_GarbageCollector_Valid": value,
+		}
+
+		m, err := metric.New("cpu", tags, fields, tim)
+		require.NoError(t, err)
+
+		return m
+	}
+
+	now := time.Now()
+
+	testcases := []struct {
+		metric   telegraf.Metric
+		format   string
+		expected string
+	}{
+		{
+			metric:   requireMetric(t, now, false),
+			format:   string(Carbon2FormatFieldSeparate),
+			expected: fmt.Sprintf("metric=cpu field=java_lang_GarbageCollector_Valid tag_name=tag_value 0 %d\n", now.Unix()),
+		},
+		{
+			metric:   requireMetric(t, now, false),
+			format:   string(Carbon2FormatMetricIncludesField),
+			expected: fmt.Sprintf("metric=cpu_java_lang_GarbageCollector_Valid tag_name=tag_value 0 %d\n", now.Unix()),
+		},
+		{
+			metric:   requireMetric(t, now, true),
+			format:   string(Carbon2FormatFieldSeparate),
+			expected: fmt.Sprintf("metric=cpu field=java_lang_GarbageCollector_Valid tag_name=tag_value 1 %d\n", now.Unix()),
+		},
+		{
+			metric:   requireMetric(t, now, true),
+			format:   string(Carbon2FormatMetricIncludesField),
+			expected: fmt.Sprintf("metric=cpu_java_lang_GarbageCollector_Valid tag_name=tag_value 1 %d\n", now.Unix()),
+		},
+	}
+
 	for _, tc := range testcases {
 		t.Run(tc.format, func(t *testing.T) {
 			s, err := NewSerializer(tc.format)
 			require.NoError(t, err)
 
-			buf, err := s.Serialize(m)
+			buf, err := s.Serialize(tc.metric)
 			require.NoError(t, err)
 
 			assert.Equal(t, tc.expected, string(buf))
@@ -224,7 +281,7 @@ func TestSerializeBatch(t *testing.T) {
 	metrics := []telegraf.Metric{m, m}
 
 	testcases := []struct {
-		format   string
+		format   format
 		expected string
 	}{
 		{
@@ -242,8 +299,8 @@ metric=cpu_value 42 0
 	}
 
 	for _, tc := range testcases {
-		t.Run(tc.format, func(t *testing.T) {
-			s, err := NewSerializer(tc.format)
+		t.Run(string(tc.format), func(t *testing.T) {
+			s, err := NewSerializer(string(tc.format))
 			require.NoError(t, err)
 
 			buf, err := s.SerializeBatch(metrics)
diff --git a/scripts/alpine.docker b/scripts/alpine.docker
index 395cbd8a33bc7..4c83e322d277e 100644
--- a/scripts/alpine.docker
+++ b/scripts/alpine.docker
@@ -1,4 +1,4 @@
-FROM golang:1.14.7 as builder
+FROM golang:1.15.2 as builder
 WORKDIR /go/src/github.com/influxdata/telegraf
 
 COPY . /go/src/github.com/influxdata/telegraf
diff --git a/scripts/buster.docker b/scripts/buster.docker
new file mode 100644
index 0000000000000..3919d8ca5fd20
--- /dev/null
+++ b/scripts/buster.docker
@@ -0,0 +1,15 @@
+FROM golang:1.15.2-buster as builder
+WORKDIR /go/src/github.com/influxdata/telegraf
+
+COPY . /go/src/github.com/influxdata/telegraf
+RUN make go-install
+
+FROM buildpack-deps:buster-curl
+COPY --from=builder /go/bin/* /usr/bin/
+COPY etc/telegraf.conf /etc/telegraf/telegraf.conf
+
+EXPOSE 8125/udp 8092/udp 8094
+
+COPY scripts/docker-entrypoint.sh /entrypoint.sh
+ENTRYPOINT ["/entrypoint.sh"]
+CMD ["telegraf"]
diff --git a/scripts/ci-1.14.docker b/scripts/ci-1.14.docker
index 8f7a0fd449bd0..af3559460b3bd 100644
--- a/scripts/ci-1.14.docker
+++ b/scripts/ci-1.14.docker
@@ -1,4 +1,4 @@
-FROM golang:1.14.5
+FROM golang:1.14.9
 
 RUN chmod -R 755 "$GOPATH"
 
diff --git a/scripts/ci-1.13.docker b/scripts/ci-1.15.docker
similarity index 66%
rename from scripts/ci-1.13.docker
rename to scripts/ci-1.15.docker
index 6f175a1215429..65230db5f6f3b 100644
--- a/scripts/ci-1.13.docker
+++ b/scripts/ci-1.15.docker
@@ -1,4 +1,4 @@
-FROM golang:1.13.13
+FROM golang:1.15.2
 
 RUN chmod -R 755 "$GOPATH"
 
@@ -21,8 +21,3 @@ RUN locale-gen C.UTF-8 || true
 ENV LANG=C.UTF-8
 
 RUN gem install fpm
-
-RUN go get -d github.com/golang/dep && \
-    cd src/github.com/golang/dep && \
-    git checkout -q v0.5.0 && \
-    go install -ldflags="-X main.version=v0.5.0" ./cmd/dep
diff --git a/scripts/stretch.docker b/scripts/stretch.docker
index 642421513c65d..39c6e6c1a49d3 100644
--- a/scripts/stretch.docker
+++ b/scripts/stretch.docker
@@ -1,4 +1,4 @@
-FROM golang:1.14.7-stretch as builder
+FROM golang:1.14.9-stretch as builder
 WORKDIR /go/src/github.com/influxdata/telegraf
 
 COPY . /go/src/github.com/influxdata/telegraf

From 6b00413a1eb64c4e082b228d5594caf5ccfaf6dd Mon Sep 17 00:00:00 2001
From: Thomas Schuetz
Date: Fri, 9 Oct 2020 11:44:52 +0200
Subject: [PATCH 49/51] removed blank line

---
 plugins/outputs/dynatrace/dynatrace.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go
index 596366ae8470f..2a837c3d54a36 100644
--- a/plugins/outputs/dynatrace/dynatrace.go
+++ b/plugins/outputs/dynatrace/dynatrace.go
@@ -300,4 +300,4 @@ func init() {
 			Timeout: internal.Duration{Duration: time.Second * 5},
 		}
 	})
-}
+}
\ No newline at end of file

From 18a60d3b67246dfcf4e7b9c20b1b3ca30a277e83 Mon Sep 17 00:00:00 2001
From: Thomas Schuetz
Date: Fri, 9 Oct 2020 12:23:06 +0200
Subject: [PATCH 50/51] go fmtted file

---
 plugins/outputs/dynatrace/dynatrace.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/plugins/outputs/dynatrace/dynatrace.go b/plugins/outputs/dynatrace/dynatrace.go
index 2a837c3d54a36..596366ae8470f 100644
--- a/plugins/outputs/dynatrace/dynatrace.go
+++ b/plugins/outputs/dynatrace/dynatrace.go
@@ -300,4 +300,4 @@ func init() {
 			Timeout: internal.Duration{Duration: time.Second * 5},
 		}
 	})
-}
\ No newline at end of file
+}

From 06f02599b68b4dc261565260cafa8ddedcccc246 Mon Sep 17 00:00:00 2001
From: Thomas Schuetz
Date: Wed, 14 Oct 2020 07:38:00 +0200
Subject: [PATCH 51/51] fixed broken link

---
 plugins/outputs/dynatrace/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/plugins/outputs/dynatrace/README.md b/plugins/outputs/dynatrace/README.md
index 78a7ee4990067..ea4b42777752d 100644
--- a/plugins/outputs/dynatrace/README.md
+++ b/plugins/outputs/dynatrace/README.md
@@ -1,6 +1,6 @@
 # Dynatrace Output Plugin
 
-This plugin is sending telegraf metrics to [Dynatrace](www.dynatrace.com). It has two operational modes.
+This plugin is sending telegraf metrics to [Dynatrace](https://www.dynatrace.com). It has two operational modes.
 
 Telegraf minimum version: Telegraf 1.16
 Plugin minimum tested version: 1.16