From dcd1357eacede40e45f674ba8740d3020960f04f Mon Sep 17 00:00:00 2001 From: ilyam8 Date: Wed, 4 Jan 2023 12:19:50 +0200 Subject: [PATCH] feat(nvidia-smi): collect Voltage (XML only) --- modules/nvidia_smi/README.md | 1 + modules/nvidia_smi/charts.go | 16 ++++++++++++++++ modules/nvidia_smi/collect_xml.go | 4 ++++ modules/nvidia_smi/nvidia_smi_test.go | 1 + 4 files changed, 22 insertions(+) diff --git a/modules/nvidia_smi/README.md b/modules/nvidia_smi/README.md index 24aef6b1a..c8b02c632 100644 --- a/modules/nvidia_smi/README.md +++ b/modules/nvidia_smi/README.md @@ -34,6 +34,7 @@ Labels per scope: | gpu_frame_buffer_memory_usage | gpu | free, used, reserved | B | yes | yes | | gpu_bar1_memory_usage | gpu | free, used | B | yes | no | | gpu_temperature | gpu | temperature | Celsius | yes | yes | +| gpu_voltage | gpu | voltage | V | yes | no | | gpu_clock_freq | gpu | graphics, video, sm, mem | MHz | yes | yes | | gpu_power_draw | gpu | power_draw | Watts | yes | yes | | gpu_performance_state | gpu | P0-P15 | state | yes | yes | diff --git a/modules/nvidia_smi/charts.go b/modules/nvidia_smi/charts.go index 6fd49e902..aeaec83e8 100644 --- a/modules/nvidia_smi/charts.go +++ b/modules/nvidia_smi/charts.go @@ -19,6 +19,7 @@ const ( prioGPUFBMemoryUsage prioGPUBAR1MemoryUsage prioGPUTemperatureChart + prioGPUVoltageChart prioGPUClockFreq prioGPUPowerDraw prioGPUPerformanceState @@ -34,6 +35,7 @@ var ( gpuEncoderUtilizationChartTmpl.Copy(), gpuFrameBufferMemoryUsageChartTmpl.Copy(), gpuBAR1MemoryUsageChartTmpl.Copy(), + gpuVoltageChartTmpl.Copy(), gpuTemperatureChartTmpl.Copy(), gpuClockFreqChartTmpl.Copy(), gpuPowerDrawChartTmpl.Copy(), @@ -158,6 +160,17 @@ var ( {ID: "gpu_%s_temperature", Name: "temperature"}, }, } + gpuVoltageChartTmpl = module.Chart{ + ID: "gpu_%s_voltage", + Title: "Voltage", + Units: "V", + Fam: "voltage", + Ctx: "nvidia_smi.gpu_voltage", + Priority: prioGPUVoltageChart, + Dims: module.Dims{ + {ID: "gpu_%s_voltage", Name: "voltage", Div: 1000}, // mV => V + }, + } gpuClockFreqChartTmpl = module.Chart{ ID: "gpu_%s_clock_freq", Title: "Clock current frequency", @@ -219,6 +232,9 @@ func (nv *NvidiaSMI) addGPUXMLCharts(gpu xmlGPUInfo) { if !isValidValue(gpu.PowerReadings.PowerDraw) { _ = charts.Remove(gpuPowerDrawChartTmpl.ID) } + if !isValidValue(gpu.Voltage.GraphicsVolt) { + _ = charts.Remove(gpuVoltageChartTmpl.ID) + } for _, c := range *charts { c.ID = fmt.Sprintf(c.ID, strings.ToLower(gpu.UUID)) diff --git a/modules/nvidia_smi/collect_xml.go b/modules/nvidia_smi/collect_xml.go index f76f0700e..02fa69235 100644 --- a/modules/nvidia_smi/collect_xml.go +++ b/modules/nvidia_smi/collect_xml.go @@ -53,6 +53,7 @@ func (nv *NvidiaSMI) collectGPUInfoXML(mx map[string]int64) error { addMetric(mx, px+"sm_clock", gpu.Clocks.SmClock, 0) addMetric(mx, px+"mem_clock", gpu.Clocks.MemClock, 0) addMetric(mx, px+"power_draw", gpu.PowerReadings.PowerDraw, 0) + addMetric(mx, px+"voltage", gpu.Voltage.GraphicsVolt, 0) for i := 0; i < 16; i++ { if s := "P" + strconv.Itoa(i); gpu.PerformanceState == s { mx[px+"performance_state_"+s] = 1 @@ -130,6 +131,9 @@ type ( MinPowerLimit string `xml:"min_power_limit"` MaxPowerLimit string `xml:"max_power_limit"` } `xml:"power_readings"` + Voltage struct { + GraphicsVolt string `xml:"graphics_volt"` + } `xml:"voltage"` Processes struct { ProcessInfo []struct { PID string `xml:"pid"` diff --git a/modules/nvidia_smi/nvidia_smi_test.go b/modules/nvidia_smi/nvidia_smi_test.go index c6dcd36a9..276f3cf59 100644 --- a/modules/nvidia_smi/nvidia_smi_test.go +++ b/modules/nvidia_smi/nvidia_smi_test.go @@ -161,6 +161,7 @@ func TestNvidiaSMI_Collect(t *testing.T) { "gpu_GPU-473d8d0f-d462-185c-6b36-6fc23e23e571_sm_clock": 210, "gpu_GPU-473d8d0f-d462-185c-6b36-6fc23e23e571_temperature": 45, "gpu_GPU-473d8d0f-d462-185c-6b36-6fc23e23e571_video_clock": 555, + "gpu_GPU-473d8d0f-d462-185c-6b36-6fc23e23e571_voltage": 631, } assert.Equal(t, expected, mx)