From d0063e7d05747e21e6c4d14aad5f461709f15b57 Mon Sep 17 00:00:00 2001
From: technowhizz <7688823+technowhizz@users.noreply.github.com>
Date: Fri, 11 Oct 2024 11:11:53 +0100
Subject: [PATCH 01/16] Convert smartmon script to python

---
 etc/kayobe/ansible/scripts/smartmon.py | 156 +++++++++++++++++++++++++
 1 file changed, 156 insertions(+)
 create mode 100644 etc/kayobe/ansible/scripts/smartmon.py

diff --git a/etc/kayobe/ansible/scripts/smartmon.py b/etc/kayobe/ansible/scripts/smartmon.py
new file mode 100644
index 000000000..2a50c9187
--- /dev/null
+++ b/etc/kayobe/ansible/scripts/smartmon.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python3
+
+import subprocess
+import json
+from datetime import datetime
+
+SMARTCTL_PATH = "/usr/sbin/smartctl"
+
+def run_command(command, parse_json=False):
+    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+    if parse_json:
+        return json.loads(result.stdout)
+    else:
+        return result.stdout.strip()
+
+def parse_smartctl_attributes(disk, disk_type, serial, json_data):
+    labels = f'disk="{disk}",type="{disk_type}",serial_number="{serial}"'
+    metrics = []
+    smartmon_attrs = set([
+        "airflow_temperature_cel", "command_timeout", "current_pending_sector", "end_to_end_error", "erase_fail_count",
+        "g_sense_error_rate", "hardware_ecc_recovered", "host_reads_32mib", "host_reads_mib", "host_writes_32mib",
+        "host_writes_mib", "load_cycle_count", "media_wearout_indicator", "nand_writes_1gib", "offline_uncorrectable",
+        "power_cycle_count", "power_on_hours", "program_fail_cnt_total", "program_fail_count", "raw_read_error_rate",
+        "reallocated_event_count", "reallocated_sector_ct", "reported_uncorrect", "runtime_bad_block", "sata_downshift_count",
+        "seek_error_rate", "spin_retry_count", "spin_up_time", "start_stop_count", "temperature_case", "temperature_celsius",
+        "temperature_internal", "total_lbas_read", "total_lbas_written", "udma_crc_error_count", "unsafe_shutdown_count",
+        "unused_rsvd_blk_cnt_tot", "wear_leveling_count", "workld_host_reads_perc", "workld_media_wear_indic", "workload_minutes",
+        "critical_warning", "temperature", "available_spare", "available_spare_threshold", "percentage_used",
+        "data_units_read", "data_units_written", "host_reads", "host_writes", "controller_busy_time",
+        "power_cycles", "unsafe_shutdowns", "media_errors", "num_err_log_entries",
+        "warning_temp_time", "critical_comp_time"
+    ])
+    if 'nvme_smart_health_information_log' in json_data:
+        smart_log = json_data['nvme_smart_health_information_log']
+        for attr_name, value in smart_log.items():
+            attr_name = attr_name.replace(' ', '_').lower()
+            if attr_name in smartmon_attrs:
+                metrics.append(f"{attr_name}{{{labels}}} {value}")
+    elif 'scsi_grown_defect_list' in json_data:
+        scsi_attrs = json_data.get('scsi_grown_defect_list', {})
+        for attr_name, value in scsi_attrs.items():
+            attr_name = attr_name.replace(' ', '_').lower()
+            if attr_name in smartmon_attrs:
+                metrics.append(f"{attr_name}{{{labels}}} {value}")
+    elif 'ata_smart_attributes' in json_data and 'table' in json_data['ata_smart_attributes']:
+        for attr in json_data['ata_smart_attributes']['table']:
+            attr_name = attr['name'].replace('-', '_').lower()
+            if attr_name in smartmon_attrs:
+                attr_id = attr.get('id', '')
+                value = attr.get('value', '')
+                worst = attr.get('worst', '')
+                threshold = attr.get('thresh', '')
+                raw_value = attr.get('raw', {}).get('value', '')
+                metrics.append(f"{attr_name}_value{{{labels},smart_id=\"{attr_id}\"}} {value}")
+                metrics.append(f"{attr_name}_worst{{{labels},smart_id=\"{attr_id}\"}} {worst}")
+                metrics.append(f"{attr_name}_threshold{{{labels},smart_id=\"{attr_id}\"}} {threshold}")
+                metrics.append(f"{attr_name}_raw_value{{{labels},smart_id=\"{attr_id}\"}} {raw_value}")
+    return metrics
+
+def parse_smartctl_info(disk, disk_type, json_data):
+    info = json_data.get('device', {})
+    smart_status = json_data.get('smart_status', {})
+    labels = {
+        'disk': disk,
+        'type': disk_type,
+        'vendor': info.get('vendor', ''),
+        'product': info.get('product', ''),
+        'revision': info.get('revision', ''),
+        'lun_id': info.get('lun_id', ''),
+        'model_family': json_data.get('model_family', ''),
+        'device_model': json_data.get('model_name', ''),
+        'serial_number': json_data.get('serial_number', '').lower(),
+        'firmware_version': json_data.get('firmware_version', '')
+    }
+    label_str = ','.join(f'{k}="{v}"' for k, v in labels.items())
+    metrics = [
+        f'device_info{{{label_str}}} 1',
+        f'device_smart_available{{disk="{disk}",type="{disk_type}",serial_number="{labels["serial_number"]}"}} {1 if smart_status.get("available", False) else 0}',
+    ]
+    if smart_status.get("available", False):
+        metrics.append(f'device_smart_enabled{{disk="{disk}",type="{disk_type}",serial_number="{labels["serial_number"]}"}} {1 if smart_status.get("enabled", False) else 0}')
+        if 'passed' in smart_status:
+            metrics.append(f'device_smart_healthy{{disk="{disk}",type="{disk_type}",serial_number="{labels["serial_number"]}"}} {1 if smart_status.get("passed", False) else 0}')
+    return metrics
+
+def format_output(metrics):
+    output = []
+    last_metric = ""
+    for metric in sorted(metrics):
+        metric_name = metric.split('{')[0]
+        if metric_name != last_metric:
+            output.append(f"# HELP smartmon_{metric_name} SMART metric {metric_name}")
+            output.append(f"# TYPE smartmon_{metric_name} gauge")
+            last_metric = metric_name
+        output.append(f"smartmon_{metric}")
+    return '\n'.join(output)
+
+def main():
+    try:
+        version_output = run_command([SMARTCTL_PATH, '-j'], parse_json=True)
+        smartctl_version_list = version_output.get('smartctl', {}).get('version', [])
+        if smartctl_version_list:
+            smartctl_version_str = '.'.join(map(str, smartctl_version_list))
+        else:
+            smartctl_version_str = "unknown"
+    except json.JSONDecodeError:
+        smartctl_version_str = "unknown"
+    metrics = [f'smartctl_version{{version="{smartctl_version_str}"}} 1']
+
+    try:
+        device_list_output = run_command([SMARTCTL_PATH, '--scan-open', '-j'], parse_json=True)
+        devices = []
+        for device in device_list_output.get('devices', []):
+            disk = device.get('name', '')
+            disk_type = device.get('type', 'auto')
+            if disk:
+                devices.append((disk, disk_type))
+    except json.JSONDecodeError:
+        devices = []
+
+    for disk, disk_type in devices:
+        serial_number = ''
+        active = 1
+        metrics.append(f'smartctl_run{{disk="{disk}",type="{disk_type}"}} {int(datetime.utcnow().timestamp())}')
+
+        try:
+            standby_output = run_command([SMARTCTL_PATH, '-n', 'standby', '-d', disk_type, '-j', disk], parse_json=True)
+            power_mode = standby_output.get('power_mode', '')
+            if power_mode == 'standby':
+                active = 0
+        except json.JSONDecodeError:
+            active = 0  # Assume device is inactive if we can't parse the output
+
+        metrics.append(f'device_active{{disk="{disk}",type="{disk_type}"}} {active}')
+
+        if active == 0:
+            continue
+
+        try:
+            info_output = run_command([SMARTCTL_PATH, '-i', '-H', '-d', disk_type, '-j', disk], parse_json=True)
+        except json.JSONDecodeError:
+            continue
+        metrics.extend(parse_smartctl_info(disk, disk_type, info_output))
+        serial_number = info_output.get('serial_number', '').lower()
+
+        try:
+            attributes_output = run_command([SMARTCTL_PATH, '-A', '-d', disk_type, '-j', disk], parse_json=True)
+        except json.JSONDecodeError:
+            continue
+        metrics.extend(parse_smartctl_attributes(disk, disk_type, serial_number, attributes_output))
+
+    formatted_output = format_output(metrics)
+    print(formatted_output)
+
+if __name__ == "__main__":
+    main()

From 46216b54c41ea412a8d5c9e1d13d057b3e12cdd4 Mon Sep 17 00:00:00 2001
From: technowhizz <7688823+technowhizz@users.noreply.github.com>
Date: Fri, 11 Oct 2024 11:12:32 +0100
Subject: [PATCH 02/16] Create tests for smartmon

---
 etc/kayobe/ansible/scripts/test_smartmon.py | 265 ++++++++++++++++++++
 1 file changed, 265 insertions(+)
 create mode 100644 etc/kayobe/ansible/scripts/test_smartmon.py

diff --git a/etc/kayobe/ansible/scripts/test_smartmon.py b/etc/kayobe/ansible/scripts/test_smartmon.py
new file mode 100644
index 000000000..a771a7ee6
--- /dev/null
+++ b/etc/kayobe/ansible/scripts/test_smartmon.py
@@ -0,0 +1,265 @@
+import unittest
+from unittest.mock import patch
+from smartmon import (
+    parse_smartctl_info,
+    parse_smartctl_attributes,
+    main,
+)
+
+class TestSmartMon(unittest.TestCase):
+    @patch('smartmon.run_command')
+    def test_parse_smartctl_info(self, mock_run_command):
+        devices_info = [
+            {
+                'disk': '/dev/nvme0',
+                'disk_type': 'nvme',
+                'json_output': {
+                    'device': {
+                        'name': '/dev/nvme0',
+                        'info_name': '/dev/nvme0',
+                        'type': 'nvme',
+                        'protocol': 'NVMe',
+                    },
+                    'model_name': 'Dell Ent NVMe CM6 RI 7.68TB',
+                    'serial_number': 'Y2Q0A0BGTCF8',
+                    'firmware_version': '2.2.0',
+                    'smart_status': {
+                        'passed': True,
+                        'available': True,
+                        'enabled': True
+                    },
+                }
+            },
+            {
+                'disk': '/dev/nvme1',
+                'disk_type': 'nvme',
+                'json_output': {
+                    'device': {
+                        'name': '/dev/nvme1',
+                        'info_name': '/dev/nvme1',
+                        'type': 'nvme',
+                        'protocol': 'NVMe',
+                    },
+                    'model_name': 'Dell Ent NVMe CM6 RI 7.68TB',
+                    'serial_number': 'Y2Q0A09PTCF8',
+                    'firmware_version': '2.2.0',
+                    'smart_status': {
+                        'passed': True,
+                        'available': True,
+                        'enabled': True
+                    },
+                }
+            },
+        ]
+
+        for device_info in devices_info:
+            disk = device_info['disk']
+            disk_type = device_info['disk_type']
+            json_output = device_info['json_output']
+            serial_number = json_output.get('serial_number', '').lower()
+
+            expected_metrics = [
+                f'device_info{{disk="{disk}",type="{disk_type}",vendor="",product="",revision="",lun_id="",model_family="",device_model="{json_output.get("model_name", "")}",serial_number="{serial_number}",firmware_version="{json_output.get("firmware_version", "")}"}} 1',
+                f'device_smart_available{{disk="{disk}",type="{disk_type}",serial_number="{serial_number}"}} 1',
+                f'device_smart_enabled{{disk="{disk}",type="{disk_type}",serial_number="{serial_number}"}} 1',
+                f'device_smart_healthy{{disk="{disk}",type="{disk_type}",serial_number="{serial_number}"}} 1',
+            ]
+
+            metrics = parse_smartctl_info(disk, disk_type, json_output)
+            for expected_metric in expected_metrics:
+                self.assertIn(expected_metric, metrics)
+
+    @patch('smartmon.run_command')
+    def test_parse_smartctl_attributes(self, mock_run_command):
+        devices_attributes = [
+            {
+                'disk': '/dev/nvme0',
+                'disk_type': 'nvme',
+                'serial': 'y2q0a0bgtcf8',
+                'json_output': {
+                    'nvme_smart_health_information_log': {
+                        'critical_warning': 0,
+                        'temperature': 36,
+                        'available_spare': 100,
+                        'available_spare_threshold': 10,
+                        'percentage_used': 0,
+                        'data_units_read': 117446405,
+                        'data_units_written': 84630284,
+                        'host_reads': 634894145,
+                        'host_writes': 4502620984,
+                        'controller_busy_time': 92090,
+                        'power_cycles': 746,
+                        'power_on_hours': 12494,
+                        'unsafe_shutdowns': 35,
+                        'media_errors': 0,
+                        'num_err_log_entries': 827,
+                        'warning_temp_time': 0,
+                        'critical_comp_time': 0
+                    }
+                }
+            },
+            {
+                'disk': '/dev/nvme1',
+                'disk_type': 'nvme',
+                'serial': 'y2q0a09ptcf8',
+                'json_output': {
+                    'nvme_smart_health_information_log': {
+                        'critical_warning': 0,
+                        'temperature': 35,
+                        'available_spare': 99,
+                        'available_spare_threshold': 10,
+                        'percentage_used': 1,
+                        'data_units_read': 50000000,
+                        'data_units_written': 40000000,
+                        'host_reads': 300000000,
+                        'host_writes': 2000000000,
+                        'controller_busy_time': 80000,
+                        'power_cycles': 700,
+                        'power_on_hours': 12000,
+                        'unsafe_shutdowns': 30,
+                        'media_errors': 0,
+                        'num_err_log_entries': 800,
+                        'warning_temp_time': 0,
+                        'critical_comp_time': 0
+                    }
+                }
+            },
+        ]
+
+        for device_attr in devices_attributes:
+            disk = device_attr['disk']
+            disk_type = device_attr['disk_type']
+            serial = device_attr['serial']
+            json_output = device_attr['json_output']
+
+            metrics = parse_smartctl_attributes(disk, disk_type, serial, json_output)
+
+            expected_metrics = [
+                f'temperature{{disk="{disk}",type="{disk_type}",serial_number="{serial}"}} {json_output["nvme_smart_health_information_log"]["temperature"]}',
+                f'available_spare{{disk="{disk}",type="{disk_type}",serial_number="{serial}"}} {json_output["nvme_smart_health_information_log"]["available_spare"]}',
+            ]
+
+            for expected_metric in expected_metrics:
+                self.assertIn(expected_metric, metrics)
+
+    @patch('smartmon.run_command')
+    def test_main(self, mock_run_command):
+        def side_effect(command, parse_json=False):
+            if '--scan-open' in command:
+                return {
+                    'devices': [
+                        {'name': '/dev/nvme0', 'info_name': '/dev/nvme0', 'type': 'nvme'},
+                        {'name': '/dev/nvme1', 'info_name': '/dev/nvme1', 'type': 'nvme'},
+                    ]
+                } if parse_json else ''
+            elif '-n' in command:
+                return {'power_mode': 'active'} if parse_json else ''
+            elif '-i' in command:
+                if '/dev/nvme0' in command:
+                    return {
+                        'device': {
+                            'name': '/dev/nvme0',
+                            'info_name': '/dev/nvme0',
+                            'type': 'nvme',
+                            'protocol': 'NVMe',
+                        },
+                        'model_name': 'Dell Ent NVMe CM6 RI 7.68TB',
+                        'serial_number': 'Y2Q0A0BGTCF8',
+                        'firmware_version': '2.2.0',
+                        'smart_status': {
+                            'passed': True,
+                            'available': True,
+                            'enabled': True
+                        },
+                    } if parse_json else ''
+                elif '/dev/nvme1' in command:
+                    return {
+                        'device': {
+                            'name': '/dev/nvme1',
+                            'info_name': '/dev/nvme1',
+                            'type': 'nvme',
+                            'protocol': 'NVMe',
+                        },
+                        'model_name': 'Dell Ent NVMe CM6 RI 7.68TB',
+                        'serial_number': 'Y2Q0A09PTCF8',
+                        'firmware_version': '2.2.0',
+                        'smart_status': {
+                            'passed': True,
+                            'available': True,
+                            'enabled': True
+                        },
+                    } if parse_json else ''
+            elif '-A' in command:
+                if '/dev/nvme0' in command:
+                    return {
+                        'nvme_smart_health_information_log': {
+                            'critical_warning': 0,
+                            'temperature': 36,
+                            'available_spare': 100,
+                            'available_spare_threshold': 10,
+                            'percentage_used': 0,
+                            'data_units_read': 117446405,
+                            'data_units_written': 84630284,
+                            'host_reads': 634894145,
+                            'host_writes': 4502620984,
+                            'controller_busy_time': 92090,
+                            'power_cycles': 746,
+                            'power_on_hours': 12494,
+                            'unsafe_shutdowns': 35,
+                            'media_errors': 0,
+                            'num_err_log_entries': 827,
+                            'warning_temp_time': 0,
+                            'critical_comp_time': 0
+                        }
+                    } if parse_json else ''
+                elif '/dev/nvme1' in command:
+                    return {
+                        'nvme_smart_health_information_log': {
+                            'critical_warning': 0,
+                            'temperature': 35,
+                            'available_spare': 99,
+                            'available_spare_threshold': 10,
+                            'percentage_used': 1,
+                            'data_units_read': 50000000,
+                            'data_units_written': 40000000,
+                            'host_reads': 300000000,
+                            'host_writes': 2000000000,
+                            'controller_busy_time': 80000,
+                            'power_cycles': 700,
+                            'power_on_hours': 12000,
+                            'unsafe_shutdowns': 30,
+                            'media_errors': 0,
+                            'num_err_log_entries': 800,
+                            'warning_temp_time': 0,
+                            'critical_comp_time': 0
+                        }
+                    } if parse_json else ''
+            elif '-j' in command and len(command) == 2:
+                return {
+                    'smartctl': {
+                        'version': [7, 2],
+                        'svn_revision': '5155',
+                        'platform_info': 'x86_64-linux-5.15.0-122-generic',
+                        'build_info': '(local build)',
+                    }
+                } if parse_json else ''
+            else:
+                return {} if parse_json else ''
+
+        mock_run_command.side_effect = side_effect
+
+        with patch('builtins.print') as mock_print:
+            main()
+            output_lines = []
+            for call in mock_print.call_args_list:
+                output_lines.extend(call[0][0].split('\n'))
+            expected_metrics = [
+                'smartmon_device_info{disk="/dev/nvme0",type="nvme",vendor="",product="",revision="",lun_id="",model_family="",device_model="Dell Ent NVMe CM6 RI 7.68TB",serial_number="y2q0a0bgtcf8",firmware_version="2.2.0"} 1',
+                'smartmon_device_info{disk="/dev/nvme1",type="nvme",vendor="",product="",revision="",lun_id="",model_family="",device_model="Dell Ent NVMe CM6 RI 7.68TB",serial_number="y2q0a09ptcf8",firmware_version="2.2.0"} 1',
+            ]
+            for expected_metric in expected_metrics:
+                self.assertIn(expected_metric, output_lines)
+
+
+if __name__ == '__main__':
+    unittest.main()

From 23fc74779411246c89e502ff2675eab506c69a35 Mon Sep 17 00:00:00 2001
From: technowhizz <7688823+technowhizz@users.noreply.github.com>
Date: Fri, 17 Jan 2025 16:08:49 +0000
Subject: [PATCH 03/16] Use pySMART

---
 etc/kayobe/ansible/scripts/smartmon.py | 279 +++++++++++++++----------
 1 file changed, 168 insertions(+), 111 deletions(-)

diff --git a/etc/kayobe/ansible/scripts/smartmon.py b/etc/kayobe/ansible/scripts/smartmon.py
index 2a50c9187..bd4bb36bc 100644
--- a/etc/kayobe/ansible/scripts/smartmon.py
+++ b/etc/kayobe/ansible/scripts/smartmon.py
@@ -2,155 +2,212 @@
 
 import subprocess
 import json
+import re
 from datetime import datetime
 
+from pySMART import DeviceList
+
 SMARTCTL_PATH = "/usr/sbin/smartctl"
 
+SMARTMON_ATTRS = {
+    "airflow_temperature_cel",
+    "command_timeout",
+    "current_pending_sector",
+    "end_to_end_error",
+    "erase_fail_count",
+    "g_sense_error_rate",
+    "hardware_ecc_recovered",
+    "host_reads_32mib",
+    "host_reads_mib",
+    "host_writes_32mib",
+    "host_writes_mib",
+    "load_cycle_count",
+    "media_wearout_indicator",
+    "nand_writes_1gib",
+    "offline_uncorrectable",
+    "power_cycle_count",
+    "power_on_hours",
+    "program_fail_cnt_total",
+    "program_fail_count",
+    "raw_read_error_rate",
+    "reallocated_event_count",
+    "reallocated_sector_ct",
+    "reported_uncorrect",
+    "runtime_bad_block",
+    "sata_downshift_count",
+    "seek_error_rate",
+    "spin_retry_count",
+    "spin_up_time",
+    "start_stop_count",
+    "temperature_case",
+    "temperature_celsius",
+    "temperature_internal",
+    "total_lbas_read",
+    "total_lbas_written",
+    "udma_crc_error_count",
+    "unsafe_shutdown_count",
+    "unused_rsvd_blk_cnt_tot",
+    "wear_leveling_count",
+    "workld_host_reads_perc",
+    "workld_media_wear_indic",
+    "workload_minutes",
+    "critical_warning",
+    "temperature",
+    "available_spare",
+    "available_spare_threshold",
+    "percentage_used",
+    "data_units_read",
+    "data_units_written",
+    "host_reads",
+    "host_writes",
+    "controller_busy_time",
+    "power_cycles",
+    "unsafe_shutdowns",
+    "media_errors",
+    "num_err_log_entries",
+    "warning_temp_time",
+    "critical_comp_time",
+}
+
 def run_command(command, parse_json=False):
+    """
+    Helper to run a subprocess command and optionally parse JSON output.
+    """
     result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
     if parse_json:
         return json.loads(result.stdout)
-    else:
-        return result.stdout.strip()
-
-def parse_smartctl_attributes(disk, disk_type, serial, json_data):
-    labels = f'disk="{disk}",type="{disk_type}",serial_number="{serial}"'
-    metrics = []
-    smartmon_attrs = set([
-        "airflow_temperature_cel", "command_timeout", "current_pending_sector", "end_to_end_error", "erase_fail_count",
-        "g_sense_error_rate", "hardware_ecc_recovered", "host_reads_32mib", "host_reads_mib", "host_writes_32mib",
-        "host_writes_mib", "load_cycle_count", "media_wearout_indicator", "nand_writes_1gib", "offline_uncorrectable",
-        "power_cycle_count", "power_on_hours", "program_fail_cnt_total", "program_fail_count", "raw_read_error_rate",
-        "reallocated_event_count", "reallocated_sector_ct", "reported_uncorrect", "runtime_bad_block", "sata_downshift_count",
-        "seek_error_rate", "spin_retry_count", "spin_up_time", "start_stop_count", "temperature_case", "temperature_celsius",
-        "temperature_internal", "total_lbas_read", "total_lbas_written", "udma_crc_error_count", "unsafe_shutdown_count",
-        "unused_rsvd_blk_cnt_tot", "wear_leveling_count", "workld_host_reads_perc", "workld_media_wear_indic", "workload_minutes",
-        "critical_warning", "temperature", "available_spare", "available_spare_threshold", "percentage_used",
-        "data_units_read", "data_units_written", "host_reads", "host_writes", "controller_busy_time",
-        "power_cycles", "unsafe_shutdowns", "media_errors", "num_err_log_entries",
-        "warning_temp_time", "critical_comp_time"
-    ])
-    if 'nvme_smart_health_information_log' in json_data:
-        smart_log = json_data['nvme_smart_health_information_log']
-        for attr_name, value in smart_log.items():
-            attr_name = attr_name.replace(' ', '_').lower()
-            if attr_name in smartmon_attrs:
-                metrics.append(f"{attr_name}{{{labels}}} {value}")
-    elif 'scsi_grown_defect_list' in json_data:
-        scsi_attrs = json_data.get('scsi_grown_defect_list', {})
-        for attr_name, value in scsi_attrs.items():
-            attr_name = attr_name.replace(' ', '_').lower()
-            if attr_name in smartmon_attrs:
-                metrics.append(f"{attr_name}{{{labels}}} {value}")
-    elif 'ata_smart_attributes' in json_data and 'table' in json_data['ata_smart_attributes']:
-        for attr in json_data['ata_smart_attributes']['table']:
-            attr_name = attr['name'].replace('-', '_').lower()
-            if attr_name in smartmon_attrs:
-                attr_id = attr.get('id', '')
-                value = attr.get('value', '')
-                worst = attr.get('worst', '')
-                threshold = attr.get('thresh', '')
-                raw_value = attr.get('raw', {}).get('value', '')
-                metrics.append(f"{attr_name}_value{{{labels},smart_id=\"{attr_id}\"}} {value}")
-                metrics.append(f"{attr_name}_worst{{{labels},smart_id=\"{attr_id}\"}} {worst}")
-                metrics.append(f"{attr_name}_threshold{{{labels},smart_id=\"{attr_id}\"}} {threshold}")
-                metrics.append(f"{attr_name}_raw_value{{{labels},smart_id=\"{attr_id}\"}} {raw_value}")
-    return metrics
-
-def parse_smartctl_info(disk, disk_type, json_data):
-    info = json_data.get('device', {})
-    smart_status = json_data.get('smart_status', {})
+    return result.stdout.strip()
+
+def parse_device_info(device):
+    """
+    Produce Prometheus lines describing the device's identity and SMART status:
+    - device_info
+    - device_smart_available
+    - device_smart_enabled
+    - device_smart_healthy
+    """
+    serial_number = (device.serial or "").lower()
     labels = {
-        'disk': disk,
-        'type': disk_type,
-        'vendor': info.get('vendor', ''),
-        'product': info.get('product', ''),
-        'revision': info.get('revision', ''),
-        'lun_id': info.get('lun_id', ''),
-        'model_family': json_data.get('model_family', ''),
-        'device_model': json_data.get('model_name', ''),
-        'serial_number': json_data.get('serial_number', '').lower(),
-        'firmware_version': json_data.get('firmware_version', '')
+        "disk": device.name,
+        "type": device.interface or "",
+        "vendor": device.vendor or "",
+        "model_family": device.family or "",
+        "device_model": device.model or "",
+        "serial_number": serial_number,
+        "firmware_version": device.firmware or "",
     }
-    label_str = ','.join(f'{k}="{v}"' for k, v in labels.items())
+    label_str = ",".join(f'{k}="{v}"' for k, v in labels.items())
+
     metrics = [
         f'device_info{{{label_str}}} 1',
-        f'device_smart_available{{disk="{disk}",type="{disk_type}",serial_number="{labels["serial_number"]}"}} {1 if smart_status.get("available", False) else 0}',
+        f'device_smart_available{{disk="{device.name}",type="{device.interface}",serial_number="{serial_number}"}} {1 if device.smart_capable else 0}',
     ]
-    if smart_status.get("available", False):
-        metrics.append(f'device_smart_enabled{{disk="{disk}",type="{disk_type}",serial_number="{labels["serial_number"]}"}} {1 if smart_status.get("enabled", False) else 0}')
-        if 'passed' in smart_status:
-            metrics.append(f'device_smart_healthy{{disk="{disk}",type="{disk_type}",serial_number="{labels["serial_number"]}"}} {1 if smart_status.get("passed", False) else 0}')
+
+    if device.smart_capable:
+        metrics.append(
+            f'device_smart_enabled{{disk="{device.name}",type="{device.interface}",serial_number="{serial_number}"}} {1 if device.smart_enabled else 0}'
+        )
+        if device.assessment:
+            is_healthy = 1 if device.assessment.upper() == "PASS" else 0
+            metrics.append(
+                f'device_smart_healthy{{disk="{device.name}",type="{device.interface}",serial_number="{serial_number}"}} {is_healthy}'
+            )
+
+    return metrics
+
+def parse_if_attributes(device):
+    """
+    For any device type (ATA, NVMe, SCSI, etc.), we read device.if_attributes.
+    We'll iterate over its public fields, convert them to snake_case,
+    and if it's in SMARTMON_ATTRS and numeric, we produce metrics.
+    """
+    metrics = []
+
+    if not device.if_attributes:
+        return metrics
+
+    disk = device.name
+    disk_type = device.interface or ""
+    serial_number = (device.serial or "").lower()
+    labels = f'disk="{disk}",type="{disk_type}",serial_number="{serial_number}"'
+
+    # Inspect all public attributes on device.if_attributes
+    for attr_name in dir(device.if_attributes):
+        if attr_name.startswith("_"):
+            continue  # skip private / special methods
+        val = getattr(device.if_attributes, attr_name, None)
+        if callable(val):
+            continue  # skip methods
+
+        # Convert CamelCase or PascalCase -> snake_case, e.g. dataUnitsRead -> data_units_read
+        snake_name = re.sub(r'(?<!^)(?=[A-Z])', '_', attr_name).lower()
+
+        if snake_name in SMARTMON_ATTRS and isinstance(val, (int, float)):
+            metrics.append(f"{snake_name}{{{labels}}} {val}")
+
     return metrics
 
 def format_output(metrics):
+    """
+    Convert a list of lines like "some_metric{...} value"
+    into a Prometheus text output with # HELP / # TYPE lines.
+    """
     output = []
     last_metric = ""
     for metric in sorted(metrics):
-        metric_name = metric.split('{')[0]
+        metric_name = metric.split("{")[0]
         if metric_name != last_metric:
             output.append(f"# HELP smartmon_{metric_name} SMART metric {metric_name}")
             output.append(f"# TYPE smartmon_{metric_name} gauge")
             last_metric = metric_name
         output.append(f"smartmon_{metric}")
-    return '\n'.join(output)
+    return "\n".join(output)
 
 def main():
+    all_metrics = []
+
     try:
-        version_output = run_command([SMARTCTL_PATH, '-j'], parse_json=True)
-        smartctl_version_list = version_output.get('smartctl', {}).get('version', [])
-        if smartctl_version_list:
-            smartctl_version_str = '.'.join(map(str, smartctl_version_list))
+        version_output = run_command([SMARTCTL_PATH, "--version"])
+        if version_output.startswith("smartctl"):
+            first_line = version_output.splitlines()[0]
+            version_num = first_line.split()[1]
         else:
-            smartctl_version_str = "unknown"
-    except json.JSONDecodeError:
-        smartctl_version_str = "unknown"
-    metrics = [f'smartctl_version{{version="{smartctl_version_str}"}} 1']
+            version_num = "unknown"
+    except Exception:
+        version_num = "unknown"
+    all_metrics.append(f'smartctl_version{{version="{version_num}"}} 1')
 
-    try:
-        device_list_output = run_command([SMARTCTL_PATH, '--scan-open', '-j'], parse_json=True)
-        devices = []
-        for device in device_list_output.get('devices', []):
-            disk = device.get('name', '')
-            disk_type = device.get('type', 'auto')
-            if disk:
-                devices.append((disk, disk_type))
-    except json.JSONDecodeError:
-        devices = []
-
-    for disk, disk_type in devices:
-        serial_number = ''
-        active = 1
-        metrics.append(f'smartctl_run{{disk="{disk}",type="{disk_type}"}} {int(datetime.utcnow().timestamp())}')
+    dev_list = DeviceList()
+
+    for dev in dev_list.devices:
+        disk_name = dev.name
+        disk_type = dev.interface or ""
+        serial_number = (dev.serial or "").lower()
 
+        run_timestamp = int(datetime.utcnow().timestamp())
+        all_metrics.append(f'smartctl_run{{disk="{disk_name}",type="{disk_type}"}} {run_timestamp}')
+
+        active = 1
         try:
-            standby_output = run_command([SMARTCTL_PATH, '-n', 'standby', '-d', disk_type, '-j', disk], parse_json=True)
-            power_mode = standby_output.get('power_mode', '')
-            if power_mode == 'standby':
+            cmd = [SMARTCTL_PATH, "-n", "standby", "-d", disk_type, "-j", disk_name]
+            standby_json = run_command(cmd, parse_json=True)
+            if standby_json.get("power_mode", "") == "standby":
                 active = 0
         except json.JSONDecodeError:
-            active = 0  # Assume device is inactive if we can't parse the output
-
-        metrics.append(f'device_active{{disk="{disk}",type="{disk_type}"}} {active}')
+            active = 0
+        except Exception:
+            active = 0
 
+        all_metrics.append(
+            f'device_active{{disk="{disk_name}",type="{disk_type}",serial_number="{serial_number}"}} {active}'
+        )
         if active == 0:
             continue
 
-        try:
-            info_output = run_command([SMARTCTL_PATH, '-i', '-H', '-d', disk_type, '-j', disk], parse_json=True)
-        except json.JSONDecodeError:
-            continue
-        metrics.extend(parse_smartctl_info(disk, disk_type, info_output))
-        serial_number = info_output.get('serial_number', '').lower()
-
-        try:
-            attributes_output = run_command([SMARTCTL_PATH, '-A', '-d', disk_type, '-j', disk], parse_json=True)
-        except json.JSONDecodeError:
-            continue
-        metrics.extend(parse_smartctl_attributes(disk, disk_type, serial_number, attributes_output))
+        all_metrics.extend(parse_device_info(dev))
+        all_metrics.extend(parse_if_attributes(dev))
 
-    formatted_output = format_output(metrics)
-    print(formatted_output)
+    print(format_output(all_metrics))
 
 if __name__ == "__main__":
     main()

From 8714025420b249d4a1ed054a07a0b976a4407412 Mon Sep 17 00:00:00 2001
From: technowhizz <7688823+technowhizz@users.noreply.github.com>
Date: Fri, 17 Jan 2025 16:13:05 +0000
Subject: [PATCH 04/16] Add tests for pysmart

---
 etc/kayobe/ansible/scripts/drives/nvme.json |  24 +
 etc/kayobe/ansible/scripts/test_smartmon.py | 513 ++++++++++----------
 2 files changed, 279 insertions(+), 258 deletions(-)
 create mode 100644 etc/kayobe/ansible/scripts/drives/nvme.json

diff --git a/etc/kayobe/ansible/scripts/drives/nvme.json b/etc/kayobe/ansible/scripts/drives/nvme.json
new file mode 100644
index 000000000..bbff19ec0
--- /dev/null
+++ b/etc/kayobe/ansible/scripts/drives/nvme.json
@@ -0,0 +1,24 @@
+{
+  "device_info": {
+    "name": "/dev/nvme0",
+    "interface": "nvme",
+    "vendor": "AcmeCorp",
+    "family": "Acme NVMe Family",
+    "model": "Acme NVMe 1TB",
+    "serial": "ABCD1234",
+    "firmware": "3.0.1",
+    "smart_capable": true,
+    "smart_enabled": true,
+    "assessment": "PASS"
+  },
+  "if_attributes": {
+    "criticalWarning": 0,
+    "temperature": 36,
+    "availableSpare": 100,
+    "availableSpareThreshold": 10,
+    "percentageUsed": 0,
+    "dataUnitsRead": 117446405,
+    "dataUnitsWritten": 84630284,
+    "notInSmartmonAttrs": 999
+  }
+}
diff --git a/etc/kayobe/ansible/scripts/test_smartmon.py b/etc/kayobe/ansible/scripts/test_smartmon.py
index a771a7ee6..a22df8ee1 100644
--- a/etc/kayobe/ansible/scripts/test_smartmon.py
+++ b/etc/kayobe/ansible/scripts/test_smartmon.py
@@ -1,265 +1,262 @@
+import glob
+import json
+import os
+import re
 import unittest
-from unittest.mock import patch
+
+from unittest.mock import patch, MagicMock
+
 from smartmon import (
-    parse_smartctl_info,
-    parse_smartctl_attributes,
+    parse_device_info,
+    parse_if_attributes,
     main,
+    SMARTMON_ATTRS
 )
 
+def load_json_fixture(filename):
+    """
+    Load a JSON file from the 'drives' subfolder.
+    """
+    path = os.path.join(os.path.dirname(__file__), "drives", filename)
+    with open(path, "r", encoding="utf-8") as f:
+        return json.load(f)
+
+
 class TestSmartMon(unittest.TestCase):
-    @patch('smartmon.run_command')
-    def test_parse_smartctl_info(self, mock_run_command):
-        devices_info = [
-            {
-                'disk': '/dev/nvme0',
-                'disk_type': 'nvme',
-                'json_output': {
-                    'device': {
-                        'name': '/dev/nvme0',
-                        'info_name': '/dev/nvme0',
-                        'type': 'nvme',
-                        'protocol': 'NVMe',
-                    },
-                    'model_name': 'Dell Ent NVMe CM6 RI 7.68TB',
-                    'serial_number': 'Y2Q0A0BGTCF8',
-                    'firmware_version': '2.2.0',
-                    'smart_status': {
-                        'passed': True,
-                        'available': True,
-                        'enabled': True
-                    },
-                }
-            },
-            {
-                'disk': '/dev/nvme1',
-                'disk_type': 'nvme',
-                'json_output': {
-                    'device': {
-                        'name': '/dev/nvme1',
-                        'info_name': '/dev/nvme1',
-                        'type': 'nvme',
-                        'protocol': 'NVMe',
-                    },
-                    'model_name': 'Dell Ent NVMe CM6 RI 7.68TB',
-                    'serial_number': 'Y2Q0A09PTCF8',
-                    'firmware_version': '2.2.0',
-                    'smart_status': {
-                        'passed': True,
-                        'available': True,
-                        'enabled': True
-                    },
-                }
-            },
-        ]
-
-        for device_info in devices_info:
-            disk = device_info['disk']
-            disk_type = device_info['disk_type']
-            json_output = device_info['json_output']
-            serial_number = json_output.get('serial_number', '').lower()
-
-            expected_metrics = [
-                f'device_info{{disk="{disk}",type="{disk_type}",vendor="",product="",revision="",lun_id="",model_family="",device_model="{json_output.get("model_name", "")}",serial_number="{serial_number}",firmware_version="{json_output.get("firmware_version", "")}"}} 1',
-                f'device_smart_available{{disk="{disk}",type="{disk_type}",serial_number="{serial_number}"}} 1',
-                f'device_smart_enabled{{disk="{disk}",type="{disk_type}",serial_number="{serial_number}"}} 1',
-                f'device_smart_healthy{{disk="{disk}",type="{disk_type}",serial_number="{serial_number}"}} 1',
-            ]
-
-            metrics = parse_smartctl_info(disk, disk_type, json_output)
-            for expected_metric in expected_metrics:
-                self.assertIn(expected_metric, metrics)
-
-    @patch('smartmon.run_command')
-    def test_parse_smartctl_attributes(self, mock_run_command):
-        devices_attributes = [
-            {
-                'disk': '/dev/nvme0',
-                'disk_type': 'nvme',
-                'serial': 'y2q0a0bgtcf8',
-                'json_output': {
-                    'nvme_smart_health_information_log': {
-                        'critical_warning': 0,
-                        'temperature': 36,
-                        'available_spare': 100,
-                        'available_spare_threshold': 10,
-                        'percentage_used': 0,
-                        'data_units_read': 117446405,
-                        'data_units_written': 84630284,
-                        'host_reads': 634894145,
-                        'host_writes': 4502620984,
-                        'controller_busy_time': 92090,
-                        'power_cycles': 746,
-                        'power_on_hours': 12494,
-                        'unsafe_shutdowns': 35,
-                        'media_errors': 0,
-                        'num_err_log_entries': 827,
-                        'warning_temp_time': 0,
-                        'critical_comp_time': 0
-                    }
-                }
-            },
-            {
-                'disk': '/dev/nvme1',
-                'disk_type': 'nvme',
-                'serial': 'y2q0a09ptcf8',
-                'json_output': {
-                    'nvme_smart_health_information_log': {
-                        'critical_warning': 0,
-                        'temperature': 35,
-                        'available_spare': 99,
-                        'available_spare_threshold': 10,
-                        'percentage_used': 1,
-                        'data_units_read': 50000000,
-                        'data_units_written': 40000000,
-                        'host_reads': 300000000,
-                        'host_writes': 2000000000,
-                        'controller_busy_time': 80000,
-                        'power_cycles': 700,
-                        'power_on_hours': 12000,
-                        'unsafe_shutdowns': 30,
-                        'media_errors': 0,
-                        'num_err_log_entries': 800,
-                        'warning_temp_time': 0,
-                        'critical_comp_time': 0
-                    }
-                }
-            },
-        ]
-
-        for device_attr in devices_attributes:
-            disk = device_attr['disk']
-            disk_type = device_attr['disk_type']
-            serial = device_attr['serial']
-            json_output = device_attr['json_output']
-
-            metrics = parse_smartctl_attributes(disk, disk_type, serial, json_output)
-
-            expected_metrics = [
-                f'temperature{{disk="{disk}",type="{disk_type}",serial_number="{serial}"}} {json_output["nvme_smart_health_information_log"]["temperature"]}',
-                f'available_spare{{disk="{disk}",type="{disk_type}",serial_number="{serial}"}} {json_output["nvme_smart_health_information_log"]["available_spare"]}',
-            ]
-
-            for expected_metric in expected_metrics:
-                self.assertIn(expected_metric, metrics)
-
-    @patch('smartmon.run_command')
-    def test_main(self, mock_run_command):
-        def side_effect(command, parse_json=False):
-            if '--scan-open' in command:
-                return {
-                    'devices': [
-                        {'name': '/dev/nvme0', 'info_name': '/dev/nvme0', 'type': 'nvme'},
-                        {'name': '/dev/nvme1', 'info_name': '/dev/nvme1', 'type': 'nvme'},
-                    ]
-                } if parse_json else ''
-            elif '-n' in command:
-                return {'power_mode': 'active'} if parse_json else ''
-            elif '-i' in command:
-                if '/dev/nvme0' in command:
-                    return {
-                        'device': {
-                            'name': '/dev/nvme0',
-                            'info_name': '/dev/nvme0',
-                            'type': 'nvme',
-                            'protocol': 'NVMe',
-                        },
-                        'model_name': 'Dell Ent NVMe CM6 RI 7.68TB',
-                        'serial_number': 'Y2Q0A0BGTCF8',
-                        'firmware_version': '2.2.0',
-                        'smart_status': {
-                            'passed': True,
-                            'available': True,
-                            'enabled': True
-                        },
-                    } if parse_json else ''
-                elif '/dev/nvme1' in command:
-                    return {
-                        'device': {
-                            'name': '/dev/nvme1',
-                            'info_name': '/dev/nvme1',
-                            'type': 'nvme',
-                            'protocol': 'NVMe',
-                        },
-                        'model_name': 'Dell Ent NVMe CM6 RI 7.68TB',
-                        'serial_number': 'Y2Q0A09PTCF8',
-                        'firmware_version': '2.2.0',
-                        'smart_status': {
-                            'passed': True,
-                            'available': True,
-                            'enabled': True
-                        },
-                    } if parse_json else ''
-            elif '-A' in command:
-                if '/dev/nvme0' in command:
-                    return {
-                        'nvme_smart_health_information_log': {
-                            'critical_warning': 0,
-                            'temperature': 36,
-                            'available_spare': 100,
-                            'available_spare_threshold': 10,
-                            'percentage_used': 0,
-                            'data_units_read': 117446405,
-                            'data_units_written': 84630284,
-                            'host_reads': 634894145,
-                            'host_writes': 4502620984,
-                            'controller_busy_time': 92090,
-                            'power_cycles': 746,
-                            'power_on_hours': 12494,
-                            'unsafe_shutdowns': 35,
-                            'media_errors': 0,
-                            'num_err_log_entries': 827,
-                            'warning_temp_time': 0,
-                            'critical_comp_time': 0
-                        }
-                    } if parse_json else ''
-                elif '/dev/nvme1' in command:
-                    return {
-                        'nvme_smart_health_information_log': {
-                            'critical_warning': 0,
-                            'temperature': 35,
-                            'available_spare': 99,
-                            'available_spare_threshold': 10,
-                            'percentage_used': 1,
-                            'data_units_read': 50000000,
-                            'data_units_written': 40000000,
-                            'host_reads': 300000000,
-                            'host_writes': 2000000000,
-                            'controller_busy_time': 80000,
-                            'power_cycles': 700,
-                            'power_on_hours': 12000,
-                            'unsafe_shutdowns': 30,
-                            'media_errors': 0,
-                            'num_err_log_entries': 800,
-                            'warning_temp_time': 0,
-                            'critical_comp_time': 0
-                        }
-                    } if parse_json else ''
-            elif '-j' in command and len(command) == 2:
-                return {
-                    'smartctl': {
-                        'version': [7, 2],
-                        'svn_revision': '5155',
-                        'platform_info': 'x86_64-linux-5.15.0-122-generic',
-                        'build_info': '(local build)',
-                    }
-                } if parse_json else ''
-            else:
-                return {} if parse_json else ''
-
-        mock_run_command.side_effect = side_effect
-
-        with patch('builtins.print') as mock_print:
-            main()
-            output_lines = []
-            for call in mock_print.call_args_list:
-                output_lines.extend(call[0][0].split('\n'))
-            expected_metrics = [
-                'smartmon_device_info{disk="/dev/nvme0",type="nvme",vendor="",product="",revision="",lun_id="",model_family="",device_model="Dell Ent NVMe CM6 RI 7.68TB",serial_number="y2q0a0bgtcf8",firmware_version="2.2.0"} 1',
-                'smartmon_device_info{disk="/dev/nvme1",type="nvme",vendor="",product="",revision="",lun_id="",model_family="",device_model="Dell Ent NVMe CM6 RI 7.68TB",serial_number="y2q0a09ptcf8",firmware_version="2.2.0"} 1',
-            ]
-            for expected_metric in expected_metrics:
-                self.assertIn(expected_metric, output_lines)
-
-
-if __name__ == '__main__':
+    @classmethod
+    def setUpClass(cls):
+        # Collect all *.json files from ./drives/
+        data_folder = os.path.join(os.path.dirname(__file__), "drives")
+        cls.fixture_files = glob.glob(os.path.join(data_folder, "*.json"))
+
+    def create_mock_device_from_json(self, device_info, if_attributes=None):
+        """
+        Given a 'device_info' dict and optional 'if_attributes', build
+        a MagicMock that mimics a pySMART Device object.
+        """
+        device = MagicMock()
+        device.name = device_info.get("name", "")
+        device.interface = device_info.get("interface", "")
+        device.vendor = device_info.get("vendor", "")
+        device.family = device_info.get("family", "")
+        device.model = device_info.get("model", "")
+        device.serial = device_info.get("serial", "")
+        device.firmware = device_info.get("firmware", "")
+        device.smart_capable = device_info.get("smart_capable", False)
+        device.smart_enabled = device_info.get("smart_enabled", False)
+        device.assessment = device_info.get("assessment", "")
+
+        if if_attributes:
+            class IfAttributesMock:
+                pass
+
+            if_mock = IfAttributesMock()
+            for key, val in if_attributes.items():
+                setattr(if_mock, key, val)
+            device.if_attributes = if_mock
+        else:
+            device.if_attributes = None
+
+        return device
+
+    def test_parse_device_info(self):
+        """
+        Test parse_device_info() for every JSON fixture in ./drives/.
+        We do subTest() so each fixture is tested individually.
+        """
+        for fixture_path in self.fixture_files:
+            fixture_name = os.path.basename(fixture_path)
+            with self.subTest(msg=f"Testing device_info with {fixture_name}"):
+                data = load_json_fixture(fixture_name)
+                device_info = data["device_info"]
+
+                device = self.create_mock_device_from_json(device_info)
+                metrics = parse_device_info(device)
+
+                dev_name = device_info["name"]
+                dev_iface = device_info["interface"]
+                dev_serial = device_info["serial"].lower()
+
+                # The device_info line should exist for every device
+                # e.g. device_info{disk="/dev/...",type="...",serial_number="..."} 1
+                device_info_found = any(
+                    line.startswith("device_info{") and
+                    f'disk="{dev_name}"' in line and
+                    f'type="{dev_iface}"' in line and
+                    f'serial_number="{dev_serial}"' in line
+                    for line in metrics
+                )
+                self.assertTrue(
+                    device_info_found,
+                    f"Expected a device_info metric line for {dev_name} but didn't find it."
+                )
+
+                # If smart_capable is true, we expect device_smart_available = 1
+                if device_info.get("smart_capable"):
+                    smart_available_found = any(
+                        line.startswith("device_smart_available{") and
+                        f'disk="{dev_name}"' in line and
+                        f'serial_number="{dev_serial}"' in line and
+                        line.endswith(" 1")
+                        for line in metrics
+                    )
+                    self.assertTrue(
+                        smart_available_found,
+                        f"Expected device_smart_available=1 for {dev_name}, not found."
+                    )
+
+                # If smart_enabled is true, we expect device_smart_enabled = 1
+                if device_info.get("smart_enabled"):
+                    smart_enabled_found = any(
+                        line.startswith("device_smart_enabled{") and
+                        f'disk="{dev_name}"' in line and
+                        line.endswith(" 1")
+                        for line in metrics
+                    )
+                    self.assertTrue(
+                        smart_enabled_found,
+                        f"Expected device_smart_enabled=1 for {dev_name}, not found."
+                    )
+
+                # device_smart_healthy if assessment in [PASS, WARN, FAIL]
+                # PASS => 1, otherwise => 0
+                assessment = device_info.get("assessment", "").upper()
+                if assessment in ["PASS", "WARN", "FAIL"]:
+                    expected_val = 1 if assessment == "PASS" else 0
+                    smart_healthy_found = any(
+                        line.startswith("device_smart_healthy{") and
+                        f'disk="{dev_name}"' in line and
+                        line.endswith(f" {expected_val}")
+                        for line in metrics
+                    )
+                    self.assertTrue(
+                        smart_healthy_found,
+                        f"Expected device_smart_healthy={expected_val} for {dev_name}, not found."
+                    )
+
+    def test_parse_if_attributes(self):
+        """
+        Test parse_if_attributes() for every JSON fixture in ./drives/.
+        We do subTest() so each fixture is tested individually.
+        """
+        for fixture_path in self.fixture_files:
+            fixture_name = os.path.basename(fixture_path)
+            with self.subTest(msg=f"Testing if_attributes with {fixture_name}"):
+                data = load_json_fixture(fixture_name)
+                device_info = data["device_info"]
+                if_attrs = data.get("if_attributes", {})
+
+                device = self.create_mock_device_from_json(device_info, if_attrs)
+                metrics = parse_if_attributes(device)
+
+                dev_name = device_info["name"]
+                dev_iface = device_info["interface"]
+                dev_serial = device_info["serial"].lower()
+
+                # For each numeric attribute in JSON, if it's in SMARTMON_ATTRS,
+                # we expect a line in the script's output.
+                for attr_key, attr_val in if_attrs.items():
+                    # Convert from e.g. "criticalWarning" -> "critical_warning"
+                    snake_key = re.sub(r'(?<!^)(?=[A-Z])', '_', attr_key).lower()
+
+                    if isinstance(attr_val, (int, float)) and snake_key in SMARTMON_ATTRS:
+                        # We expect e.g. critical_warning{disk="/dev/..."} <value>
+                        expected_line = (
+                            f"{snake_key}{{disk=\"{dev_name}\",type=\"{dev_iface}\",serial_number=\"{dev_serial}\"}} {attr_val}"
+                        )
+                        self.assertIn(
+                            expected_line,
+                            metrics,
+                            f"Expected metric '{expected_line}' for attribute '{attr_key}' not found."
+                        )
+                    else:
+                        # If it's not in SMARTMON_ATTRS or not numeric,
+                        # we do NOT expect a line with that name+value
+                        unexpected_line = (
+                            f"{snake_key}{{disk=\"{dev_name}\",type=\"{dev_iface}\",serial_number=\"{dev_serial}\"}} {attr_val}"
+                        )
+                        self.assertNotIn(
+                            unexpected_line,
+                            metrics,
+                            f"Unexpected metric '{unexpected_line}' found for {attr_key}."
+                        )
+
+                # Also ensure that non-numeric or disallowed attributes do not appear
+                # For instance "notInSmartmonAttrs" should never appear.
+                for line in metrics:
+                    self.assertNotIn(
+                        "not_in_smartmon_attrs",
+                        line,
+                        f"'notInSmartmonAttrs' attribute unexpectedly found in metric line: {line}"
+                    )
+
+    @patch("smartmon.run_command")
+    @patch("smartmon.DeviceList")
+    def test_main(self, mock_devicelist_class, mock_run_cmd):
+        """
+        End-to-end test of main() for every JSON fixture in ./drives/.
+        This ensures we can handle multiple disks (multiple fixture files).
+        """
+        for fixture_path in self.fixture_files:
+            fixture_name = os.path.basename(fixture_path)
+            with self.subTest(msg=f"Testing main() with {fixture_name}"):
+                data = load_json_fixture(fixture_name)
+                device_info = data["device_info"]
+                if_attrs = data.get("if_attributes", {})
+
+                # Patch run_command to return a version & "active" power_mode
+                def run_command_side_effect(cmd, parse_json=False):
+                    if "--version" in cmd:
+                        return "smartctl 7.3 5422 [x86_64-linux-5.15.0]\n..."
+                    if "-n" in cmd and "standby" in cmd and parse_json:
+                        return {"power_mode": "active"}
+                    return ""
+
+                mock_run_cmd.side_effect = run_command_side_effect
+
+                # Mock a single device from the fixture
+                device_mock = self.create_mock_device_from_json(device_info, if_attrs)
+
+                # Make DeviceList() return our single mock device
+                mock_dev_list = MagicMock()
+                mock_dev_list.devices = [device_mock]
+                mock_devicelist_class.return_value = mock_dev_list
+
+                with patch("builtins.print") as mock_print:
+                    main()
+
+                    printed_lines = []
+                    for call_args in mock_print.call_args_list:
+                        printed_lines.extend(call_args[0][0].split("\n"))
+                dev_name = device_info["name"]
+                dev_iface = device_info["interface"]
+                dev_serial = device_info["serial"].lower()
+
+                # We expect a line for the run timestamp, e.g.:
+                # smartmon_smartctl_run{disk="/dev/...",type="..."} 1671234567
+                run_line_found = any(
+                    line.startswith("smartmon_smartctl_run{") and
+                    f'disk="{dev_name}"' in line and
+                    f'type="{dev_iface}"' in line
+                    for line in printed_lines
+                )
+                self.assertTrue(
+                    run_line_found,
+                    f"Expected 'smartmon_smartctl_run' metric line for {dev_name} not found."
+                )
+
+                # Because we mocked "power_mode": "active", we expect device_active=1
+                active_line_found = any(
+                    line.startswith("smartmon_device_active{") and
+                    f'disk="{dev_name}"' in line and
+                    f'serial_number="{dev_serial}"' in line and
+                    line.endswith(" 1")
+                    for line in printed_lines
+                )
+                self.assertTrue(
+                    active_line_found,
+                    f"Expected 'device_active{{...}} 1' line for {dev_name} not found."
+                )
+
+if __name__ == "__main__":
     unittest.main()

From b694537509084051566348a3009f8e7ef1f09af4 Mon Sep 17 00:00:00 2001
From: technowhizz <7688823+technowhizz@users.noreply.github.com>
Date: Mon, 17 Mar 2025 23:45:02 +0000
Subject: [PATCH 05/16] Update docstring to state expected Device

---
 etc/kayobe/ansible/scripts/smartmon.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/etc/kayobe/ansible/scripts/smartmon.py b/etc/kayobe/ansible/scripts/smartmon.py
index bd4bb36bc..e52b6332b 100644
--- a/etc/kayobe/ansible/scripts/smartmon.py
+++ b/etc/kayobe/ansible/scripts/smartmon.py
@@ -85,6 +85,12 @@ def parse_device_info(device):
     - device_smart_available
     - device_smart_enabled
     - device_smart_healthy
+
+    Args:
+        device (Device): A pySMART Device object with attributes such as name, interface, etc.
+
+    Returns:
+        List[str]: A list of Prometheus formatted metric strings.
     """
     serial_number = (device.serial or "").lower()
     labels = {

From 2d25e92f9fc2460ea2f70f7d6f5e9720936b7f89 Mon Sep 17 00:00:00 2001
From: technowhizz <7688823+technowhizz@users.noreply.github.com>
Date: Mon, 17 Mar 2025 23:46:29 +0000
Subject: [PATCH 06/16] Add a function for converting camelCase to snake_case

---
 etc/kayobe/ansible/scripts/smartmon.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/etc/kayobe/ansible/scripts/smartmon.py b/etc/kayobe/ansible/scripts/smartmon.py
index e52b6332b..7ec922b03 100644
--- a/etc/kayobe/ansible/scripts/smartmon.py
+++ b/etc/kayobe/ansible/scripts/smartmon.py
@@ -78,6 +78,14 @@ def run_command(command, parse_json=False):
         return json.loads(result.stdout)
     return result.stdout.strip()
 
+def camel_to_snake(name):
+    """
+    Convert a CamelCase string to snake_case.
+
+    Reference: https://stackoverflow.com/questions/1175208/elegant-python-function-to-convert-camelcase-to-snake-case
+    """
+    return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower()
+
 def parse_device_info(device):
     """
     Produce Prometheus lines describing the device's identity and SMART status:
@@ -145,8 +153,7 @@ def parse_if_attributes(device):
         if callable(val):
             continue  # skip methods
 
-        # Convert CamelCase or PascalCase -> snake_case, e.g. dataUnitsRead -> data_units_read
-        snake_name = re.sub(r'(?<!^)(?=[A-Z])', '_', attr_name).lower()
+        snake_name = camel_to_snake(attr_name)
 
         if snake_name in SMARTMON_ATTRS and isinstance(val, (int, float)):
             metrics.append(f"{snake_name}{{{labels}}} {val}")

From a87abe098dae35cb86911f8eb0e3003abc1fd888 Mon Sep 17 00:00:00 2001
From: technowhizz <7688823+technowhizz@users.noreply.github.com>
Date: Mon, 24 Mar 2025 00:54:49 +0000
Subject: [PATCH 07/16] Split device and attribute tests into individual test
 cases per fixture for better error reporting

---
 etc/kayobe/ansible/scripts/smartmon.py      |   4 +-
 etc/kayobe/ansible/scripts/test_smartmon.py | 238 ++++++++++----------
 2 files changed, 127 insertions(+), 115 deletions(-)

diff --git a/etc/kayobe/ansible/scripts/smartmon.py b/etc/kayobe/ansible/scripts/smartmon.py
index 7ec922b03..202e6981c 100644
--- a/etc/kayobe/ansible/scripts/smartmon.py
+++ b/etc/kayobe/ansible/scripts/smartmon.py
@@ -3,7 +3,7 @@
 import subprocess
 import json
 import re
-from datetime import datetime
+import datetime
 
 from pySMART import DeviceList
 
@@ -197,7 +197,7 @@ def main():
         disk_type = dev.interface or ""
         serial_number = (dev.serial or "").lower()
 
-        run_timestamp = int(datetime.utcnow().timestamp())
+        run_timestamp = int(datetime.datetime.now(datetime.UTC).timestamp())
         all_metrics.append(f'smartctl_run{{disk="{disk_name}",type="{disk_type}"}} {run_timestamp}')
 
         active = 1
diff --git a/etc/kayobe/ansible/scripts/test_smartmon.py b/etc/kayobe/ansible/scripts/test_smartmon.py
index a22df8ee1..212e5f063 100644
--- a/etc/kayobe/ansible/scripts/test_smartmon.py
+++ b/etc/kayobe/ansible/scripts/test_smartmon.py
@@ -59,136 +59,148 @@ class IfAttributesMock:
 
         return device
 
+    def _test_parse_device_info(self, fixture_name):
+        """
+        Helper method to test parse_device_info() for a single JSON fixture.
+        """
+        data = load_json_fixture(fixture_name)
+        device_info = data["device_info"]
+
+        device = self.create_mock_device_from_json(device_info)
+        metrics = parse_device_info(device)
+
+        dev_name = device_info["name"]
+        dev_iface = device_info["interface"]
+        dev_serial = device_info["serial"].lower()
+
+        # The device_info line should exist for every device
+        # e.g. device_info{disk="/dev/...",type="...",serial_number="..."} 1
+        device_info_found = any(
+            line.startswith("device_info{") and
+            f'disk="{dev_name}"' in line and
+            f'type="{dev_iface}"' in line and
+            f'serial_number="{dev_serial}"' in line
+            for line in metrics
+        )
+        self.assertTrue(
+            device_info_found,
+            f"Expected a device_info metric line for {dev_name} but didn't find it."
+        )
+
+        # If smart_capable is true, we expect device_smart_available = 1
+        if device_info.get("smart_capable"):
+            smart_available_found = any(
+                line.startswith("device_smart_available{") and
+                f'disk="{dev_name}"' in line and
+                f'serial_number="{dev_serial}"' in line and
+                line.endswith(" 1")
+                for line in metrics
+            )
+            self.assertTrue(
+                smart_available_found,
+                f"Expected device_smart_available=1 for {dev_name}, not found."
+            )
+
+        # If smart_enabled is true, we expect device_smart_enabled = 1
+        if device_info.get("smart_enabled"):
+            smart_enabled_found = any(
+                line.startswith("device_smart_enabled{") and
+                f'disk="{dev_name}"' in line and
+                line.endswith(" 1")
+                for line in metrics
+            )
+            self.assertTrue(
+                smart_enabled_found,
+                f"Expected device_smart_enabled=1 for {dev_name}, not found."
+            )
+
+        # device_smart_healthy if assessment in [PASS, WARN, FAIL]
+        # PASS => 1, otherwise => 0
+        assessment = device_info.get("assessment", "").upper()
+        if assessment in ["PASS", "WARN", "FAIL"]:
+            expected_val = 1 if assessment == "PASS" else 0
+            smart_healthy_found = any(
+                line.startswith("device_smart_healthy{") and
+                f'disk="{dev_name}"' in line and
+                line.endswith(f" {expected_val}")
+                for line in metrics
+            )
+            self.assertTrue(
+                smart_healthy_found,
+                f"Expected device_smart_healthy={expected_val} for {dev_name}, not found."
+            )
+
     def test_parse_device_info(self):
         """
         Test parse_device_info() for every JSON fixture in ./drives/.
-        We do subTest() so each fixture is tested individually.
+        Each fixture is tested individually with clear error reporting.
         """
         for fixture_path in self.fixture_files:
             fixture_name = os.path.basename(fixture_path)
-            with self.subTest(msg=f"Testing device_info with {fixture_name}"):
-                data = load_json_fixture(fixture_name)
-                device_info = data["device_info"]
-
-                device = self.create_mock_device_from_json(device_info)
-                metrics = parse_device_info(device)
+            with self.subTest(fixture=fixture_name):
+                self._test_parse_device_info(fixture_name)
 
-                dev_name = device_info["name"]
-                dev_iface = device_info["interface"]
-                dev_serial = device_info["serial"].lower()
-
-                # The device_info line should exist for every device
-                # e.g. device_info{disk="/dev/...",type="...",serial_number="..."} 1
-                device_info_found = any(
-                    line.startswith("device_info{") and
-                    f'disk="{dev_name}"' in line and
-                    f'type="{dev_iface}"' in line and
-                    f'serial_number="{dev_serial}"' in line
-                    for line in metrics
+    def _test_parse_if_attributes(self, fixture_name):
+        """
+        Helper method to test parse_if_attributes() for a single JSON fixture.
+        """
+        data = load_json_fixture(fixture_name)
+        device_info = data["device_info"]
+        if_attrs = data.get("if_attributes", {})
+
+        device = self.create_mock_device_from_json(device_info, if_attrs)
+        metrics = parse_if_attributes(device)
+
+        dev_name = device_info["name"]
+        dev_iface = device_info["interface"]
+        dev_serial = device_info["serial"].lower()
+
+        # For each numeric attribute in JSON, if it's in SMARTMON_ATTRS,
+        # we expect a line in the script's output.
+        for attr_key, attr_val in if_attrs.items():
+            # Convert from e.g. "criticalWarning" -> "critical_warning"
+            snake_key = re.sub(r'(?<!^)(?=[A-Z])', '_', attr_key).lower()
+
+            if isinstance(attr_val, (int, float)) and snake_key in SMARTMON_ATTRS:
+                # We expect e.g. critical_warning{disk="/dev/..."} <value>
+                expected_line = (
+                    f"{snake_key}{{disk=\"{dev_name}\",type=\"{dev_iface}\",serial_number=\"{dev_serial}\"}} {attr_val}"
                 )
-                self.assertTrue(
-                    device_info_found,
-                    f"Expected a device_info metric line for {dev_name} but didn't find it."
+                self.assertIn(
+                    expected_line,
+                    metrics,
+                    f"Expected metric '{expected_line}' for attribute '{attr_key}' not found."
+                )
+            else:
+                # If it's not in SMARTMON_ATTRS or not numeric,
+                # we do NOT expect a line with that name+value
+                unexpected_line = (
+                    f"{snake_key}{{disk=\"{dev_name}\",type=\"{dev_iface}\",serial_number=\"{dev_serial}\"}} {attr_val}"
+                )
+                self.assertNotIn(
+                    unexpected_line,
+                    metrics,
+                    f"Unexpected metric '{unexpected_line}' found for {attr_key}."
                 )
 
-                # If smart_capable is true, we expect device_smart_available = 1
-                if device_info.get("smart_capable"):
-                    smart_available_found = any(
-                        line.startswith("device_smart_available{") and
-                        f'disk="{dev_name}"' in line and
-                        f'serial_number="{dev_serial}"' in line and
-                        line.endswith(" 1")
-                        for line in metrics
-                    )
-                    self.assertTrue(
-                        smart_available_found,
-                        f"Expected device_smart_available=1 for {dev_name}, not found."
-                    )
-
-                # If smart_enabled is true, we expect device_smart_enabled = 1
-                if device_info.get("smart_enabled"):
-                    smart_enabled_found = any(
-                        line.startswith("device_smart_enabled{") and
-                        f'disk="{dev_name}"' in line and
-                        line.endswith(" 1")
-                        for line in metrics
-                    )
-                    self.assertTrue(
-                        smart_enabled_found,
-                        f"Expected device_smart_enabled=1 for {dev_name}, not found."
-                    )
-
-                # device_smart_healthy if assessment in [PASS, WARN, FAIL]
-                # PASS => 1, otherwise => 0
-                assessment = device_info.get("assessment", "").upper()
-                if assessment in ["PASS", "WARN", "FAIL"]:
-                    expected_val = 1 if assessment == "PASS" else 0
-                    smart_healthy_found = any(
-                        line.startswith("device_smart_healthy{") and
-                        f'disk="{dev_name}"' in line and
-                        line.endswith(f" {expected_val}")
-                        for line in metrics
-                    )
-                    self.assertTrue(
-                        smart_healthy_found,
-                        f"Expected device_smart_healthy={expected_val} for {dev_name}, not found."
-                    )
+        # Also ensure that non-numeric or disallowed attributes do not appear
+        # For instance "notInSmartmonAttrs" should never appear.
+        for line in metrics:
+            self.assertNotIn(
+                "not_in_smartmon_attrs",
+                line,
+                f"'notInSmartmonAttrs' attribute unexpectedly found in metric line: {line}"
+            )
 
     def test_parse_if_attributes(self):
         """
         Test parse_if_attributes() for every JSON fixture in ./drives/.
-        We do subTest() so each fixture is tested individually.
+        Each fixture is tested individually with clear error reporting.
         """
         for fixture_path in self.fixture_files:
             fixture_name = os.path.basename(fixture_path)
-            with self.subTest(msg=f"Testing if_attributes with {fixture_name}"):
-                data = load_json_fixture(fixture_name)
-                device_info = data["device_info"]
-                if_attrs = data.get("if_attributes", {})
-
-                device = self.create_mock_device_from_json(device_info, if_attrs)
-                metrics = parse_if_attributes(device)
-
-                dev_name = device_info["name"]
-                dev_iface = device_info["interface"]
-                dev_serial = device_info["serial"].lower()
-
-                # For each numeric attribute in JSON, if it's in SMARTMON_ATTRS,
-                # we expect a line in the script's output.
-                for attr_key, attr_val in if_attrs.items():
-                    # Convert from e.g. "criticalWarning" -> "critical_warning"
-                    snake_key = re.sub(r'(?<!^)(?=[A-Z])', '_', attr_key).lower()
-
-                    if isinstance(attr_val, (int, float)) and snake_key in SMARTMON_ATTRS:
-                        # We expect e.g. critical_warning{disk="/dev/..."} <value>
-                        expected_line = (
-                            f"{snake_key}{{disk=\"{dev_name}\",type=\"{dev_iface}\",serial_number=\"{dev_serial}\"}} {attr_val}"
-                        )
-                        self.assertIn(
-                            expected_line,
-                            metrics,
-                            f"Expected metric '{expected_line}' for attribute '{attr_key}' not found."
-                        )
-                    else:
-                        # If it's not in SMARTMON_ATTRS or not numeric,
-                        # we do NOT expect a line with that name+value
-                        unexpected_line = (
-                            f"{snake_key}{{disk=\"{dev_name}\",type=\"{dev_iface}\",serial_number=\"{dev_serial}\"}} {attr_val}"
-                        )
-                        self.assertNotIn(
-                            unexpected_line,
-                            metrics,
-                            f"Unexpected metric '{unexpected_line}' found for {attr_key}."
-                        )
-
-                # Also ensure that non-numeric or disallowed attributes do not appear
-                # For instance "notInSmartmonAttrs" should never appear.
-                for line in metrics:
-                    self.assertNotIn(
-                        "not_in_smartmon_attrs",
-                        line,
-                        f"'notInSmartmonAttrs' attribute unexpectedly found in metric line: {line}"
-                    )
+            with self.subTest(fixture=fixture_name):
+                self._test_parse_if_attributes(fixture_name)
 
     @patch("smartmon.run_command")
     @patch("smartmon.DeviceList")

From 172e0340ba34ef2647e5d634ec4954b2ceeab78a Mon Sep 17 00:00:00 2001
From: technowhizz <7688823+technowhizz@users.noreply.github.com>
Date: Mon, 24 Mar 2025 01:06:07 +0000
Subject: [PATCH 08/16] Use function for snake case conversion

---
 etc/kayobe/ansible/scripts/test_smartmon.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/etc/kayobe/ansible/scripts/test_smartmon.py b/etc/kayobe/ansible/scripts/test_smartmon.py
index 212e5f063..5727f6a47 100644
--- a/etc/kayobe/ansible/scripts/test_smartmon.py
+++ b/etc/kayobe/ansible/scripts/test_smartmon.py
@@ -10,7 +10,8 @@
     parse_device_info,
     parse_if_attributes,
     main,
-    SMARTMON_ATTRS
+    SMARTMON_ATTRS,
+    camel_to_snake
 )
 
 def load_json_fixture(filename):
@@ -158,8 +159,7 @@ def _test_parse_if_attributes(self, fixture_name):
         # For each numeric attribute in JSON, if it's in SMARTMON_ATTRS,
         # we expect a line in the script's output.
         for attr_key, attr_val in if_attrs.items():
-            # Convert from e.g. "criticalWarning" -> "critical_warning"
-            snake_key = re.sub(r'(?<!^)(?=[A-Z])', '_', attr_key).lower()
+            snake_key = camel_to_snake(attr_key)
 
             if isinstance(attr_val, (int, float)) and snake_key in SMARTMON_ATTRS:
                 # We expect e.g. critical_warning{disk="/dev/..."} <value>

From aad34c174429f1c8513289a8e1f524d3de269251 Mon Sep 17 00:00:00 2001
From: technowhizz <7688823+technowhizz@users.noreply.github.com>
Date: Thu, 10 Apr 2025 13:17:58 +0100
Subject: [PATCH 09/16] Move fixtures to tests folder

---
 etc/kayobe/ansible/scripts/test_smartmon.py        | 14 +++++++-------
 .../ansible/scripts/{drives => tests}/nvme.json    |  0
 2 files changed, 7 insertions(+), 7 deletions(-)
 rename etc/kayobe/ansible/scripts/{drives => tests}/nvme.json (100%)

diff --git a/etc/kayobe/ansible/scripts/test_smartmon.py b/etc/kayobe/ansible/scripts/test_smartmon.py
index 5727f6a47..e131846e0 100644
--- a/etc/kayobe/ansible/scripts/test_smartmon.py
+++ b/etc/kayobe/ansible/scripts/test_smartmon.py
@@ -16,9 +16,9 @@
 
 def load_json_fixture(filename):
     """
-    Load a JSON file from the 'drives' subfolder.
+    Load a JSON file from the 'tests' subfolder.
     """
-    path = os.path.join(os.path.dirname(__file__), "drives", filename)
+    path = os.path.join(os.path.dirname(__file__), "tests", filename)
     with open(path, "r", encoding="utf-8") as f:
         return json.load(f)
 
@@ -26,8 +26,8 @@ def load_json_fixture(filename):
 class TestSmartMon(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        # Collect all *.json files from ./drives/
-        data_folder = os.path.join(os.path.dirname(__file__), "drives")
+        # Collect all *.json files from ./tests/
+        data_folder = os.path.join(os.path.dirname(__file__), "tests")
         cls.fixture_files = glob.glob(os.path.join(data_folder, "*.json"))
 
     def create_mock_device_from_json(self, device_info, if_attributes=None):
@@ -133,7 +133,7 @@ def _test_parse_device_info(self, fixture_name):
 
     def test_parse_device_info(self):
         """
-        Test parse_device_info() for every JSON fixture in ./drives/.
+        Test parse_device_info() for every JSON fixture in ./tests/.
         Each fixture is tested individually with clear error reporting.
         """
         for fixture_path in self.fixture_files:
@@ -194,7 +194,7 @@ def _test_parse_if_attributes(self, fixture_name):
 
     def test_parse_if_attributes(self):
         """
-        Test parse_if_attributes() for every JSON fixture in ./drives/.
+        Test parse_if_attributes() for every JSON fixture in ./tests/.
         Each fixture is tested individually with clear error reporting.
         """
         for fixture_path in self.fixture_files:
@@ -206,7 +206,7 @@ def test_parse_if_attributes(self):
     @patch("smartmon.DeviceList")
     def test_main(self, mock_devicelist_class, mock_run_cmd):
         """
-        End-to-end test of main() for every JSON fixture in ./drives/.
+        End-to-end test of main() for every JSON fixture in ./tests/.
         This ensures we can handle multiple disks (multiple fixture files).
         """
         for fixture_path in self.fixture_files:
diff --git a/etc/kayobe/ansible/scripts/drives/nvme.json b/etc/kayobe/ansible/scripts/tests/nvme.json
similarity index 100%
rename from etc/kayobe/ansible/scripts/drives/nvme.json
rename to etc/kayobe/ansible/scripts/tests/nvme.json

From 44601cabc58a8f38eb48f4ae49297aec9fa70e38 Mon Sep 17 00:00:00 2001
From: technowhizz <7688823+technowhizz@users.noreply.github.com>
Date: Thu, 1 May 2025 14:20:56 +0100
Subject: [PATCH 10/16] Use prometheus_client for writing out metrics

---
 etc/kayobe/ansible/scripts/smartmon.py      |  80 +++++++++----
 etc/kayobe/ansible/scripts/test_smartmon.py | 120 ++++++++++----------
 2 files changed, 117 insertions(+), 83 deletions(-)

diff --git a/etc/kayobe/ansible/scripts/smartmon.py b/etc/kayobe/ansible/scripts/smartmon.py
index 202e6981c..f2dbdde13 100644
--- a/etc/kayobe/ansible/scripts/smartmon.py
+++ b/etc/kayobe/ansible/scripts/smartmon.py
@@ -4,7 +4,9 @@
 import json
 import re
 import datetime
+import os
 
+from prometheus_client import CollectorRegistry, Gauge, write_to_textfile
 from pySMART import DeviceList
 
 SMARTCTL_PATH = "/usr/sbin/smartctl"
@@ -110,21 +112,24 @@ def parse_device_info(device):
         "serial_number": serial_number,
         "firmware_version": device.firmware or "",
     }
-    label_str = ",".join(f'{k}="{v}"' for k, v in labels.items())
+    sorted_labels = sorted(labels.items())
+    label_str = ",".join(f'{k}="{v}"' for k, v in sorted_labels)
+
+    metric_labels = f'disk="{device.name}",serial_number="{serial_number}",type="{device.interface}"'
 
     metrics = [
-        f'device_info{{{label_str}}} 1',
-        f'device_smart_available{{disk="{device.name}",type="{device.interface}",serial_number="{serial_number}"}} {1 if device.smart_capable else 0}',
+        f'device_info{{{label_str}}} 1.0',
+        f'device_smart_available{{{metric_labels}}} {float(1) if device.smart_capable else float(0)}',
     ]
 
     if device.smart_capable:
         metrics.append(
-            f'device_smart_enabled{{disk="{device.name}",type="{device.interface}",serial_number="{serial_number}"}} {1 if device.smart_enabled else 0}'
+            f'device_smart_enabled{{{metric_labels}}} {float(1) if device.smart_enabled else float(0)}'
         )
         if device.assessment:
             is_healthy = 1 if device.assessment.upper() == "PASS" else 0
             metrics.append(
-                f'device_smart_healthy{{disk="{device.name}",type="{device.interface}",serial_number="{serial_number}"}} {is_healthy}'
+                f'device_smart_healthy{{{metric_labels}}} {float(is_healthy)}'
             )
 
     return metrics
@@ -143,7 +148,7 @@ def parse_if_attributes(device):
     disk = device.name
     disk_type = device.interface or ""
     serial_number = (device.serial or "").lower()
-    labels = f'disk="{disk}",type="{disk_type}",serial_number="{serial_number}"'
+    labels = f'disk="{disk}",serial_number="{serial_number}",type="{disk_type}"'
 
     # Inspect all public attributes on device.if_attributes
     for attr_name in dir(device.if_attributes):
@@ -156,27 +161,48 @@ def parse_if_attributes(device):
         snake_name = camel_to_snake(attr_name)
 
         if snake_name in SMARTMON_ATTRS and isinstance(val, (int, float)):
-            metrics.append(f"{snake_name}{{{labels}}} {val}")
+            metrics.append(f"{snake_name}{{{labels}}} {float(val)}")
 
     return metrics
 
-def format_output(metrics):
+def write_metrics_to_textfile(metrics, output_path=None):
     """
-    Convert a list of lines like "some_metric{...} value"
-    into a Prometheus text output with # HELP / # TYPE lines.
+    Write metrics to a Prometheus textfile using prometheus_client.
+    Args:
+        metrics (List[str]): List of metric strings in 'name{labels} value' format.
+        output_path (str): Path to write the metrics file. Defaults to node_exporter textfile collector path.
     """
-    output = []
-    last_metric = ""
-    for metric in sorted(metrics):
-        metric_name = metric.split("{")[0]
-        if metric_name != last_metric:
-            output.append(f"# HELP smartmon_{metric_name} SMART metric {metric_name}")
-            output.append(f"# TYPE smartmon_{metric_name} gauge")
-            last_metric = metric_name
-        output.append(f"smartmon_{metric}")
-    return "\n".join(output)
-
-def main():
+    registry = CollectorRegistry()
+    metric_gauges = {}
+    for metric in metrics:
+        # Split metric into name, labels, and value
+        metric_name, rest = metric.split('{', 1)
+        label_str, value = rest.split('}', 1)
+        value = value.strip()
+        # Parse labels into a dictionary
+        labels = {}
+        label_keys = []
+        label_values = []
+        for label in label_str.split(','):
+            if '=' in label:
+                k, v = label.split('=', 1)
+                k = k.strip()
+                v = v.strip('"')
+                labels[k] = v
+                label_keys.append(k)
+                label_values.append(v)
+        help_str = f"SMART metric {metric_name}"
+        # Create Gauge if not already present
+        if metric_name not in metric_gauges:
+            metric_gauges[metric_name] = Gauge(metric_name, help_str, label_keys, registry=registry)
+        # Set metric value
+        gauge = metric_gauges[metric_name]
+        gauge.labels(*label_values).set(float(value))
+    if output_path is None:
+        output_path = '/var/lib/node_exporter/textfile_collector/smartmon.prom'
+    write_to_textfile(output_path, registry)  # Write all metrics to file
+
+def main(output_path=None):
     all_metrics = []
 
     try:
@@ -197,7 +223,7 @@ def main():
         disk_type = dev.interface or ""
         serial_number = (dev.serial or "").lower()
 
-        run_timestamp = int(datetime.datetime.now(datetime.UTC).timestamp())
+        run_timestamp = int(datetime.datetime.now(datetime.timezone.utc).timestamp())
         all_metrics.append(f'smartctl_run{{disk="{disk_name}",type="{disk_type}"}} {run_timestamp}')
 
         active = 1
@@ -220,7 +246,11 @@ def main():
         all_metrics.extend(parse_device_info(dev))
         all_metrics.extend(parse_if_attributes(dev))
 
-    print(format_output(all_metrics))
+    write_metrics_to_textfile(all_metrics, output_path)
 
 if __name__ == "__main__":
-    main()
+    import argparse
+    parser = argparse.ArgumentParser(description="Export SMART metrics to Prometheus textfile format.")
+    parser.add_argument('--output', type=str, default=None, help='Output path for Prometheus textfile (default: /var/lib/node_exporter/textfile_collector/smartmon.prom)')
+    args = parser.parse_args()
+    main(args.output)
diff --git a/etc/kayobe/ansible/scripts/test_smartmon.py b/etc/kayobe/ansible/scripts/test_smartmon.py
index e131846e0..38bc66e52 100644
--- a/etc/kayobe/ansible/scripts/test_smartmon.py
+++ b/etc/kayobe/ansible/scripts/test_smartmon.py
@@ -1,17 +1,19 @@
 import glob
 import json
 import os
-import re
 import unittest
+import tempfile
+import math
+from time import sleep
 
 from unittest.mock import patch, MagicMock
-
 from smartmon import (
     parse_device_info,
     parse_if_attributes,
     main,
     SMARTMON_ATTRS,
-    camel_to_snake
+    camel_to_snake,
+    write_metrics_to_textfile,
 )
 
 def load_json_fixture(filename):
@@ -75,7 +77,6 @@ def _test_parse_device_info(self, fixture_name):
         dev_serial = device_info["serial"].lower()
 
         # The device_info line should exist for every device
-        # e.g. device_info{disk="/dev/...",type="...",serial_number="..."} 1
         device_info_found = any(
             line.startswith("device_info{") and
             f'disk="{dev_name}"' in line and
@@ -94,12 +95,12 @@ def _test_parse_device_info(self, fixture_name):
                 line.startswith("device_smart_available{") and
                 f'disk="{dev_name}"' in line and
                 f'serial_number="{dev_serial}"' in line and
-                line.endswith(" 1")
+                line.endswith(" 1.0")
                 for line in metrics
             )
             self.assertTrue(
                 smart_available_found,
-                f"Expected device_smart_available=1 for {dev_name}, not found."
+                f"Expected device_smart_available=1.0 for {dev_name}, not found."
             )
 
         # If smart_enabled is true, we expect device_smart_enabled = 1
@@ -107,19 +108,19 @@ def _test_parse_device_info(self, fixture_name):
             smart_enabled_found = any(
                 line.startswith("device_smart_enabled{") and
                 f'disk="{dev_name}"' in line and
-                line.endswith(" 1")
+                line.endswith(" 1.0")
                 for line in metrics
             )
             self.assertTrue(
                 smart_enabled_found,
-                f"Expected device_smart_enabled=1 for {dev_name}, not found."
+                f"Expected device_smart_enabled=1.0 for {dev_name}, not found."
             )
 
         # device_smart_healthy if assessment in [PASS, WARN, FAIL]
         # PASS => 1, otherwise => 0
         assessment = device_info.get("assessment", "").upper()
         if assessment in ["PASS", "WARN", "FAIL"]:
-            expected_val = 1 if assessment == "PASS" else 0
+            expected_val = float(1) if assessment == "PASS" else float(0)
             smart_healthy_found = any(
                 line.startswith("device_smart_healthy{") and
                 f'disk="{dev_name}"' in line and
@@ -162,9 +163,8 @@ def _test_parse_if_attributes(self, fixture_name):
             snake_key = camel_to_snake(attr_key)
 
             if isinstance(attr_val, (int, float)) and snake_key in SMARTMON_ATTRS:
-                # We expect e.g. critical_warning{disk="/dev/..."} <value>
                 expected_line = (
-                    f"{snake_key}{{disk=\"{dev_name}\",type=\"{dev_iface}\",serial_number=\"{dev_serial}\"}} {attr_val}"
+                    f"{snake_key}{{disk=\"{dev_name}\",serial_number=\"{dev_serial}\",type=\"{dev_iface}\"}} {float(attr_val)}"
                 )
                 self.assertIn(
                     expected_line,
@@ -175,7 +175,7 @@ def _test_parse_if_attributes(self, fixture_name):
                 # If it's not in SMARTMON_ATTRS or not numeric,
                 # we do NOT expect a line with that name+value
                 unexpected_line = (
-                    f"{snake_key}{{disk=\"{dev_name}\",type=\"{dev_iface}\",serial_number=\"{dev_serial}\"}} {attr_val}"
+                    f"{snake_key}{{disk=\"{dev_name}\",serial_number=\"{dev_serial}\",type=\"{dev_iface}\"}} {float(attr_val)}"
                 )
                 self.assertNotIn(
                     unexpected_line,
@@ -204,28 +204,32 @@ def test_parse_if_attributes(self):
 
     @patch("smartmon.run_command")
     @patch("smartmon.DeviceList")
-    def test_main(self, mock_devicelist_class, mock_run_cmd):
+    @patch("smartmon.write_metrics_to_textfile", wraps=write_metrics_to_textfile)
+    def test_main(self, mock_write_metrics, mock_devicelist_class, mock_run_cmd):
         """
         End-to-end test of main() for every JSON fixture in ./tests/.
         This ensures we can handle multiple disks (multiple fixture files).
+        Checks metrics written to a temp file, and that write_metrics_to_textfile is called once.
         """
+
+        # Patch run_command to return a version & "active" power_mode
+        def run_command_side_effect(cmd, parse_json=False):
+            if "--version" in cmd:
+                return "smartctl 7.3 5422 [x86_64-linux-5.15.0]\n..."
+            if "-n" in cmd and "standby" in cmd and parse_json:
+                return {"power_mode": "active"}
+            return ""
+
+        mock_run_cmd.side_effect = run_command_side_effect
+
         for fixture_path in self.fixture_files:
             fixture_name = os.path.basename(fixture_path)
             with self.subTest(msg=f"Testing main() with {fixture_name}"):
+                mock_write_metrics.reset_mock()
                 data = load_json_fixture(fixture_name)
                 device_info = data["device_info"]
                 if_attrs = data.get("if_attributes", {})
 
-                # Patch run_command to return a version & "active" power_mode
-                def run_command_side_effect(cmd, parse_json=False):
-                    if "--version" in cmd:
-                        return "smartctl 7.3 5422 [x86_64-linux-5.15.0]\n..."
-                    if "-n" in cmd and "standby" in cmd and parse_json:
-                        return {"power_mode": "active"}
-                    return ""
-
-                mock_run_cmd.side_effect = run_command_side_effect
-
                 # Mock a single device from the fixture
                 device_mock = self.create_mock_device_from_json(device_info, if_attrs)
 
@@ -234,41 +238,41 @@ def run_command_side_effect(cmd, parse_json=False):
                 mock_dev_list.devices = [device_mock]
                 mock_devicelist_class.return_value = mock_dev_list
 
-                with patch("builtins.print") as mock_print:
-                    main()
-
-                    printed_lines = []
-                    for call_args in mock_print.call_args_list:
-                        printed_lines.extend(call_args[0][0].split("\n"))
-                dev_name = device_info["name"]
-                dev_iface = device_info["interface"]
-                dev_serial = device_info["serial"].lower()
-
-                # We expect a line for the run timestamp, e.g.:
-                # smartmon_smartctl_run{disk="/dev/...",type="..."} 1671234567
-                run_line_found = any(
-                    line.startswith("smartmon_smartctl_run{") and
-                    f'disk="{dev_name}"' in line and
-                    f'type="{dev_iface}"' in line
-                    for line in printed_lines
-                )
-                self.assertTrue(
-                    run_line_found,
-                    f"Expected 'smartmon_smartctl_run' metric line for {dev_name} not found."
-                )
-
-                # Because we mocked "power_mode": "active", we expect device_active=1
-                active_line_found = any(
-                    line.startswith("smartmon_device_active{") and
-                    f'disk="{dev_name}"' in line and
-                    f'serial_number="{dev_serial}"' in line and
-                    line.endswith(" 1")
-                    for line in printed_lines
-                )
-                self.assertTrue(
-                    active_line_found,
-                    f"Expected 'device_active{{...}} 1' line for {dev_name} not found."
-                )
+                with tempfile.NamedTemporaryFile(mode="r+", delete_on_close=False) as tmpfile:
+                    path= tmpfile.name
+                    main(output_path=path)
+                    tmpfile.close()
+
+                    # Ensure write_metrics_to_textfile was called once
+                    self.assertEqual(mock_write_metrics.call_count, 1)
+
+                    with open(path, "r") as f:
+                        # Read the metrics from the file
+                        metrics_lines = [line.strip() for line in f.readlines() if line.strip() and not line.startswith('#')]
+                        print(f"Metrics lines: {metrics_lines}")
+
+                # Generate expected metrics using the parse functions
+                expected_metrics = []
+                expected_metrics.extend(parse_device_info(device_mock))
+                expected_metrics.extend(parse_if_attributes(device_mock))
+
+                # Check that all expected metrics are present in the file
+                for expected in expected_metrics:
+                    exp_metric, exp_val_str = expected.rsplit(" ", 1)
+                    exp_val = float(exp_val_str)
+                    found = any(
+                        (exp_metric in line) and
+                        math.isclose(float(line.rsplit(" ", 1)[1]), exp_val)
+                        for line in metrics_lines
+                    )
+                    self.assertTrue(found, f"Expected metric '{expected}' not found")
+
+                # Check that smartctl_version metric is present
+                version_found = any(line.startswith("smartctl_version{") for line in metrics_lines)
+                self.assertTrue(version_found, "Expected 'smartctl_version' metric not found in output file.")
+
+                # Check that the output file is not empty
+                self.assertTrue(metrics_lines, "Metrics output file is empty.")
 
 if __name__ == "__main__":
     unittest.main()

From 783a68c19bbf2d43e4339e139d965136603e8563 Mon Sep 17 00:00:00 2001
From: technowhizz <7688823+technowhizz@users.noreply.github.com>
Date: Thu, 1 May 2025 14:25:40 +0100
Subject: [PATCH 11/16] Add args and returns to docstring for
 parse_if_attributes

---
 etc/kayobe/ansible/scripts/smartmon.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/etc/kayobe/ansible/scripts/smartmon.py b/etc/kayobe/ansible/scripts/smartmon.py
index f2dbdde13..27f735bcc 100644
--- a/etc/kayobe/ansible/scripts/smartmon.py
+++ b/etc/kayobe/ansible/scripts/smartmon.py
@@ -139,6 +139,11 @@ def parse_if_attributes(device):
     For any device type (ATA, NVMe, SCSI, etc.), we read device.if_attributes.
     We'll iterate over its public fields, convert them to snake_case,
     and if it's in SMARTMON_ATTRS and numeric, we produce metrics.
+
+    Args:
+        device (Device): A pySMART Device object with attributes such as name, interface, etc.
+    Returns:
+        List[str]: A list of Prometheus formatted metric strings.
     """
     metrics = []
 

From 4b1fc1f2598245b98530847e635d3a325da9919d Mon Sep 17 00:00:00 2001
From: technowhizz <7688823+technowhizz@users.noreply.github.com>
Date: Thu, 1 May 2025 19:11:36 +0100
Subject: [PATCH 12/16] Ensure metric names start with smartmon

---
 etc/kayobe/ansible/scripts/smartmon.py      | 16 +++++++-------
 etc/kayobe/ansible/scripts/test_smartmon.py | 24 ++++++++++-----------
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/etc/kayobe/ansible/scripts/smartmon.py b/etc/kayobe/ansible/scripts/smartmon.py
index 27f735bcc..033ddbb94 100644
--- a/etc/kayobe/ansible/scripts/smartmon.py
+++ b/etc/kayobe/ansible/scripts/smartmon.py
@@ -118,18 +118,18 @@ def parse_device_info(device):
     metric_labels = f'disk="{device.name}",serial_number="{serial_number}",type="{device.interface}"'
 
     metrics = [
-        f'device_info{{{label_str}}} 1.0',
-        f'device_smart_available{{{metric_labels}}} {float(1) if device.smart_capable else float(0)}',
+        f'smartmon_device_info{{{label_str}}} 1.0',
+        f'smartmon_device_smart_available{{{metric_labels}}} {float(1) if device.smart_capable else float(0)}',
     ]
 
     if device.smart_capable:
         metrics.append(
-            f'device_smart_enabled{{{metric_labels}}} {float(1) if device.smart_enabled else float(0)}'
+            f'smartmon_device_smart_enabled{{{metric_labels}}} {float(1) if device.smart_enabled else float(0)}'
         )
         if device.assessment:
             is_healthy = 1 if device.assessment.upper() == "PASS" else 0
             metrics.append(
-                f'device_smart_healthy{{{metric_labels}}} {float(is_healthy)}'
+                f'smartmon_device_smart_healthy{{{metric_labels}}} {float(is_healthy)}'
             )
 
     return metrics
@@ -166,7 +166,7 @@ def parse_if_attributes(device):
         snake_name = camel_to_snake(attr_name)
 
         if snake_name in SMARTMON_ATTRS and isinstance(val, (int, float)):
-            metrics.append(f"{snake_name}{{{labels}}} {float(val)}")
+            metrics.append(f"smartmon_{snake_name}{{{labels}}} {float(val)}")
 
     return metrics
 
@@ -219,7 +219,7 @@ def main(output_path=None):
             version_num = "unknown"
     except Exception:
         version_num = "unknown"
-    all_metrics.append(f'smartctl_version{{version="{version_num}"}} 1')
+    all_metrics.append(f'smartmon_smartctl_version{{version="{version_num}"}} 1')
 
     dev_list = DeviceList()
 
@@ -229,7 +229,7 @@ def main(output_path=None):
         serial_number = (dev.serial or "").lower()
 
         run_timestamp = int(datetime.datetime.now(datetime.timezone.utc).timestamp())
-        all_metrics.append(f'smartctl_run{{disk="{disk_name}",type="{disk_type}"}} {run_timestamp}')
+        all_metrics.append(f'smartmon_smartctl_run{{disk="{disk_name}",type="{disk_type}"}} {run_timestamp}')
 
         active = 1
         try:
@@ -243,7 +243,7 @@ def main(output_path=None):
             active = 0
 
         all_metrics.append(
-            f'device_active{{disk="{disk_name}",type="{disk_type}",serial_number="{serial_number}"}} {active}'
+            f'smartmon_device_active{{disk="{disk_name}",type="{disk_type}",serial_number="{serial_number}"}} {active}'
         )
         if active == 0:
             continue
diff --git a/etc/kayobe/ansible/scripts/test_smartmon.py b/etc/kayobe/ansible/scripts/test_smartmon.py
index 38bc66e52..4749808a5 100644
--- a/etc/kayobe/ansible/scripts/test_smartmon.py
+++ b/etc/kayobe/ansible/scripts/test_smartmon.py
@@ -78,7 +78,7 @@ def _test_parse_device_info(self, fixture_name):
 
         # The device_info line should exist for every device
         device_info_found = any(
-            line.startswith("device_info{") and
+            line.startswith("smartmon_device_info{") and
             f'disk="{dev_name}"' in line and
             f'type="{dev_iface}"' in line and
             f'serial_number="{dev_serial}"' in line
@@ -86,13 +86,13 @@ def _test_parse_device_info(self, fixture_name):
         )
         self.assertTrue(
             device_info_found,
-            f"Expected a device_info metric line for {dev_name} but didn't find it."
+            f"Expected a smartmon_device_info metric line for {dev_name} but didn't find it."
         )
 
         # If smart_capable is true, we expect device_smart_available = 1
         if device_info.get("smart_capable"):
             smart_available_found = any(
-                line.startswith("device_smart_available{") and
+                line.startswith("smartmon_device_smart_available{") and
                 f'disk="{dev_name}"' in line and
                 f'serial_number="{dev_serial}"' in line and
                 line.endswith(" 1.0")
@@ -100,20 +100,20 @@ def _test_parse_device_info(self, fixture_name):
             )
             self.assertTrue(
                 smart_available_found,
-                f"Expected device_smart_available=1.0 for {dev_name}, not found."
+                f"Expected smartmon_device_smart_available=1.0 for {dev_name}, not found."
             )
 
         # If smart_enabled is true, we expect device_smart_enabled = 1
         if device_info.get("smart_enabled"):
             smart_enabled_found = any(
-                line.startswith("device_smart_enabled{") and
+                line.startswith("smartmon_device_smart_enabled{") and
                 f'disk="{dev_name}"' in line and
                 line.endswith(" 1.0")
                 for line in metrics
             )
             self.assertTrue(
                 smart_enabled_found,
-                f"Expected device_smart_enabled=1.0 for {dev_name}, not found."
+                f"Expected smartmon_device_smart_enabled=1.0 for {dev_name}, not found."
             )
 
         # device_smart_healthy if assessment in [PASS, WARN, FAIL]
@@ -122,14 +122,14 @@ def _test_parse_device_info(self, fixture_name):
         if assessment in ["PASS", "WARN", "FAIL"]:
             expected_val = float(1) if assessment == "PASS" else float(0)
             smart_healthy_found = any(
-                line.startswith("device_smart_healthy{") and
+                line.startswith("smartmon_device_smart_healthy{") and
                 f'disk="{dev_name}"' in line and
                 line.endswith(f" {expected_val}")
                 for line in metrics
             )
             self.assertTrue(
                 smart_healthy_found,
-                f"Expected device_smart_healthy={expected_val} for {dev_name}, not found."
+                f"Expected smartmon_device_smart_healthy={expected_val} for {dev_name}, not found."
             )
 
     def test_parse_device_info(self):
@@ -164,7 +164,7 @@ def _test_parse_if_attributes(self, fixture_name):
 
             if isinstance(attr_val, (int, float)) and snake_key in SMARTMON_ATTRS:
                 expected_line = (
-                    f"{snake_key}{{disk=\"{dev_name}\",serial_number=\"{dev_serial}\",type=\"{dev_iface}\"}} {float(attr_val)}"
+                    f"smartmon_{snake_key}{{disk=\"{dev_name}\",serial_number=\"{dev_serial}\",type=\"{dev_iface}\"}} {float(attr_val)}"
                 )
                 self.assertIn(
                     expected_line,
@@ -175,7 +175,7 @@ def _test_parse_if_attributes(self, fixture_name):
                 # If it's not in SMARTMON_ATTRS or not numeric,
                 # we do NOT expect a line with that name+value
                 unexpected_line = (
-                    f"{snake_key}{{disk=\"{dev_name}\",serial_number=\"{dev_serial}\",type=\"{dev_iface}\"}} {float(attr_val)}"
+                    f"smartmon_{snake_key}{{disk=\"{dev_name}\",serial_number=\"{dev_serial}\",type=\"{dev_iface}\"}} {float(attr_val)}"
                 )
                 self.assertNotIn(
                     unexpected_line,
@@ -268,8 +268,8 @@ def run_command_side_effect(cmd, parse_json=False):
                     self.assertTrue(found, f"Expected metric '{expected}' not found")
 
                 # Check that smartctl_version metric is present
-                version_found = any(line.startswith("smartctl_version{") for line in metrics_lines)
-                self.assertTrue(version_found, "Expected 'smartctl_version' metric not found in output file.")
+                version_found = any(line.startswith("smartmon_smartctl_version{") for line in metrics_lines)
+                self.assertTrue(version_found, "Expected 'smartmon_smartctl_version' metric not found in output file.")
 
                 # Check that the output file is not empty
                 self.assertTrue(metrics_lines, "Metrics output file is empty.")

From 1bc57332572f78b6f9f5a19bc4574976c2af9069 Mon Sep 17 00:00:00 2001
From: technowhizz <7688823+technowhizz@users.noreply.github.com>
Date: Thu, 1 May 2025 19:17:03 +0100
Subject: [PATCH 13/16] Update smartmon playbook for smartmon.py

---
 etc/kayobe/ansible/smartmon-tools.yml | 63 +++++++++++++++++++++++----
 1 file changed, 55 insertions(+), 8 deletions(-)

diff --git a/etc/kayobe/ansible/smartmon-tools.yml b/etc/kayobe/ansible/smartmon-tools.yml
index 00cdfa495..351ce0325 100644
--- a/etc/kayobe/ansible/smartmon-tools.yml
+++ b/etc/kayobe/ansible/smartmon-tools.yml
@@ -13,6 +13,30 @@
         state: present
       become: true
 
+    - name: Ensure Python 3, venv, and pip are installed
+      ansible.builtin.package:
+        name:
+          - python3
+          - python3-venv
+          - python3-pip
+        state: present
+      become: true
+
+    - name: Create smartmon Python virtual environment
+      ansible.builtin.command:
+        cmd: python3 -m venv /opt/smartmon-venv
+        creates: /opt/smartmon-venv/bin/activate
+      become: true
+
+    - name: Install prometheus_client and pySMART in venv
+      ansible.builtin.pip:
+        name:
+          - prometheus_client
+          - pySMART
+        virtualenv: /opt/smartmon-venv
+        virtualenv_python: python3
+      become: true
+
     - name: Ensure the cron/crond service is running
       ansible.builtin.service:
         name: "{{ 'cron' if ansible_facts['distribution'] == 'Ubuntu' else 'crond' }}"
@@ -20,15 +44,15 @@
         enabled: true
       become: true
 
-    - name: Copy smartmon.sh and nvmemon.sh from scripts folder
+    - name: Copy smartmon.py and nvmemon.sh from scripts folder
       ansible.builtin.copy:
         src: scripts/{{ item }}
-        dest: /usr/local/bin/
+        dest: /usr/local/bin/{{ item }}
         owner: root
         group: root
         mode: "0700"
       loop:
-        - smartmon.sh
+        - smartmon.py
         - nvmemon.sh
       become: true
 
@@ -40,16 +64,39 @@
         job: /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
       become: true
 
-    - name: Schedule cronjob to run both scripts every 5 minutes and save output to file
+    - name: Schedule cronjob to run smartmon.py every 5 minutes and save output to file
       ansible.builtin.cron:
-        name: SMART metrics for drive monitoring using {{ item }}
+        name: SMART metrics for drive monitoring using smartmon.py
+        user: root
+        minute: "*/5"
+        job: >-
+          umask 0022 && /opt/smartmon-venv/bin/python /usr/local/bin/smartmon.py --output /var/lib/docker/volumes/textfile/_data/smartmon.prom.temp &&
+          mv -f /var/lib/docker/volumes/textfile/_data/smartmon.prom.temp /var/lib/docker/volumes/textfile/_data/smartmon.prom
+      become: true
+
+    - name: Schedule cronjob to run nvmemon.sh every 5 minutes and save output to file
+      ansible.builtin.cron:
+        name: SMART metrics for drive monitoring using nvmemon.sh
         user: root
         minute: "*/5"
         job: >-
-          umask 0022 && /usr/local/bin/{{ item }}.sh >
-          /var/lib/docker/volumes/textfile/_data/{{ item }}.prom.temp &&
-          mv -f /var/lib/docker/volumes/textfile/_data/{{ item }}.prom.temp /var/lib/docker/volumes/textfile/_data/{{ item }}.prom
+          umask 0022 && /usr/local/bin/nvmemon.sh >
+          /var/lib/docker/volumes/textfile/_data/nvmemon.prom.temp &&
+          mv -f /var/lib/docker/volumes/textfile/_data/nvmemon.prom.temp /var/lib/docker/volumes/textfile/_data/nvmemon.prom
+      become: true
+
+    - name: Remove old cronjobs if present
+      ansible.builtin.cron:
+        name: SMART metrics for drive monitoring using {{ item }}
+        user: root
+        state: absent
+      become: true
       loop:
         - smartmon
         - nvmemon
+
+    - name: Remove old smartmon.sh if present
+      ansible.builtin.file:
+        path: /usr/local/bin/smartmon.sh
+        state: absent
       become: true

From df8c944eed7305c62e5afb373a51a43865b0a161 Mon Sep 17 00:00:00 2001
From: technowhizz <7688823+technowhizz@users.noreply.github.com>
Date: Thu, 1 May 2025 19:17:36 +0100
Subject: [PATCH 14/16] Add python script to generate test fixtures from real
 drives

---
 .../ansible/scripts/generate_fixtures.py      | 118 ++++++++++++++++++
 1 file changed, 118 insertions(+)
 create mode 100644 etc/kayobe/ansible/scripts/generate_fixtures.py

diff --git a/etc/kayobe/ansible/scripts/generate_fixtures.py b/etc/kayobe/ansible/scripts/generate_fixtures.py
new file mode 100644
index 000000000..5f8f7cc64
--- /dev/null
+++ b/etc/kayobe/ansible/scripts/generate_fixtures.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python3
+import json
+import re
+from pySMART import DeviceList
+
+SMARTMON_ATTRS = {  # snake_case names kept by attrs_to_dict(disk.if_attributes, ...)
+    "airflow_temperature_cel",
+    "command_timeout",
+    "current_pending_sector",
+    "end_to_end_error",
+    "erase_fail_count",
+    "g_sense_error_rate",
+    "hardware_ecc_recovered",
+    "host_reads_32mib",
+    "host_reads_mib",
+    "host_writes_32mib",
+    "host_writes_mib",
+    "load_cycle_count",
+    "media_wearout_indicator",
+    "nand_writes_1gib",
+    "offline_uncorrectable",
+    "power_cycle_count",
+    "power_on_hours",
+    "program_fail_cnt_total",
+    "program_fail_count",
+    "raw_read_error_rate",
+    "reallocated_event_count",
+    "reallocated_sector_ct",
+    "reported_uncorrect",
+    "runtime_bad_block",
+    "sata_downshift_count",
+    "seek_error_rate",
+    "spin_retry_count",
+    "spin_up_time",
+    "start_stop_count",
+    "temperature_case",
+    "temperature_celsius",
+    "temperature_internal",
+    "total_lbas_read",
+    "total_lbas_written",
+    "udma_crc_error_count",
+    "unsafe_shutdown_count",
+    "unused_rsvd_blk_cnt_tot",
+    "wear_leveling_count",
+    "workld_host_reads_perc",
+    "workld_media_wear_indic",
+    "workload_minutes",
+    "critical_warning",  # NOTE(review): looks like NVMe-only names from here on - confirm
+    "temperature",
+    "available_spare",
+    "available_spare_threshold",
+    "percentage_used",
+    "data_units_read",
+    "data_units_written",
+    "host_reads",
+    "host_writes",
+    "controller_busy_time",
+    "power_cycles",
+    "unsafe_shutdowns",
+    "media_errors",
+    "num_err_log_entries",
+    "warning_temp_time",
+    "critical_comp_time",
+}
+
+DISK_INFO = {  # attribute names of each disk that are copied into "device_info"
+    "name",
+    "interface",
+    "vendor",
+    "family",
+    "model",
+    "serial",
+    "firmware",
+    "smart_capable",
+    "smart_enabled",
+    "assessment",
+}
+
+def camel_to_snake(name):
+    """
+    Lower-case `name`, putting "_" before each A-Z letter that is not its first character.
+    Reference: https://stackoverflow.com/questions/1175208/elegant-python-function-to-convert-camelcase-to-snake-case
+    """
+    separated = re.sub(r'(?<!^)(?=[A-Z])', '_', name)
+    return separated.lower()
+
+def attrs_to_dict(obj, allowed_keys):
+    """
+    Build {attr: value} for every public attribute of `obj` whose snake_case
+    name is in `allowed_keys`, skipping values that are None or callable.
+    Keys of the result are the attribute names exactly as found on `obj`.
+    """
+    attributes = {}
+    for name in dir(obj):
+        # Filter on the name before touching the attribute, so attributes that
+        # are not wanted are never evaluated.
+        if name.startswith('_') or camel_to_snake(name) not in allowed_keys:
+            continue
+        try:
+            value = getattr(obj, name)
+        except Exception:
+            # Best effort: an attribute whose lookup raises is left out.
+            continue
+        if value is not None and not callable(value):
+            attributes[name] = value
+    return attributes
+
+# Print one fixture per detected disk, shaped like the JSON files in tests/:
+# {"device_info": {...}, "if_attributes": {...}}.
+for disk in DeviceList().devices:
+    fixtures = {
+        "device_info": attrs_to_dict(disk, DISK_INFO),
+        "if_attributes": attrs_to_dict(disk.if_attributes, SMARTMON_ATTRS),
+    }
+
+    print(f'Disk: {disk.name}: \n')
+    # default=str: any value json cannot encode natively is written as str(value).
+    print(json.dumps(fixtures, indent=2, default=str))

From 6447df07b9ba0f2cf9af93b8f251bd083c893142 Mon Sep 17 00:00:00 2001
From: technowhizz <7688823+technowhizz@users.noreply.github.com>
Date: Thu, 1 May 2025 19:20:22 +0100
Subject: [PATCH 15/16] Remove smartmon.sh

---
 etc/kayobe/ansible/scripts/smartmon.sh | 203 -------------------------
 1 file changed, 203 deletions(-)
 delete mode 100644 etc/kayobe/ansible/scripts/smartmon.sh

diff --git a/etc/kayobe/ansible/scripts/smartmon.sh b/etc/kayobe/ansible/scripts/smartmon.sh
deleted file mode 100644
index c08c46e60..000000000
--- a/etc/kayobe/ansible/scripts/smartmon.sh
+++ /dev/null
@@ -1,203 +0,0 @@
-#!/bin/bash
-# Script informed by the collectd monitoring script for smartmontools (using smartctl)
-# by Samuel B. <samuel_._behan_(at)_dob_._sk> (c) 2012
-# source at: http://devel.dob.sk/collectd-scripts/
-
-# TODO: This probably needs to be a little more complex.  The raw numbers can have more
-#       data in them than you'd think.
-#       http://arstechnica.com/civis/viewtopic.php?p=22062211
-
-# Formatting done via shfmt -i 2
-# https://github.com/mvdan/sh
-
-parse_smartctl_attributes_awk="$(
-  cat <<'SMARTCTLAWK'
-$1 ~ /^ *[0-9]+$/ && $2 ~ /^[a-zA-Z0-9_-]+$/ {
-  gsub(/-/, "_");
-  printf "%s_value{%s,smart_id=\"%s\"} %d\n", $2, labels, $1, $4
-  printf "%s_worst{%s,smart_id=\"%s\"} %d\n", $2, labels, $1, $5
-  printf "%s_threshold{%s,smart_id=\"%s\"} %d\n", $2, labels, $1, $6
-  printf "%s_raw_value{%s,smart_id=\"%s\"} %e\n", $2, labels, $1, $10
-}
-SMARTCTLAWK
-)"
-
-smartmon_attrs="$(
-  cat <<'SMARTMONATTRS'
-airflow_temperature_cel
-command_timeout
-current_pending_sector
-end_to_end_error
-erase_fail_count
-g_sense_error_rate
-hardware_ecc_recovered
-host_reads_32mib
-host_reads_mib
-host_writes_32mib
-host_writes_mib
-load_cycle_count
-media_wearout_indicator
-nand_writes_1gib
-offline_uncorrectable
-power_cycle_count
-power_on_hours
-program_fail_cnt_total
-program_fail_count
-raw_read_error_rate
-reallocated_event_count
-reallocated_sector_ct
-reported_uncorrect
-runtime_bad_block
-sata_downshift_count
-seek_error_rate
-spin_retry_count
-spin_up_time
-start_stop_count
-temperature_case
-temperature_celsius
-temperature_internal
-total_lbas_read
-total_lbas_written
-udma_crc_error_count
-unsafe_shutdown_count
-unused_rsvd_blk_cnt_tot
-wear_leveling_count
-workld_host_reads_perc
-workld_media_wear_indic
-workload_minutes
-SMARTMONATTRS
-)"
-smartmon_attrs="$(echo "${smartmon_attrs}" | xargs | tr ' ' '|')"
-
-parse_smartctl_attributes() {
-  local disk="$1"
-  local disk_type="$2"
-  local serial="$3"
-  local labels="disk=\"${disk}\",type=\"${disk_type}\",serial_number=\"${serial}\""
-  sed 's/^ \+//g' |
-    awk -v labels="${labels}" "${parse_smartctl_attributes_awk}" 2>/dev/null |
-    tr '[:upper:]' '[:lower:]' |
-    grep -E "(${smartmon_attrs})"
-}
-
-parse_smartctl_scsi_attributes() {
-  local disk="$1"
-  local disk_type="$2"
-  local serial="$3"
-  local labels="disk=\"${disk}\",type=\"${disk_type}\",serial_number=\"${serial}\""
-  while read -r line; do
-    attr_type="$(echo "${line}" | tr '=' ':' | cut -f1 -d: | sed 's/^ \+//g' | tr ' ' '_')"
-    attr_value="$(echo "${line}" | tr '=' ':' | cut -f2 -d: | sed 's/^ \+//g')"
-    case "${attr_type}" in
-    number_of_hours_powered_up_) power_on="$(echo "${attr_value}" | awk '{ printf "%e\n", $1 }')" ;;
-    Current_Drive_Temperature) temp_cel="$(echo "${attr_value}" | cut -f1 -d' ' | awk '{ printf "%e\n", $1 }')" ;;
-    Blocks_sent_to_initiator_) lbas_read="$(echo "${attr_value}" | awk '{ printf "%e\n", $1 }')" ;;
-    Blocks_received_from_initiator_) lbas_written="$(echo "${attr_value}" | awk '{ printf "%e\n", $1 }')" ;;
-    Accumulated_start-stop_cycles) power_cycle="$(echo "${attr_value}" | awk '{ printf "%e\n", $1 }')" ;;
-    Elements_in_grown_defect_list) grown_defects="$(echo "${attr_value}" | awk '{ printf "%e\n", $1 }')" ;;
-    esac
-  done
-  [ -n "$power_on" ] && echo "power_on_hours_raw_value{${labels},smart_id=\"9\"} ${power_on}"
-  [ -n "$temp_cel" ] && echo "temperature_celsius_raw_value{${labels},smart_id=\"194\"} ${temp_cel}"
-  [ -n "$lbas_read" ] && echo "total_lbas_read_raw_value{${labels},smart_id=\"242\"} ${lbas_read}"
-  [ -n "$lbas_written" ] && echo "total_lbas_written_raw_value{${labels},smart_id=\"241\"} ${lbas_written}"
-  [ -n "$power_cycle" ] && echo "power_cycle_count_raw_value{${labels},smart_id=\"12\"} ${power_cycle}"
-  [ -n "$grown_defects" ] && echo "grown_defects_count_raw_value{${labels},smart_id=\"-1\"} ${grown_defects}"
-}
-
-parse_smartctl_info() {
-  shopt -s nocasematch
-  local -i smart_available=0 smart_enabled=0 smart_healthy=
-  local disk="$1" disk_type="$2"
-  local model_family='' device_model='' serial_number='' fw_version='' vendor='' product='' revision='' lun_id=''
-  while read -r line; do
-    info_type="$(echo "${line}" | cut -f1 -d: | tr ' ' '_')"
-    info_value="$(echo "${line}" | cut -f2- -d: | sed 's/^ \+//g' | sed 's/"/\\"/')"
-    case "${info_type}" in
-    Model_Family) model_family="${info_value}" ;;
-    Device_Model) device_model="${info_value}" ;;
-    Serial_Number) serial_number="$(echo ${info_value} | tr '[:upper:]' '[:lower:]')" ;;
-    Firmware_Version) fw_version="${info_value}" ;;
-    Vendor) vendor="${info_value}" ;;
-    Product) product="${info_value}" ;;
-    Revision) revision="${info_value}" ;;
-    Logical_Unit_id) lun_id="${info_value}" ;;
-    esac
-    if [[ "${info_type}" == 'SMART_support_is' ]]; then
-      case "${info_value:0:7}" in
-      Enabled) smart_available=1; smart_enabled=1 ;;
-      Availab) smart_available=1; smart_enabled=0 ;;
-      Unavail) smart_available=0; smart_enabled=0 ;;
-      esac
-    fi
-    if [[ "${info_type}" == 'SMART_overall-health_self-assessment_test_result' ]]; then
-      case "${info_value:0:6}" in
-      PASSED) smart_healthy=1 ;;
-      *) smart_healthy=0 ;;
-      esac
-    elif [[ "${info_type}" == 'SMART_Health_Status' ]]; then
-      case "${info_value:0:2}" in
-      OK) smart_healthy=1 ;;
-      *) smart_healthy=0 ;;
-      esac
-    fi
-  done
-  echo "device_info{disk=\"${disk}\",type=\"${disk_type}\",vendor=\"${vendor}\",product=\"${product}\",revision=\"${revision}\",lun_id=\"${lun_id}\",model_family=\"${model_family}\",device_model=\"${device_model}\",serial_number=\"${serial_number}\",firmware_version=\"${fw_version}\"} 1"
-  echo "device_smart_available{disk=\"${disk}\",type=\"${disk_type}\",serial_number=\"${serial_number}\"} ${smart_available}"
-  [[ "${smart_available}" == "1" ]] && echo "device_smart_enabled{disk=\"${disk}\",type=\"${disk_type}\",serial_number=\"${serial_number}\"} ${smart_enabled}"
-  [[ "${smart_available}" == "1" ]] && [[ "${smart_healthy}" != "" ]] && echo "device_smart_healthy{disk=\"${disk}\",type=\"${disk_type}\",serial_number=\"${serial_number}\"} ${smart_healthy}"
-}
-
-output_format_awk="$(
-  cat <<'OUTPUTAWK'
-BEGIN { v = "" }
-v != $1 {
-  print "# HELP smartmon_" $1 " SMART metric " $1;
-  print "# TYPE smartmon_" $1 " gauge";
-  v = $1
-}
-{print "smartmon_" $0}
-OUTPUTAWK
-)"
-
-format_output() {
-  sort |
-    awk -F'{' "${output_format_awk}"
-}
-
-smartctl_version="$(/usr/sbin/smartctl -V | head -n1 | awk '$1 == "smartctl" {print $2}')"
-
-echo "smartctl_version{version=\"${smartctl_version}\"} 1" | format_output
-
-if [[ "$(expr "${smartctl_version}" : '\([0-9]*\)\..*')" -lt 6 ]]; then
-  exit
-fi
-
-device_list="$(/usr/sbin/smartctl --scan-open | awk '/^\/dev/{print $1 "|" $3}')"
-
-for device in ${device_list}; do
-  disk="$(echo "${device}" | cut -f1 -d'|')"
-  type="$(echo "${device}" | cut -f2 -d'|')"
-  # Use REGEX to extract the serial number from the parsed information and save that to a variable
-  serial_number="$(/usr/sbin/smartctl -i -H -d "${type}" "${disk}" | parse_smartctl_info "${disk}" "${type}"| sed -E ':a;N;$!ba;s/.*serial_number=\"([^"]+)\".*/\1/g' | sed -E 's/^device_info\{.*//g')"
-  active=1
-  echo "smartctl_run{disk=\"${disk}\",type=\"${type}\"}" "$(TZ=UTC date '+%s')"
-  # Check if the device is in a low-power mode
-  /usr/sbin/smartctl -n standby -d "${type}" "${disk}" > /dev/null || active=0
-  echo "device_active{disk=\"${disk}\",type=\"${type}\"}" "${active}"
-  # Skip further metrics to prevent the disk from spinning up
-  test ${active} -eq 0 && continue
-  # Get the SMART information and health
-  /usr/sbin/smartctl -i -H -d "${type}" "${disk}" | parse_smartctl_info "${disk}" "${type}"
-  # Get the SMART attributes
-  case ${type} in
-  sat) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_attributes "${disk}" "${type}" "${serial_number}" ;;
-  sat+megaraid*) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_attributes "${disk}" "${type}" "${serial_number}" ;;
-  scsi) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_scsi_attributes "${disk}" "${type}" "${serial_number}" ;;
-  megaraid*) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_scsi_attributes "${disk}" "${type}" "${serial_number}" ;;
-  *)
-      (>&2 echo "disk type is not sat, scsi or megaraid but ${type}")
-    exit
-    ;;
-  esac
-done | format_output

From 5142d7905d27ff356b824fb19d6f1090e1c63ca1 Mon Sep 17 00:00:00 2001
From: technowhizz <7688823+technowhizz@users.noreply.github.com>
Date: Thu, 1 May 2025 19:20:57 +0100
Subject: [PATCH 16/16] Add test fixture for Dell ENT NVMe

---
 .../scripts/tests/Dell_ENT_NVMe_CM6.json      | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 etc/kayobe/ansible/scripts/tests/Dell_ENT_NVMe_CM6.json

diff --git a/etc/kayobe/ansible/scripts/tests/Dell_ENT_NVMe_CM6.json b/etc/kayobe/ansible/scripts/tests/Dell_ENT_NVMe_CM6.json
new file mode 100644
index 000000000..d867910ae
--- /dev/null
+++ b/etc/kayobe/ansible/scripts/tests/Dell_ENT_NVMe_CM6.json
@@ -0,0 +1,26 @@
+{
+  "device_info": {
+    "assessment": "PASS",
+    "firmware": "2.1.8",
+    "interface": "nvme",
+    "model": "Dell Ent NVMe CM6 RI 7.68TB",
+    "name": "nvme8",
+    "serial": "Y2Q0A0BPTCF8",
+    "smart_capable": true,
+    "smart_enabled": true,
+    "vendor": "Dell"
+  },
+  "if_attributes": {
+    "availableSpare": 100,
+    "availableSpareThreshold": 10,
+    "controllerBusyTime": 2478,
+    "criticalWarning": 0,
+    "dataUnitsRead": 177817765,
+    "dataUnitsWritten": 127992843,
+    "percentageUsed": 1,
+    "powerCycles": 750,
+    "powerOnHours": 17427,
+    "temperature": 36,
+    "unsafeShutdowns": 37
+  }
+}