Skip to content

Commit

Permalink
Feat: Raise RAPL permission error with informative message (#111)
Browse files Browse the repository at this point in the history
Co-authored-by: Jae-Won Chung <[email protected]>
  • Loading branch information
wbjin and jaywonchung authored Aug 26, 2024
1 parent 8430a42 commit 6a1342b
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 8 deletions.
1 change: 0 additions & 1 deletion capriccio/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
python capriccio/generate.py /path/to/sentiment140.json
"""


import argparse
import os
import warnings
Expand Down
14 changes: 8 additions & 6 deletions tests/test_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ def getTotalEnergyConsumption(self):
"""Returns the total energy consumption of the specified powerzone. Units: mJ."""
return CpuDramMeasurement(
cpu_mj=float(next(self.cpu_energy)),
dram_mj=float(next(self.dram_energy))
if self.dram_energy is not None
else None,
dram_mj=(
float(next(self.dram_energy)) if self.dram_energy is not None else None
),
)

def supportsGetDramEnergyConsumption(self):
Expand Down Expand Up @@ -281,9 +281,11 @@ def assert_window_begin(name: str, begin_time: int):
for i in range(len(monitor.cpu_indices))
}
assert monitor.measurement_states[name].dram_energy == {
i: pytest.approx((200 + 5 * (begin_time - 4)) / 1000.0)
if i % 2 == 0
else None
i: (
pytest.approx((200 + 5 * (begin_time - 4)) / 1000.0)
if i % 2 == 0
else None
)
for i in range(0, len(monitor.cpu_indices), 2)
}
pynvml_mock.nvmlDeviceGetTotalEnergyConsumption.assert_has_calls(
Expand Down
12 changes: 12 additions & 0 deletions zeus/device/cpu/rapl.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,14 @@ def __init__(self, message: str) -> None:
super().__init__(message)


class ZeusRAPLPermissionError(ZeusBaseCPUError):
    """Zeus CPU exception that wraps No Permission to perform RAPL operation."""

    def __init__(self, message: str) -> None:
        """Initialize the exception object.

        Args:
            message: Human-readable description of the permission failure;
                forwarded unchanged to the base CPU error.
        """
        super().__init__(message)


class RAPLFile:
"""RAPL File class for each RAPL file.
Expand All @@ -169,6 +177,10 @@ def __init__(self, path: str) -> None:
self.last_energy = float(energy_file.read().strip())
except FileNotFoundError as err:
raise ZeusRAPLFileInitError("Error reading package energy") from err
except PermissionError as err:
raise cpu_common.ZeusCPUNoPermissionError(
"Can't read file due to permission error"
) from err
try:
with open(
os.path.join(path, "max_energy_range_uj"), "r"
Expand Down
10 changes: 9 additions & 1 deletion zeus/monitor/energy.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from zeus.utils.framework import sync_execution as sync_execution_fn
from zeus.device import get_gpus, get_cpus
from zeus.device.gpu.common import ZeusGPUInitError, EmptyGPUs
from zeus.device.cpu.common import ZeusCPUInitError, EmptyCPUs
from zeus.device.cpu.common import ZeusCPUInitError, ZeusCPUNoPermissionError, EmptyCPUs

logger = get_logger(__name__)

Expand Down Expand Up @@ -186,6 +186,14 @@ def __init__(
self.cpus = get_cpus()
except ZeusCPUInitError:
self.cpus = EmptyCPUs()
except ZeusCPUNoPermissionError as err:
if cpu_indices:
raise RuntimeError(
"Root privilege is required to read RAPL metrics. See "
"https://ml.energy/zeus/getting_started/#system-privileges "
"for more information or disable CPU measurement by passing cpu_indices=[] to "
"ZeusMonitor"
) from err

# Resolve GPU indices. If the user did not specify `gpu_indices`, use all available GPUs.
self.gpu_indices = (
Expand Down

0 comments on commit 6a1342b

Please sign in to comment.