Commit

Add Site report for GPUs (#27)
jasoncpatton authored Oct 14, 2021
1 parent 1440029 commit 4095013
Showing 1 changed file with 88 additions and 92 deletions.
180 changes: 88 additions & 92 deletions accounting/filters/ChtcScheddGpuFilter.py
@@ -291,19 +291,14 @@ def site_filter(self, data, doc):
            return

        # Get output dict for this site
-       site = i.get("MachineAttrGLIDEIN_ResourceName0", "UNKNOWN") or "UNKNOWN"
+       site = i.get("LastRemoteHost", "UNKNOWN") or "UNKNOWN"
+       site = site.split("@")[-1]
        o = data["Site"][site]

        # Add custom attrs to the list of attrs
        filter_attrs = DEFAULT_FILTER_ATTRS.copy()
        filter_attrs = filter_attrs + ["User"]

        # Count number of DAGNode Jobs
        if i.get("DAGNodeName") is not None and i.get("JobUniverse") != 12:
            o["_NumDAGNodes"].append(1)
        else:
            o["_NumDAGNodes"].append(0)

        # Count number of history ads (i.e. number of unique job ids)
        o["_NumJobs"].append(1)

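For context, a minimal sketch of how the new site key is derived from LastRemoteHost; the slot name below is made up for illustration:

    # Minimal sketch; the LastRemoteHost value is hypothetical.
    ad = {"LastRemoteHost": "slot1_2@gpu2000.chtc.wisc.edu"}
    site = ad.get("LastRemoteHost", "UNKNOWN") or "UNKNOWN"
    site = site.split("@")[-1]  # keep the machine name after the slot prefix
    print(site)  # -> gpu2000.chtc.wisc.edu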
@@ -316,6 +311,7 @@ def get_filters(self):
        filters = [
            self.schedd_filter,
            self.user_filter,
+           self.site_filter,
        ]
        return filters
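For orientation, a hypothetical driver loop (assumed; the real caller lives elsewhere in this repo and may differ) showing how such a per-ad filter list is typically applied, matching the filter(data, doc) signature visible in the hunks above:

    # Hypothetical usage sketch; the actual driver may differ.
    def apply_filters(filter_obj, docs, data):
        for doc in docs:                      # one Elasticsearch hit per history ad
            for filt in filter_obj.get_filters():
                filt(data, doc)               # each filter accumulates into data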

@@ -328,7 +324,7 @@ def add_custom_columns(self, agg):
            columns[5] = "Num Users"
        if agg == "Site":
            columns[5] = "Num Users"
-           rm_columns = [30,45,50,70,80,90,300,305,310,320,330,340,350,370,380,390]
+           rm_columns = [30,35,45,50,70,80,90,180,181,190,191,300,303,305,307,310,320,330,340,350,390]
            [columns.pop(key) for key in rm_columns]
        return columns
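Since columns is keyed by integer position, popping the keys in rm_columns drops those columns from the Site report; a minimal sketch with assumed keys and headers:

    # Minimal sketch; the keys and header names here are assumed for illustration.
    columns = {5: "Num Users", 30: "Avg MB Sent", 45: "Max MB Sent"}
    rm_columns = [30, 45]
    [columns.pop(key) for key in rm_columns]
    print(columns)  # -> {5: 'Num Users'}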

@@ -341,90 +337,6 @@ def merge_filtered_data(self, data, agg):
        return rows


    def compute_site_custom_columns(self, data, agg, agg_name):

        # Output dictionary
        row = {}

        # Compute goodput and total CPU hours columns
        goodput_cpu_time = []
        for (goodput_time, cpus) in zip(
                data["CommittedTime"],
                data["RequestCpus"]):
            if None in [goodput_time, cpus]:
                goodput_cpu_time.append(None)
            else:
                goodput_cpu_time.append(goodput_time * cpus)

        # Short jobs are jobs that ran for < 1 minute
        is_short_job = []
        for (goodput_time, record_date, start_date) in zip(
                data["CommittedTime"],
                data["RecordTime"],
                data["JobCurrentStartDate"]):
            if (goodput_time is not None) and (goodput_time > 0):
                is_short_job.append(goodput_time < 60)
            elif None in (record_date, start_date):
                is_short_job.append(None)
            else:
                is_short_job.append((record_date - start_date) < 60)

        # "Long" (i.e. "normal") jobs ran >= 1 minute
        # We only want to use these when computing percentiles,
        # so filter out short jobs and removed jobs,
        # and sort them so we can easily grab the percentiles later
        long_times_sorted = []
        for (is_short, goodput_time) in zip(
                is_short_job,
                data["CommittedTime"]):
            if (is_short == False):
                long_times_sorted.append(goodput_time)
        long_times_sorted = self.clean(long_times_sorted)
        long_times_sorted.sort()

        # Compute columns
        row["All CPU Hours"] = sum(self.clean(goodput_cpu_time)) / 3600
        row["Num Uniq Job Ids"] = sum(data['_NumJobs'])
        row["Avg MB Sent"] = stats.mean(self.clean(data["BytesSent"], allow_empty_list=False)) / 1e6
        row["Max MB Sent"] = max(self.clean(data["BytesSent"], allow_empty_list=False)) / 1e6
        row["Avg MB Recv"] = stats.mean(self.clean(data["BytesRecvd"], allow_empty_list=False)) / 1e6
        row["Max MB Recv"] = max(self.clean(data["BytesRecvd"], allow_empty_list=False)) / 1e6
        row["Num Short Jobs"] = sum(self.clean(is_short_job))
        row["Max Rqst Mem MB"] = max(self.clean(data['RequestMemory'], allow_empty_list=False))
        row["Med Used Mem MB"] = stats.median(self.clean(data["MemoryUsage"], allow_empty_list=False))
        row["Max Used Mem MB"] = max(self.clean(data["MemoryUsage"], allow_empty_list=False))
        row["Max Rqst Cpus"] = max(self.clean(data["RequestCpus"], allow_empty_list=False))
        row["Num Users"] = len(set(data["User"]))

        if row["Num Uniq Job Ids"] > 0:
            row["% Short Jobs"] = 100 * row["Num Short Jobs"] / row["Num Uniq Job Ids"]
        else:
            row["% Short Jobs"] = 0

        # Compute time percentiles and stats
        if len(long_times_sorted) > 0:
            row["Min Hrs"] = long_times_sorted[ 0] / 3600
            row["25% Hrs"] = long_times_sorted[ len(long_times_sorted)//4] / 3600
            row["Med Hrs"] = stats.median(long_times_sorted) / 3600
            row["75% Hrs"] = long_times_sorted[3*len(long_times_sorted)//4] / 3600
            row["95% Hrs"] = long_times_sorted[int(0.95*len(long_times_sorted))] / 3600
            row["Max Hrs"] = long_times_sorted[-1] / 3600
            row["Mean Hrs"] = stats.mean(long_times_sorted) / 3600
        else:
            for col in [f"{x} Hrs" for x in ["Min", "25%", "Med", "75%", "95%", "Max", "Mean"]]:
                row[col] = 0

        if len(long_times_sorted) > 1:
            row["Std Hrs"] = stats.stdev(long_times_sorted) / 3600
        else:
            # There is no variance if there is only one value
            row["Std Hrs"] = 0

        # Compute mode for Project and Schedd columns in the Users table
        row["Num Users"] = len(set(data["User"]))

        return row

    def compute_custom_columns(self, data, agg, agg_name):

        if agg == "Site":
@@ -587,3 +499,87 @@ def compute_custom_columns(self, data, agg, agg_name):
row["Num Users"] = len(set(data["User"]))

return row


    def compute_site_custom_columns(self, data, agg, agg_name):

        # Output dictionary
        row = {}

        # Compute goodput and total CPU and GPU hours columns
        goodput_cpu_time = []
        goodput_gpu_time = []
        for (goodput_time, cpus, gpus) in zip(
                data["CommittedTime"],
                data["RequestCpus"],
                data["RequestGpus"]):
            if None in [goodput_time, cpus, gpus]:
                goodput_cpu_time.append(None)
                goodput_gpu_time.append(None)
            else:
                goodput_cpu_time.append(goodput_time * cpus)
                goodput_gpu_time.append(goodput_time * gpus)

        # Short jobs are jobs that ran for < 1 minute
        is_short_job = []
        for (goodput_time, record_date, start_date) in zip(
                data["CommittedTime"],
                data["RecordTime"],
                data["JobCurrentStartDate"]):
            if (goodput_time is not None) and (goodput_time > 0):
                is_short_job.append(goodput_time < 60)
            elif None in (record_date, start_date):
                is_short_job.append(None)
            else:
                is_short_job.append((record_date - start_date) < 60)

        # "Long" (i.e. "normal") jobs ran >= 1 minute
        # We only want to use these when computing percentiles,
        # so filter out short jobs and removed jobs,
        # and sort them so we can easily grab the percentiles later
        long_times_sorted = []
        for (is_short, goodput_time) in zip(
                is_short_job,
                data["CommittedTime"]):
            if (is_short == False):
                long_times_sorted.append(goodput_time)
        long_times_sorted = self.clean(long_times_sorted)
        long_times_sorted.sort()

        # Compute columns
        row["All CPU Hours"] = sum(self.clean(goodput_cpu_time)) / 3600
        row["All GPU Hours"] = sum(self.clean(goodput_gpu_time)) / 3600
        row["Num Uniq Job Ids"] = sum(data['_NumJobs'])
        row["Num Short Jobs"] = sum(self.clean(is_short_job))
        row["Max Rqst Mem MB"] = max(self.clean(data['RequestMemory'], allow_empty_list=False))
        row["Med Used Mem MB"] = stats.median(self.clean(data["MemoryUsage"], allow_empty_list=False))
        row["Max Used Mem MB"] = max(self.clean(data["MemoryUsage"], allow_empty_list=False))
        row["Max Rqst Cpus"] = max(self.clean(data["RequestCpus"], allow_empty_list=False))
        row["Max Rqst Gpus"] = max(self.clean(data["RequestGpus"], allow_empty_list=False))
        row["Num Users"] = len(set(data["User"]))

        if row["Num Uniq Job Ids"] > 0:
            row["% Short Jobs"] = 100 * row["Num Short Jobs"] / row["Num Uniq Job Ids"]
        else:
            row["% Short Jobs"] = 0

        # Compute time percentiles and stats
        if len(long_times_sorted) > 0:
            row["Min Hrs"] = long_times_sorted[ 0] / 3600
            row["25% Hrs"] = long_times_sorted[ len(long_times_sorted)//4] / 3600
            row["Med Hrs"] = stats.median(long_times_sorted) / 3600
            row["75% Hrs"] = long_times_sorted[3*len(long_times_sorted)//4] / 3600
            row["95% Hrs"] = long_times_sorted[int(0.95*len(long_times_sorted))] / 3600
            row["Max Hrs"] = long_times_sorted[-1] / 3600
            row["Mean Hrs"] = stats.mean(long_times_sorted) / 3600
        else:
            for col in [f"{x} Hrs" for x in ["Min", "25%", "Med", "75%", "95%", "Max", "Mean"]]:
                row[col] = 0

        if len(long_times_sorted) > 1:
            row["Std Hrs"] = stats.stdev(long_times_sorted) / 3600
        else:
            # There is no variance if there is only one value
            row["Std Hrs"] = 0

        return row
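A small worked sketch of the short-job classification above, with hypothetical times in seconds: a job with positive CommittedTime is judged on that value alone; otherwise the wall-clock span RecordTime - JobCurrentStartDate is used, and jobs with no usable times stay None (dropped later by self.clean):

    def classify(goodput_time, record_date, start_date):
        # Mirrors the loop above: CommittedTime wins when positive,
        # otherwise fall back to RecordTime - JobCurrentStartDate.
        if goodput_time is not None and goodput_time > 0:
            return goodput_time < 60
        if None in (record_date, start_date):
            return None
        return (record_date - start_date) < 60

    print(classify(45, None, None))           # True  (ran 45s)
    print(classify(0, 1_000_100, 1_000_070))  # True  (30s wall clock)
    print(classify(None, None, None))         # None  (unknown; excluded)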

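And a worked sketch of the percentile and hours rows, using a hypothetical pre-sorted list of six long-job committed times (seconds); note the 25%/75%/95% values index directly into the sorted list, so they are approximate percentiles:

    import statistics as stats

    # Hypothetical committed times for six "long" jobs, pre-sorted (seconds)
    long_times_sorted = [90, 300, 600, 3600, 7200, 36000]
    n = len(long_times_sorted)

    print(long_times_sorted[0] / 3600)              # Min Hrs -> 0.025
    print(long_times_sorted[n // 4] / 3600)         # 25% Hrs -> index 1 -> ~0.083
    print(stats.median(long_times_sorted) / 3600)   # Med Hrs -> 2100s -> ~0.583
    print(long_times_sorted[3 * n // 4] / 3600)     # 75% Hrs -> index 4 -> 2.0
    print(long_times_sorted[int(0.95 * n)] / 3600)  # 95% Hrs -> index 5 -> 10.0
    print(long_times_sorted[-1] / 3600)             # Max Hrs -> 10.0
    print(stats.stdev(long_times_sorted) / 3600)    # Std Hrs (defined since n > 1)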