From 14400292065b909194cd8898a45085add6411285 Mon Sep 17 00:00:00 2001
From: jasoncpatton
Date: Thu, 14 Oct 2021 08:35:51 -0500
Subject: [PATCH] Report number of singularity jobs (#26)

---
 accounting/filters/OsgScheddCpuFilter.py | 31 ++++++++++++-------
 .../formatters/OsgScheddCpuFormatter.py  |  5 ++-
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/accounting/filters/OsgScheddCpuFilter.py b/accounting/filters/OsgScheddCpuFilter.py
index d16ade5..fec4df7 100644
--- a/accounting/filters/OsgScheddCpuFilter.py
+++ b/accounting/filters/OsgScheddCpuFilter.py
@@ -14,7 +14,8 @@
     60: "% Short Jobs",
     70: "% Jobs w/>1 Exec Att",
     80: "% Jobs w/1+ Holds",
-
+    82: "% Jobs using S'ty",
+
     85: "Shadw Starts / Job Id",
     90: "Exec Atts / Shadw Start",
     95: "Holds / Job Id",
@@ -52,6 +53,7 @@
     360: "Num Short Jobs",
     370: "Num Local Univ Jobs",
     380: "Num Sched Univ Jobs",
+    390: "Num S'ty Jobs",
 }
 
@@ -73,12 +75,13 @@
     "BytesRecvd",
     "TransferInputFilesCount",
     "TransferOutputFilesCount",
+    "SingularityImage",
 ]
 
 
 class OsgScheddCpuFilter(BaseFilter):
     name = "OSG schedd job history"
-    
+
     def __init__(self, **kwargs):
         self.collector_host = "flock.opensciencegrid.org"
         self.schedd_collector_host_map = {}
@@ -122,7 +125,7 @@ def schedd_filter(self, data, doc):
         schedd = i.get("ScheddName", "UNKNOWN") or "UNKNOWN"
         o = data["Schedds"][schedd]
 
-        # Filter out jobs that did not run in the OS pool 
+        # Filter out jobs that did not run in the OS pool
         if i.get("LastRemotePool", self.schedd_collector_host(schedd)) != self.collector_host:
             return
 
@@ -184,7 +187,7 @@ def user_filter(self, data, doc):
             o["_NumDAGNodes"].append(1)
         else:
             o["_NumDAGNodes"].append(0)
-        
+
         # Count number of history ads (i.e. number of unique job ids)
         o["_NumJobs"].append(1)
 
@@ -265,7 +268,7 @@ def project_filter(self, data, doc):
         for attr in filter_attrs:
             o[attr].append(i.get(attr, None))
-    
+
 
     def site_filter(self, data, doc):
 
         # Get input dict
@@ -328,7 +331,7 @@ def add_custom_columns(self, agg):
             rm_columns = [30,50,70,80,85,90,95,180,181,182,190,191,192,300,305,310,320,325,330,340,350,355,370,380]
             [columns.pop(key) for key in rm_columns]
         return columns
-    
+
     def merge_filtered_data(self, data, agg):
         rows = super().merge_filtered_data(data, agg)
         if agg == "Site":
@@ -380,19 +383,22 @@ def compute_site_custom_columns(self, data, agg, agg_name):
         long_times_sorted.sort()
 
         # Compute columns
-        row["All CPU Hours"] = sum(self.clean(goodput_cpu_time)) / 3600 
+        row["All CPU Hours"] = sum(self.clean(goodput_cpu_time)) / 3600
         row["Num Uniq Job Ids"] = sum(data['_NumJobs'])
         row["Num Short Jobs"] = sum(self.clean(is_short_job))
         row["Max Rqst Mem MB"] = max(self.clean(data['RequestMemory'], allow_empty_list=False))
         row["Med Used Mem MB"] = stats.median(self.clean(data["MemoryUsage"], allow_empty_list=False))
         row["Max Used Mem MB"] = max(self.clean(data["MemoryUsage"], allow_empty_list=False))
         row["Max Rqst Cpus"] = max(self.clean(data["RequestCpus"], allow_empty_list=False))
-        row["Num Users"] = len(set(data["User"])) 
-
+        row["Num Users"] = len(set(data["User"]))
+        row["Num S'ty Jobs"] = len(self.clean(data["SingularityImage"]))
+
         if row["Num Uniq Job Ids"] > 0:
             row["% Short Jobs"] = 100 * row["Num Short Jobs"] / row["Num Uniq Job Ids"]
+            row["% Jobs using S'ty"] = 100 * row["Num S'ty Jobs"] / row["Num Uniq Job Ids"]
         else:
             row["% Short Jobs"] = 0
+            row["% Jobs using S'ty"] = 0
 
         # Compute time percentiles and stats
         if len(long_times_sorted) > 0:
@@ -506,6 +512,7 @@ def compute_custom_columns(self, data, agg, agg_name):
         row["Num Shadw Starts"] = sum(self.clean(num_shadow_starts))
         row["Num Local Univ Jobs"] = sum(data["_NumLocalUnivJobs"])
         row["Num Sched Univ Jobs"] = sum(data["_NumSchedulerUnivJobs"])
+        row["Num S'ty Jobs"] = len(self.clean(data["SingularityImage"]))
 
         # Compute derivative columns
         if row["All CPU Hours"] > 0:
@@ -519,6 +526,7 @@
             row["% Short Jobs"] = 100 * row["Num Short Jobs"] / row["Num Uniq Job Ids"]
             row["% Jobs w/>1 Exec Att"] = 100 * row["Num Jobs w/>1 Exec Att"] / row["Num Uniq Job Ids"]
             row["% Jobs w/1+ Holds"] = 100 * row["Num Jobs w/1+ Holds"] / row["Num Uniq Job Ids"]
+            row["% Jobs using S'ty"] = 100 * row["Num S'ty Jobs"] / row["Num Uniq Job Ids"]
         else:
             row["Shadw Starts / Job Id"] = 0
             row["Holds / Job Id"] = 0
@@ -526,6 +534,7 @@
             row["% Short Jobs"] = 0
             row["% Jobs w/>1 Exec Att"] = 0
             row["% Jobs w/1+ Holds"] = 0
+            row["% Jobs using S'ty"] = 0
         if row["Num Shadw Starts"] > 0:
             row["Exec Atts / Shadw Start"] = row["Num Exec Atts"] / row["Num Shadw Starts"]
         else:
@@ -606,6 +615,6 @@ def compute_custom_columns(self, data, agg, agg_name):
         else:
             row["Most Used Schedd"] = "UNKNOWN"
         if agg == "Projects":
-            row["Num Users"] = len(set(data["User"])) 
+            row["Num Users"] = len(set(data["User"]))
 
-        return row 
+        return row
diff --git a/accounting/formatters/OsgScheddCpuFormatter.py b/accounting/formatters/OsgScheddCpuFormatter.py
index 4999ba7..2bea628 100644
--- a/accounting/formatters/OsgScheddCpuFormatter.py
+++ b/accounting/formatters/OsgScheddCpuFormatter.py
@@ -78,6 +78,7 @@ def format_rows(self, header, rows, custom_fmts={}, default_text_fmt=None, defau
         "% Short Jobs": lambda x: f"{float(x):.1f}",
         "% Jobs w/>1 Exec Att": lambda x: f"{float(x):.1f}",
         "% Jobs w/1+ Holds": lambda x: f"{float(x):.1f}",
+        "% Jobs using S'ty": lambda x: f"{float(x):.1f}",
         "Input Files Xferd / Exec Att": lambda x: f"{float(x):.1f}",
         "Input MB Xferd / Exec Att": lambda x: f"{float(x):.1f}",
         "Input MB / File": lambda x: f"{float(x):.1f}",
@@ -100,6 +101,7 @@ def get_legend(self):
         custom_items["% Short Jobs"] = "Percent of Num Uniq Job Ids that were short jobs"
         custom_items["% Jobs w/>1 Exec Att"] = "Percent of Num Uniq Job Ids that had more than one execution attempt"
         custom_items["% Jobs w/1+ Holds"] = "Percent of Num Uniq Job Ids that had one or more jobs go on hold"
+        custom_items["% Jobs using S'ty"] = "Percent of Num Uniq Job Ids that used a Singularity image"
         custom_items["Shadw Starts / Job Id"] = "Num Shadw Starts per Num Uniq Job Ids"
         custom_items["Exec Atts / Shadw Start"] = "Num Exec Atts per Num Shadw Starts"
 
@@ -107,9 +109,6 @@
         custom_items["Min/25%/Median/75%/Max/Mean/Std Hrs"] = "Final execution wallclock hours that a non-short job (Min-Max) or jobs (Mean/Std) ran for (excluding Short jobs, excluding Local and Scheduler Universe jobs)"
 
-        custom_items["Avg MB Sent"] = "Mean MB sent to a job sandbox from a submit point"
-        custom_items["Avg MB Recv"] = "Mean MB sent to a submit point from a job sandbox"
-
         custom_items["Med Used Mem MB"] = "Median measured memory usage across all submitted jobs' last execution attempts in MB"
         custom_items["CPU Hours / Bad Exec Att"] = "Average CPU Hours used in a non-final execution attempt"
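
Reviewer note on the counting logic: both "Num S'ty Jobs" columns are computed
as len(self.clean(data["SingularityImage"])). Because the filters append
i.get("SingularityImage", None) for every job ad, this counts the ads whose
SingularityImage attribute is actually set, assuming BaseFilter.clean() drops
None placeholders. A minimal sketch of that assumption; the clean() helper and
the sample data below are illustrative stand-ins, not code from this repository:

    # Stand-in for the assumed behavior of BaseFilter.clean():
    # drop the None placeholders from a per-job attribute list.
    def clean(values):
        return [v for v in values if v is not None]

    # One entry per job ad; ads that never declared an image yield None.
    data = {"SingularityImage": ["/cvmfs/a.sif", None, "/cvmfs/b.sif", None]}

    num_uniq_job_ids = len(data["SingularityImage"])     # 4 job ads total
    num_sty_jobs = len(clean(data["SingularityImage"]))  # 2 ads set an image
    pct = 100 * num_sty_jobs / num_uniq_job_ids if num_uniq_job_ids > 0 else 0
    print(f"Num S'ty Jobs = {num_sty_jobs}, % Jobs using S'ty = {pct:.1f}")
    # -> Num S'ty Jobs = 2, % Jobs using S'ty = 50.0

The division guard mirrors the patch: when there are no unique job ids, the
percentage column is reported as 0 rather than raising ZeroDivisionError.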