Report number of singularity jobs (#26)
jasoncpatton authored Oct 14, 2021
1 parent bd0365d commit 1440029
Showing 2 changed files with 22 additions and 14 deletions.
31 changes: 20 additions & 11 deletions accounting/filters/OsgScheddCpuFilter.py
@@ -14,7 +14,8 @@
60: "% Short Jobs",
70: "% Jobs w/>1 Exec Att",
80: "% Jobs w/1+ Holds",

82: "% Jobs using S'ty",

85: "Shadw Starts / Job Id",
90: "Exec Atts / Shadw Start",
95: "Holds / Job Id",
@@ -52,6 +53,7 @@
360: "Num Short Jobs",
370: "Num Local Univ Jobs",
380: "Num Sched Univ Jobs",
390: "Num S'ty Jobs",
}


@@ -73,12 +75,13 @@
"BytesRecvd",
"TransferInputFilesCount",
"TransferOutputFilesCount",
"SingularityImage",
]


class OsgScheddCpuFilter(BaseFilter):
name = "OSG schedd job history"

def __init__(self, **kwargs):
self.collector_host = "flock.opensciencegrid.org"
self.schedd_collector_host_map = {}
@@ -122,7 +125,7 @@ def schedd_filter(self, data, doc):
schedd = i.get("ScheddName", "UNKNOWN") or "UNKNOWN"
o = data["Schedds"][schedd]

# Filter out jobs that did not run in the OS pool
if i.get("LastRemotePool", self.schedd_collector_host(schedd)) != self.collector_host:
return

@@ -184,7 +187,7 @@ def user_filter(self, data, doc):
o["_NumDAGNodes"].append(1)
else:
o["_NumDAGNodes"].append(0)

# Count number of history ads (i.e. number of unique job ids)
o["_NumJobs"].append(1)

@@ -265,7 +268,7 @@ def project_filter(self, data, doc):
for attr in filter_attrs:
o[attr].append(i.get(attr, None))


def site_filter(self, data, doc):

# Get input dict
@@ -328,7 +331,7 @@ def add_custom_columns(self, agg):
rm_columns = [30,50,70,80,85,90,95,180,181,182,190,191,192,300,305,310,320,325,330,340,350,355,370,380]
[columns.pop(key) for key in rm_columns]
return columns

def merge_filtered_data(self, data, agg):
rows = super().merge_filtered_data(data, agg)
if agg == "Site":
@@ -380,19 +383,22 @@ def compute_site_custom_columns(self, data, agg, agg_name):
long_times_sorted.sort()

# Compute columns
row["All CPU Hours"] = sum(self.clean(goodput_cpu_time)) / 3600
row["All CPU Hours"] = sum(self.clean(goodput_cpu_time)) / 3600
row["Num Uniq Job Ids"] = sum(data['_NumJobs'])
row["Num Short Jobs"] = sum(self.clean(is_short_job))
row["Max Rqst Mem MB"] = max(self.clean(data['RequestMemory'], allow_empty_list=False))
row["Med Used Mem MB"] = stats.median(self.clean(data["MemoryUsage"], allow_empty_list=False))
row["Max Used Mem MB"] = max(self.clean(data["MemoryUsage"], allow_empty_list=False))
row["Max Rqst Cpus"] = max(self.clean(data["RequestCpus"], allow_empty_list=False))
row["Num Users"] = len(set(data["User"]))

row["Num Users"] = len(set(data["User"]))
row["Num S'ty Jobs"] = len(self.clean(data["SingularityImage"]))

if row["Num Uniq Job Ids"] > 0:
row["% Short Jobs"] = 100 * row["Num Short Jobs"] / row["Num Uniq Job Ids"]
row["% Jobs using S'ty"] = 100 * row["Num S'ty Jobs"] / row["Num Uniq Job Ids"]
else:
row["% Short Jobs"] = 0
row["% Jobs using S'ty"] = 0

# Compute time percentiles and stats
if len(long_times_sorted) > 0:
@@ -506,6 +512,7 @@ def compute_custom_columns(self, data, agg, agg_name):
row["Num Shadw Starts"] = sum(self.clean(num_shadow_starts))
row["Num Local Univ Jobs"] = sum(data["_NumLocalUnivJobs"])
row["Num Sched Univ Jobs"] = sum(data["_NumSchedulerUnivJobs"])
row["Num S'ty Jobs"] = len(self.clean(data["SingularityImage"]))

# Compute derivative columns
if row["All CPU Hours"] > 0:
@@ -519,13 +526,15 @@ def compute_custom_columns(self, data, agg, agg_name):
row["% Short Jobs"] = 100 * row["Num Short Jobs"] / row["Num Uniq Job Ids"]
row["% Jobs w/>1 Exec Att"] = 100 * row["Num Jobs w/>1 Exec Att"] / row["Num Uniq Job Ids"]
row["% Jobs w/1+ Holds"] = 100 * row["Num Jobs w/1+ Holds"] / row["Num Uniq Job Ids"]
row["% Jobs using S'ty"] = 100 * row["Num S'ty Jobs"] / row["Num Uniq Job Ids"]
else:
row["Shadw Starts / Job Id"] = 0
row["Holds / Job Id"] = 0
row["% Rm'd Jobs"] = 0
row["% Short Jobs"] = 0
row["% Jobs w/>1 Exec Att"] = 0
row["% Jobs w/1+ Holds"] = 0
row["% Jobs using S'ty"] = 0
if row["Num Shadw Starts"] > 0:
row["Exec Atts / Shadw Start"] = row["Num Exec Atts"] / row["Num Shadw Starts"]
else:
@@ -606,6 +615,6 @@ def compute_custom_columns(self, data, agg, agg_name):
else:
row["Most Used Schedd"] = "UNKNOWN"
if agg == "Projects":
row["Num Users"] = len(set(data["User"]))
row["Num Users"] = len(set(data["User"]))

return row
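
Taken together, the filter-side changes count history ads that carry a SingularityImage attribute and report them as a share of unique job ids. A condensed, self-contained sketch of that logic follows; clean() is assumed to drop the None placeholders that the filter appends for ads missing the attribute (via i.get(attr, None) above), so this is an illustration, not the repo's BaseFilter implementation:

def singularity_columns(data):
    # Assumed stand-in for BaseFilter.clean(): drop None placeholders.
    def clean(values):
        return [v for v in values if v is not None]

    row = {}
    # Each history ad appends 1 to _NumJobs, so the sum is the job count.
    row["Num Uniq Job Ids"] = sum(data["_NumJobs"])
    # Ads without a SingularityImage attribute were appended as None.
    row["Num S'ty Jobs"] = len(clean(data["SingularityImage"]))
    if row["Num Uniq Job Ids"] > 0:
        row["% Jobs using S'ty"] = 100 * row["Num S'ty Jobs"] / row["Num Uniq Job Ids"]
    else:
        row["% Jobs using S'ty"] = 0
    return row

# e.g. 4 jobs, 2 of them with an image (hypothetical data):
# singularity_columns({"_NumJobs": [1, 1, 1, 1],
#                      "SingularityImage": ["osg.sif", None, "osg.sif", None]})
# -> "% Jobs using S'ty" == 50.0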
5 changes: 2 additions & 3 deletions accounting/formatters/OsgScheddCpuFormatter.py
@@ -78,6 +78,7 @@ def format_rows(self, header, rows, custom_fmts={}, default_text_fmt=None, defau
"% Short Jobs": lambda x: f"<td>{float(x):.1f}</td>",
"% Jobs w/>1 Exec Att": lambda x: f"<td>{float(x):.1f}</td>",
"% Jobs w/1+ Holds": lambda x: f"<td>{float(x):.1f}</td>",
"% Jobs using S'ty": lambda x: f"<td>{float(x):.1f}</td>",
"Input Files Xferd / Exec Att": lambda x: f"<td>{float(x):.1f}</td>",
"Input MB Xferd / Exec Att": lambda x: f"<td>{float(x):.1f}</td>",
"Input MB / File": lambda x: f"<td>{float(x):.1f}</td>",
@@ -100,16 +101,14 @@ def get_legend(self):
custom_items["% Short Jobs"] = "Percent of Num Uniq Job Ids that were short jobs"
custom_items["% Jobs w/>1 Exec Att"] = "Percent of Num Uniq Job Ids that had more than one execution attempt"
custom_items["% Jobs w/1+ Holds"] = "Percent of Num Uniq Job Ids that had one or more jobs go on hold"
custom_items["% Jobs using S'ty"] = "Percent of Num Uniq Job Ids that are using a Singularity image"

custom_items["Shadw Starts / Job Id"] = "Num Shadw Starts per Num Uniq Job Ids"
custom_items["Exec Atts / Shadw Start"] = "Num Exec Atts per Num Shadw Starts"
custom_items["Holds / Job Id"] = "Num Job Holds per Num Uniq Job Ids"

custom_items["Min/25%/Median/75%/Max/Mean/Std Hrs"] = "Final execution wallclock hours that a non-short job (Min-Max) or jobs (Mean/Std) ran for (excluding Short jobs, excluding Local and Scheduler Universe jobs)"

custom_items["Avg MB Sent"] = "Mean MB sent to a job sandbox from a submit point"
custom_items["Avg MB Recv"] = "Mean MB sent to a submit point from a job sandbox"

custom_items["Med Used Mem MB"] = "Median measured memory usage across all submitted jobs' last execution attempts in MB"

custom_items["CPU Hours / Bad Exec Att"] = "Average CPU Hours used in a non-final execution attempt"