Commit
Merge Pull Request #7377 from jjellio/Trilinos/build_stats
Automatically Merged using Trilinos Pull Request AutoTester
PR Title: Provide per-file build statistics for Trilinos and others...
PR Author: jjellio
Showing 5 changed files with 418 additions and 0 deletions.
@@ -0,0 +1,80 @@
""" | ||
Note: | ||
Try to by python2 and python3 compliant | ||
""" | ||
import subprocess # spawning nm | ||
import re # re matching | ||
import os # line seperator | ||
|
||
class NMParser: | ||
"""Simple NM parser that""" | ||
|
||
# the values are | ||
nm_option_csv_map = { | ||
'N' : 'symbol_debug', | ||
'p' : 'symbol_stack_unwind', | ||
'R' : 'symbol_ro_data_global', | ||
'r' : 'symbol_ro_data_local', | ||
'T' : 'symbol_text_global', | ||
't' : 'symbol_text_local', | ||
'u' : 'symbol_unique_global', | ||
} | ||
|
||
nm_option_desc_map = { | ||
'N' : 'debugging symbol', | ||
'p' : 'stack unwind section', | ||
'R' : 'read only global data', | ||
'r' : 'read only local data', | ||
'T' : 'global text section', | ||
't' : 'local text section', | ||
'u' : 'unique global symbol', | ||
} | ||
|
||
nm_re_type_expr = ''.join(nm_option_desc_map) | ||
nm_re_str = r'^[a-zA-Z0-9]+\s+(?P<size_hex>[a-zA-Z0-9]{2,})\s+(?P<type>[' + nm_re_type_expr + '])\s+' | ||
nm_re = re.compile(nm_re_str) | ||
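  # For illustration, a symbol line from `nm -aS` that this regex matches
  # (exact formatting varies by platform and binutils version) looks like:
  #   0000000000400630 0000000000000010 T main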
|
||
@staticmethod | ||
def parse_object(filename): | ||
""" | ||
Simple NM parsing of an object file | ||
Given an object file, we call nm -aS file | ||
Next, we parse stdout and match symbol lines corresponding to types | ||
from nm_option_desc_map. | ||
Data are aggregated into a dict using the keys from nm_option_desc_map | ||
The keys are obtained from nm_option_desc_map and enforced inside the regex used | ||
See nm_re_type_expr, nm_re_str, and nm_re in the static fields of this class | ||
""" | ||
p = subprocess.Popen(['nm', '-aS', filename], | ||
stdout=subprocess.PIPE) | ||
output = p.communicate()[0] | ||
|
||
nm_counts = dict() | ||
|
||
for line in output.split(os.linesep): | ||
m = NMParser.nm_re.match(line) | ||
if m: | ||
nm_counts[m.group('type')] = nm_counts.get(m.group('type'), 0) + 1 | ||
# return what we found | ||
return nm_counts | ||
|
||
@staticmethod | ||
def print_counts(nm_counts, | ||
cvs_line=False, | ||
csv_header=False): | ||
for k,v in nm_counts.items(): | ||
print("\"{key}\",{value}".format(key=NMParser.nm_option_desc_map[k], | ||
value=v)) | ||
@staticmethod | ||
def get_csv_map (nm_counts): | ||
# create a map of the form: csv_header_str : value | ||
# loop over the csv_map, which will guarantee we always return the same columns. | ||
# otherwise, looping over nm_counts will only return csv columns found in this specific file | ||
# , while the wrapper needs consistent output from all files parsed | ||
csv_map = { v : nm_counts.get(k,0) for k,v in NMParser.nm_option_csv_map.items() } | ||
return csv_map | ||
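For illustration, a minimal sketch of how this class might be driven (the object file name 'blah.o' is hypothetical, borrowed from the naming used in the wrapper comments below):

# count symbols in an object file and emit CSV-friendly output
counts = NMParser.parse_object('blah.o')   # e.g. {'T': 12, 't': 30, ...}
NMParser.print_counts(counts)              # prints "description",count lines
csv_map = NMParser.get_csv_map(counts)     # same columns for every file parsed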
commonTools/build_stats/wrapper/WrapperCommandLineParser.py
88 additions & 0 deletions
@@ -0,0 +1,88 @@
import os
import sys


class WrapperCommandLineParser:
  """
  Command line parsing: find any wrapper args and determine any output names
  """
  def __init__(self, cmdline_args):
    # if we write anything out it goes here
    self.output_stats_file = ''
    # if op generates an output file (-o ...)
    self.op_output_file = ''
    # if we perform an operation this is it
    self.op = ''
    # whether to gather and print a csv_banner
    self.print_csv_banner = False
    # whatever the op's args should be
    self.op_args = []
    self.parse_cmdline_args(cmdline_args)
|
||
def __repr__(self): | ||
return self.lcl_print() | ||
|
||
def __str__(self): | ||
return self.lcl_print() | ||
|
||
def lcl_print(self): | ||
fmt_string = [ | ||
'output_stats_file : {output_stats_file}', | ||
'op : {op}', | ||
'op_output_file : {op_output_file}', | ||
'print_csv_banner : {print_csv_banner}', | ||
|
||
] | ||
return '\n'.join(fmt_string).format( | ||
output_stats_file=self.output_stats_file, | ||
op_output_file=self.op_output_file, | ||
op=self.op, | ||
print_csv_banner=self.print_csv_banner) | ||
|
||
|
||
  def parse_cmdline_args(self, cmdline_args):
    wrapper_header_arg = '----get_header'
    wrapper_op_arg_prefix = '----op='
    # require that any wrapper arg be the first
    try:
      wrapper_arg = cmdline_args[1]
      if wrapper_arg == wrapper_header_arg:
        self.print_csv_banner = True
      elif wrapper_arg.startswith(wrapper_op_arg_prefix):
        self.op = wrapper_arg.split('=', 1)[1]
        # find the output arg (an exception is raised if it is not found)
        # we use -o blah.o or -o /path/to/blah.o or none at all
        # we name the output as: blah.o.op.timing
        # this will result in blah.o.ar.timing, blah.o.mpicc.timing, blah.o.ld.timing, ...
        short_op = os.path.basename(self.op)
        self.output_stats_file = short_op + '.timing'
        try:
          output_idx = cmdline_args.index('-o')
          self.op_output_file = cmdline_args[output_idx + 1]
          self.output_stats_file = self.op_output_file + '.' + self.output_stats_file
        except (ValueError, IndexError):
          # no -o argument found; keep the op-only stats file name
          pass

      else:
        raise Exception('unparseable arguments')

      # remove the first 2 args (script name + wrapper arg)
      self.op_args = cmdline_args[2:]

    except Exception:
      # any error and we give up
      help_msg = ["Compiler wrapper:",
                  "  Usage: wrapper ----op=<compiler> [args] | ----get_header",
                  "",
                  "  ----op=/path/to/compiler",
                  "      path to the compiler we are wrapping",
                  "  ----get_header",
                  "      may not be combined with ----op; prints the csv_header generated",
                  "",
                  "  The tool depends on finding a -o <output> option in args;",
                  "  statistics will be written to <output>.<op>.timing",
                  ]
      print('\n'.join(help_msg))
      sys.exit(0)
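A minimal sketch of how this parser might be invoked (the argv contents here are hypothetical):

import sys

# e.g. sys.argv == ['wrapper', '----op=/usr/bin/mpicc', '-c', 'foo.c', '-o', 'foo.o']
parser = WrapperCommandLineParser(sys.argv)
print(parser)                 # uses lcl_print() via __str__
# parser.op                -> '/usr/bin/mpicc'
# parser.op_args           -> ['-c', 'foo.c', '-o', 'foo.o']
# parser.output_stats_file -> 'foo.o.mpicc.timing'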
@@ -0,0 +1,185 @@
import subprocess
import csv
import os


class WrapperOpTimer:
  # map /usr/bin/time format characters to CSV column names
  usr_bin_time_csv_map = {
    "E": "elapsed_real_time_fmt",
    "e": "elapsed_real_time_sec",
    "S": "cpu_sec_kernel_mode",
    "U": "cpu_sec_user_mode",
    "P": "perc_cpu_used",
    "M": "max_resident_size_Kb",
    "t": "avg_resident_size_Kb",
    "K": "avg_total_memory_used_Kb",
    "D": "avg_size_unshared_data_area_Kb",
    "p": "avg_size_unshared_stack_area_Kb",
    "X": "avg_size_shared_text_area_Kb",
    "Z": "page_size_bytes",
    "F": "num_major_page_faults",
    "R": "num_minor_page_faults",
    "W": "num_swapped",
    "c": "num_involuntary_context_switch",
    "w": "num_waits",
    "I": "num_filesystem_inputs",
    "O": "num_filesystem_outputs",
    "r": "num_socket_msg_recv",
    "s": "num_socket_msg_sent",
    "k": "num_signals",
    "x": "exit_status",
  }

  # map /usr/bin/time format characters to their documented descriptions
  usr_bin_time_desc_map = {
    "E": "Elapsed real time ([h:]m:s)",
    "e": "Elapsed real time (s)",
    "S": "Total number of CPU-seconds that the process spent in kernel mode",
    "U": "Total number of CPU-seconds that the process spent in user mode",
    "P": "Percentage of the CPU that this job got",
    "M": "Maximum resident set size of the process during its lifetime (Kb)",
    "t": "(Not in tcsh.) Average resident set size of the process (Kb)",
    "K": "Average total (data+stack+text) memory use of the process (Kb)",
    "D": "Average size of unshared data area (Kb)",
    "p": "Average size of unshared stack space (Kb)",
    "X": "Average size of shared text space (Kb)",
    "Z": "System page size (bytes)",
    "F": "Number of major page faults",
    "R": "Number of minor or recoverable page faults",
    "W": "Number of times the process was swapped out of main memory",
    "c": "Number of times the process was context-switched involuntarily",
    "w": "Number of waits",
    "I": "Number of file system inputs by the process",
    "O": "Number of file system outputs by the process",
    "r": "Number of socket messages received by the process",
    "s": "Number of socket messages sent by the process",
    "k": "Number of signals delivered to the process",
    "x": "(Not in tcsh.) Exit status of the command",
  }

  # default /usr/bin/time format characters to request, in column order
  default_fields = [
    "e", "M", "K", "D", "X", "F", "R", "W", "w", "c",
    "S", "U", "P", "I", "O", "r", "s", "k", "x",
  ]

  field_header_full = ','.join([ usr_bin_time_csv_map[f] for f in default_fields ])
  field_header_short = ','.join(default_fields)
  field_arg = '--format=' + field_header_full + '\n' + ','.join([ '%{}'.format(f) for f in default_fields ])
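  # For illustration (with the default fields above), field_arg is a two-line
  # format string, so /usr/bin/time emits a CSV header row then a value row:
  #   --format=elapsed_real_time_sec,max_resident_size_Kb,...,exit_status
  #   %e,%M,...,%x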

  @staticmethod
  def time_op(op,
              op_output_file,
              output_stats_file,
              op_args):
    """
    Evaluate 'op' with 'op_args', and gather stats into output_stats_file
    """
    cmd = [
      '/usr/bin/time',
      # '--append',
      '--output=' + output_stats_file,
      WrapperOpTimer.field_arg,
      op ] + op_args

    # print(' '.join(cmd))
    p = subprocess.Popen(cmd)
    p.communicate()

    # initialize the field names and the row
    fields = []
    csv_row = {}

    # read back the csv file that /usr/bin/time wrote
    with open(output_stats_file, 'r') as csvfile:
      # create a csv reader object
      csvreader = csv.reader(csvfile)

      # extract field names from the first row
      fields = next(csvreader)

      # extract each data row one by one;
      # we effectively retain only the last row.
      # it isn't clear if we should expect multiple rows per file
      for row in csvreader:
        csv_row = dict(zip(fields, row))

    # mark up the output with file metadata
    csv_row['FileSize'] = WrapperOpTimer.get_file_size(op_output_file)
    csv_row['FileName'] = op_output_file

    return csv_row

  # returns the file size in bytes, or -1 if the file cannot be stat'ed
  @staticmethod
  def get_file_size(filename):
    sz = -1
    try:
      sz = os.stat(filename).st_size
    except OSError:
      # e.g. the op did not produce the expected output file
      pass
    return sz
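Putting the pieces together, a minimal sketch of a wrapper driver (hypothetical; module names and the driver wiring are assumptions, and the actual driver script is among the changed files not shown here):

import sys

from WrapperCommandLineParser import WrapperCommandLineParser
from WrapperOpTimer import WrapperOpTimer

def main():
  # parse wrapper args, e.g. wrapper ----op=/usr/bin/mpicc -c foo.c -o foo.o
  parser = WrapperCommandLineParser(sys.argv)
  if parser.print_csv_banner:
    # the real driver likely prints a fuller header (file and nm columns too)
    print(WrapperOpTimer.field_header_full)
    return
  # time the wrapped compiler/linker invocation and collect the stats row
  csv_row = WrapperOpTimer.time_op(parser.op,
                                   parser.op_output_file,
                                   parser.output_stats_file,
                                   parser.op_args)
  print(csv_row)

if __name__ == '__main__':
  main()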