#!/usr/bin/env python3
#
# Script to aggregate and report Linux perf results.
#
# Example:
# ./scripts/perf.py -R -obench.perf ./runners/bench_runner
# ./scripts/perf.py bench.perf -j -Flfs.c -Flfs_util.c -Scycles
#
# Copyright (c) 2022, The littlefs authors.
# SPDX-License-Identifier: BSD-3-Clause
#

import bisect
import collections as co
import csv
import errno
import fcntl
import functools as ft
import itertools as it
import math as m
import multiprocessing as mp
import os
import re
import shlex
import shutil
import subprocess as sp
import sys
import tempfile
import zipfile

# TODO support non-zip perf results?


PERF_PATH = ['perf']
PERF_EVENTS = 'cycles,branch-misses,branches,cache-misses,cache-references'
PERF_FREQ = 100
OBJDUMP_PATH = ['objdump']
THRESHOLD = (0.5, 0.85)


# integer fields
class Int(co.namedtuple('Int', 'x')):
    __slots__ = ()
    def __new__(cls, x=0):
        if isinstance(x, Int):
            return x
        if isinstance(x, str):
            try:
                x = int(x, 0)
            except ValueError:
                # also accept +-∞ and +-inf
                if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
                    x = m.inf
                elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
                    x = -m.inf
                else:
                    raise
        assert isinstance(x, int) or m.isinf(x), x
        return super().__new__(cls, x)

    def __str__(self):
        if self.x == m.inf:
            return '∞'
        elif self.x == -m.inf:
            return '-∞'
        else:
            return str(self.x)

    def __int__(self):
        assert not m.isinf(self.x)
        return self.x

    def __float__(self):
        return float(self.x)

    none = '%7s' % '-'
    def table(self):
        return '%7s' % (self,)

    diff_none = '%7s' % '-'
    diff_table = table

    def diff_diff(self, other):
        new = self.x if self else 0
        old = other.x if other else 0
        diff = new - old
        if diff == +m.inf:
            return '%7s' % '+∞'
        elif diff == -m.inf:
            return '%7s' % '-∞'
        else:
            return '%+7d' % diff

    def ratio(self, other):
        new = self.x if self else 0
        old = other.x if other else 0
        if m.isinf(new) and m.isinf(old):
            return 0.0
        elif m.isinf(new):
            return +m.inf
        elif m.isinf(old):
            return -m.inf
        elif not old and not new:
            return 0.0
        elif not old:
            return 1.0
        else:
            return (new-old) / old

    def __add__(self, other):
        return self.__class__(self.x + other.x)

    def __sub__(self, other):
        return self.__class__(self.x - other.x)

    def __mul__(self, other):
        return self.__class__(self.x * other.x)


# perf results
class PerfResult(co.namedtuple('PerfResult', [
        'file', 'function', 'line',
        'cycles', 'bmisses', 'branches', 'cmisses', 'caches',
        'children'])):
    _by = ['file', 'function', 'line']
    _fields = ['cycles', 'bmisses', 'branches', 'cmisses', 'caches']
    _sort = ['cycles', 'bmisses', 'cmisses', 'branches', 'caches']
    _types = {
        'cycles': Int,
        'bmisses': Int, 'branches': Int,
        'cmisses': Int, 'caches': Int}

    __slots__ = ()
    def __new__(cls, file='', function='', line=0,
            cycles=0, bmisses=0, branches=0, cmisses=0, caches=0,
            children=[]):
        return super().__new__(cls, file, function, int(Int(line)),
            Int(cycles), Int(bmisses), Int(branches),
            Int(cmisses), Int(caches),
            children)

    def __add__(self, other):
        return PerfResult(self.file, self.function, self.line,
            self.cycles + other.cycles,
            self.bmisses + other.bmisses,
            self.branches + other.branches,
            self.cmisses + other.cmisses,
            self.caches + other.caches,
            self.children + other.children)


def openio(path, mode='r', buffering=-1):
    # allow '-' for stdin/stdout
    if path == '-':
        if mode == 'r':
            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
        else:
            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
    else:
        return open(path, mode, buffering)

# run perf as a subprocess, storing measurements into a zip file
def record(command, *,
        output=None,
        perf_freq=PERF_FREQ,
        perf_period=None,
        perf_events=PERF_EVENTS,
        perf_path=PERF_PATH,
        **args):
    # create a temporary file for perf to write to, as far as I can tell
    # this is strictly needed because perf's pipe-mode only works with stdout
    with tempfile.NamedTemporaryFile('rb') as f:
        # figure out our perf invocation
        perf = perf_path + list(filter(None, [
            'record',
            '-F%s' % perf_freq
                if perf_freq is not None and perf_period is None
                else None,
            '-c%s' % perf_period
                if perf_period is not None
                else None,
            '-B',
            '-g',
            '--all-user',
            '-e%s' % perf_events,
            '-o%s' % f.name]))

        # run our command
        try:
            if args.get('verbose'):
                print(' '.join(shlex.quote(c) for c in perf + command))
            err = sp.call(perf + command, close_fds=False)
        except KeyboardInterrupt:
            err = errno.EOWNERDEAD

        # synchronize access
        z = os.open(output, os.O_RDWR | os.O_CREAT)
        fcntl.flock(z, fcntl.LOCK_EX)

        # copy measurements into our zip file
        with os.fdopen(z, 'r+b') as z:
            with zipfile.ZipFile(z, 'a',
                    compression=zipfile.ZIP_DEFLATED,
                    compresslevel=1) as z:
                with z.open('perf.%d' % os.getpid(), 'w') as g:
                    shutil.copyfileobj(f, g)

    # forward the return code
    return err


# try to only process each dso once
#
# note this only caches with the non-keyword arguments
def multiprocessing_cache(f):
    local_cache = {}
    manager = mp.Manager()
    global_cache = manager.dict()
    lock = mp.Lock()

    def multiprocessing_cache(*args, **kwargs):
        # check local cache?
        if args in local_cache:
            return local_cache[args]
        # check global cache?
        with lock:
            if args in global_cache:
                v = global_cache[args]
                local_cache[args] = v
                return v
            # fall back to calling the function
            v = f(*args, **kwargs)
            global_cache[args] = v
            local_cache[args] = v
            return v

    return multiprocessing_cache

@multiprocessing_cache
def collect_syms_and_lines(obj_path, *,
        objdump_path=None,
        **args):
    symbol_pattern = re.compile(
        '^(?P<addr>[0-9a-fA-F]+)'
            '\s+.*'
            '\s+(?P<size>[0-9a-fA-F]+)'
            '\s+(?P<name>[^\s]+)\s*$')
    line_pattern = re.compile(
        '^\s+(?:'
            # matches dir/file table
            '(?P<no>[0-9]+)'
                '(?:\s+(?P<dir>[0-9]+))?'
                '\s+.*'
                '\s+(?P<path>[^\s]+)'
            # matches line opcodes
            '|' '\[[^\]]*\]\s+'
                '(?:'
                    '(?P<op_special>Special)'
                    '|' '(?P<op_copy>Copy)'
                    '|' '(?P<op_end>End of Sequence)'
                    '|' 'File .*?to (?:entry )?(?P<op_file>\d+)'
                    '|' 'Line .*?to (?P<op_line>[0-9]+)'
                    '|' '(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
                    '|' '.'
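                    # the trailing '.' consumes any other opcode text so
                    # the alternation can repeat across the rest of the line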
                ')*'
        ')$', re.IGNORECASE)

    # figure out symbol addresses and file+line ranges
    syms = {}
    sym_at = []
    cmd = objdump_path + ['-t', obj_path]
    if args.get('verbose'):
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
        stdout=sp.PIPE,
        stderr=sp.PIPE if not args.get('verbose') else None,
        universal_newlines=True,
        errors='replace',
        close_fds=False)
    for line in proc.stdout:
        m = symbol_pattern.match(line)
        if m:
            name = m.group('name')
            addr = int(m.group('addr'), 16)
            size = int(m.group('size'), 16)
            # ignore zero-sized symbols
            if not size:
                continue
            # note multiple symbols can share a name
            if name not in syms:
                syms[name] = set()
            syms[name].add((addr, size))
            sym_at.append((addr, name, size))
    proc.wait()
    if proc.returncode != 0:
        if not args.get('verbose'):
            for line in proc.stderr:
                sys.stdout.write(line)
        # assume no debug-info on failure
        pass

    # sort and keep largest/first when duplicates
    sym_at.sort(key=lambda x: (x[0], -x[2], x[1]))
    sym_at_ = []
    for addr, name, size in sym_at:
        if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
            sym_at_.append((addr, name, size))
    sym_at = sym_at_

    # state machine for dwarf line numbers, note that objdump's
    # decodedline seems to have issues with multiple dir/file
    # tables, which is why we need this
    lines = []
    line_at = []
    dirs = {}
    files = {}
    op_file = 1
    op_line = 1
    op_addr = 0
    cmd = objdump_path + ['--dwarf=rawline', obj_path]
    if args.get('verbose'):
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
        stdout=sp.PIPE,
        stderr=sp.PIPE if not args.get('verbose') else None,
        universal_newlines=True,
        errors='replace',
        close_fds=False)
    for line in proc.stdout:
        m = line_pattern.match(line)
        if m:
            if m.group('no') and not m.group('dir'):
                # found a directory entry
                dirs[int(m.group('no'))] = m.group('path')
            elif m.group('no'):
                # found a file entry
                dir = int(m.group('dir'))
                if dir in dirs:
                    files[int(m.group('no'))] = os.path.join(
                        dirs[dir],
                        m.group('path'))
                else:
                    files[int(m.group('no'))] = m.group('path')
            else:
                # found a state machine update
                if m.group('op_file'):
                    op_file = int(m.group('op_file'), 0)
                if m.group('op_line'):
                    op_line = int(m.group('op_line'), 0)
                if m.group('op_addr'):
                    op_addr = int(m.group('op_addr'), 0)

                if (m.group('op_special')
                        or m.group('op_copy')
                        or m.group('op_end')):
                    file = os.path.abspath(files.get(op_file, '?'))
                    lines.append((file, op_line, op_addr))
                    line_at.append((op_addr, file, op_line))

                if m.group('op_end'):
                    op_file = 1
                    op_line = 1
                    op_addr = 0
    proc.wait()
    if proc.returncode != 0:
        if not args.get('verbose'):
            for line in proc.stderr:
                sys.stdout.write(line)
        # assume no debug-info on failure
        pass

    # sort and keep first when duplicates
    lines.sort()
    lines_ = []
    for file, line, addr in lines:
        if len(lines_) == 0 or lines_[-1][0] != file or lines_[-1][1] != line:
            lines_.append((file, line, addr))
    lines = lines_

    # sort and keep first when duplicates
    line_at.sort()
    line_at_ = []
    for addr, file, line in line_at:
        if len(line_at_) == 0 or line_at_[-1][0] != addr:
            line_at_.append((addr, file, line))
    line_at = line_at_

    return syms, sym_at, lines, line_at


def collect_decompressed(path, *,
        perf_path=PERF_PATH,
        sources=None,
        everything=False,
        propagate=0,
        depth=1,
        **args):
    sample_pattern = re.compile(
        '(?P<comm>\w+)'
        '\s+(?P<pid>\w+)'
        '\s+(?P