stackprof-0.2.26/0000755000004100000410000000000014605430231013623 5ustar www-datawww-datastackprof-0.2.26/bin/0000755000004100000410000000000014605430231014373 5ustar www-datawww-datastackprof-0.2.26/bin/stackprof-flamegraph.pl0000755000004100000410000000014514605430231021033 0ustar www-datawww-data#!/usr/bin/env ruby exec(File.expand_path("../../vendor/FlameGraph/flamegraph.pl", __FILE__), *ARGV) stackprof-0.2.26/bin/stackprof0000755000004100000410000001307414605430231016322 0ustar www-datawww-data#!/usr/bin/env ruby require 'optparse' require 'stackprof' banner = <<-END Usage: stackprof run [--mode=MODE|--out=FILE|--interval=INTERVAL|--format=FORMAT] -- COMMAND Usage: stackprof [file.dump]+ [--text|--method=NAME|--callgrind|--graphviz] END if ARGV.first == "run" ARGV.shift env = {} parser = OptionParser.new(banner) do |o| o.on('--mode [MODE]', String, 'Mode of sampling: cpu, wall, object, default to wall') do |mode| env["STACKPROF_MODE"] = mode end o.on('--out [FILENAME]', String, 'The target file, which will be overwritten. Defaults to a random temporary file') do |out| env['STACKPROF_OUT'] = out end o.on('--interval [MILLISECONDS]', Integer, 'Mode-relative sample rate') do |interval| env['STACKPROF_INTERVAL'] = interval.to_s end o.on('--raw', 'collects the extra data required by the --flamegraph and --stackcollapse report types') do |raw| env['STACKPROF_RAW'] = raw.to_s end o.on('--ignore-gc', 'Ignore garbage collection frames') do |gc| env['STACKPROF_IGNORE_GC'] = gc.to_s end end parser.parse! parser.abort(parser.help) if ARGV.empty? 
stackprof_path = File.expand_path('../lib', __dir__) env['RUBYOPT'] = "-I #{stackprof_path} -r stackprof/autorun #{ENV['RUBYOPT']}" Kernel.exec(env, *ARGV) else options = {} parser = OptionParser.new(banner) do |o| o.on('--text', 'Text summary per method (default)'){ options[:format] = :text } o.on('--json', 'JSON output (use with web viewers)'){ options[:format] = :json } o.on('--files', 'List of files'){ |f| options[:format] = :files } o.on('--limit [num]', Integer, 'Limit --text, --files, or --graphviz output to N entries'){ |n| options[:limit] = n } o.on('--sort-total', "Sort --text or --files output on total samples\n\n"){ options[:sort] = true } o.on('--method [grep]', 'Zoom into specified method'){ |f| options[:format] = :method; options[:filter] = f } o.on('--file [grep]', "Show annotated code for specified file"){ |f| options[:format] = :file; options[:filter] = f } o.on('--walk', "Walk the stacktrace interactively\n\n"){ |f| options[:walk] = true } o.on('--callgrind', 'Callgrind output (use with kcachegrind, stackprof-gprof2dot.py)'){ options[:format] = :callgrind } o.on('--graphviz', "Graphviz output (use with dot)"){ options[:format] = :graphviz } o.on('--node-fraction [frac]', OptionParser::DecimalNumeric, 'Drop nodes representing less than [frac] fraction of samples'){ |n| options[:node_fraction] = n } o.on('--stackcollapse', 'stackcollapse.pl compatible output (use with stackprof-flamegraph.pl)'){ options[:format] = :stackcollapse } o.on('--timeline-flamegraph', "timeline-flamegraph output (js)"){ options[:format] = :timeline_flamegraph } o.on('--alphabetical-flamegraph', "alphabetical-flamegraph output (js)"){ options[:format] = :alphabetical_flamegraph } o.on('--flamegraph', "alias to --timeline-flamegraph"){ options[:format] = :timeline_flamegraph } o.on('--flamegraph-viewer [f.js]', String, "open html viewer for flamegraph output"){ |file| puts("open file://#{File.expand_path('../../lib/stackprof/flamegraph/viewer.html', 
__FILE__)}?data=#{File.expand_path(file)}") exit } o.on('--d3-flamegraph', "flamegraph output (html using d3-flame-graph)\n\n"){ options[:format] = :d3_flamegraph } o.on('--select-files []', String, 'Show results of matching files'){ |path| (options[:select_files] ||= []) << File.expand_path(path) } o.on('--reject-files []', String, 'Exclude results of matching files'){ |path| (options[:reject_files] ||= []) << File.expand_path(path) } o.on('--select-names []', Regexp, 'Show results of matching method names'){ |regexp| (options[:select_names] ||= []) << regexp } o.on('--reject-names []', Regexp, 'Exclude results of matching method names'){ |regexp| (options[:reject_names] ||= []) << regexp } o.on('--dump', 'Print marshaled profile dump (combine multiple profiles)'){ options[:format] = :dump } o.on('--debug', 'Pretty print raw profile data'){ options[:format] = :debug } end parser.parse! parser.abort(parser.help) if ARGV.empty? reports = [] while ARGV.size > 0 begin file = ARGV.pop reports << StackProf::Report.from_file(file) rescue TypeError => e STDERR.puts "** error parsing #{file}: #{e.inspect}" end end report = reports.inject(:+) default_options = { :format => :text, :sort => false, :limit => 30 } if options[:format] == :graphviz default_options[:limit] = 120 default_options[:node_fraction] = 0.005 end options = default_options.merge(options) options.delete(:limit) if options[:limit] == 0 case options[:format] when :text report.print_text(options[:sort], options[:limit], options[:select_files], options[:reject_files], options[:select_names], options[:reject_names]) when :json report.print_json when :debug report.print_debug when :dump report.print_dump when :callgrind report.print_callgrind when :graphviz report.print_graphviz(options) when :stackcollapse report.print_stackcollapse when :timeline_flamegraph report.print_timeline_flamegraph when :alphabetical_flamegraph report.print_alphabetical_flamegraph when :d3_flamegraph report.print_d3_flamegraph when 
:method options[:walk] ? report.walk_method(options[:filter]) : report.print_method(options[:filter]) when :file report.print_file(options[:filter]) when :files report.print_files(options[:sort], options[:limit]) else raise ArgumentError, "unknown format: #{options[:format]}" end end stackprof-0.2.26/bin/stackprof-gprof2dot.py0000755000004100000410000000014314605430231020646 0ustar www-datawww-data#!/usr/bin/env ruby exec(File.expand_path("../../vendor/gprof2dot/gprof2dot.py", __FILE__), *ARGV) stackprof-0.2.26/stackprof.gemspec0000644000004100000410000000213714605430231017167 0ustar www-datawww-dataGem::Specification.new do |s| s.name = 'stackprof' s.version = '0.2.26' s.homepage = 'http://github.com/tmm1/stackprof' s.authors = 'Aman Gupta' s.email = 'aman@tmm1.net' s.metadata = { 'bug_tracker_uri' => 'https://github.com/tmm1/stackprof/issues', 'changelog_uri' => "https://github.com/tmm1/stackprof/blob/v#{s.version}/CHANGELOG.md", 'documentation_uri' => "https://www.rubydoc.info/gems/stackprof/#{s.version}", 'source_code_uri' => "https://github.com/tmm1/stackprof/tree/v#{s.version}" } s.files = `git ls-files`.split("\n") s.extensions = 'ext/stackprof/extconf.rb' s.bindir = 'bin' s.executables << 'stackprof' s.executables << 'stackprof-flamegraph.pl' s.executables << 'stackprof-gprof2dot.py' s.summary = 'sampling callstack-profiler for ruby 2.2+' s.description = 'stackprof is a fast sampling profiler for ruby code, with cpu, wallclock and object allocation samplers.' s.required_ruby_version = '>= 2.2' s.license = 'MIT' s.add_development_dependency 'rake-compiler', '~> 0.9' s.add_development_dependency 'minitest', '~> 5.0' end stackprof-0.2.26/.gitignore0000644000004100000410000000013214605430231015607 0ustar www-datawww-data/tmp /lib/stackprof/stackprof.bundle /lib/stackprof/stackprof.so *.sw? 
/pkg /Gemfile.lock stackprof-0.2.26/.github/0000755000004100000410000000000014605430231015163 5ustar www-datawww-datastackprof-0.2.26/.github/workflows/0000755000004100000410000000000014605430231017220 5ustar www-datawww-datastackprof-0.2.26/.github/workflows/ci.yml0000644000004100000410000000174714605430231020347 0ustar www-datawww-dataname: CI on: [push, pull_request] jobs: rubies: runs-on: ubuntu-latest strategy: fail-fast: false matrix: ruby: [ ruby-head, '3.2', '3.1', '3.0', '2.7', truffleruby ] steps: - name: Checkout uses: actions/checkout@v2 - name: Set up Ruby uses: ruby/setup-ruby@v1 with: ruby-version: ${{ matrix.ruby }} - name: Install dependencies run: bundle install - name: Run test run: rake - name: Install gem run: rake install platforms: strategy: matrix: os: [macos] ruby: ['3.0'] runs-on: ${{ matrix.os }}-latest steps: - name: Checkout uses: actions/checkout@v2 - name: Set up Ruby uses: ruby/setup-ruby@v1 with: ruby-version: ${{ matrix.ruby }} - name: Install dependencies run: bundle install - name: Run test run: rake - name: Install gem run: rake install stackprof-0.2.26/lib/0000755000004100000410000000000014605430231014371 5ustar www-datawww-datastackprof-0.2.26/lib/stackprof/0000755000004100000410000000000014605430231016365 5ustar www-datawww-datastackprof-0.2.26/lib/stackprof/report.rb0000644000004100000410000005421614605430231020235 0ustar www-datawww-data# frozen_string_literal: true require 'pp' require 'digest/sha2' require 'json' module StackProf class Report MARSHAL_SIGNATURE = "\x04\x08" class << self def from_file(file) if (content = IO.binread(file)).start_with?(MARSHAL_SIGNATURE) new(Marshal.load(content)) else from_json(JSON.parse(content)) end end def from_json(json) new(parse_json(json)) end def parse_json(json) json.keys.each do |key| value = json.delete(key) from_json(value) if value.is_a?(Hash) new_key = case key when /\A[0-9]*\z/ key.to_i else key.to_sym end json[new_key] = value end json end end def initialize(data) @data = 
data end attr_reader :data def frames(sort_by_total=false) @data[:"sorted_frames_#{sort_by_total}"] ||= @data[:frames].sort_by{ |iseq, stats| -stats[sort_by_total ? :total_samples : :samples] }.inject({}){|h, (k, v)| h[k] = v; h} end def normalized_frames id2hash = {} @data[:frames].each do |frame, info| id2hash[frame.to_s] = info[:hash] = Digest::SHA256.hexdigest("#{info[:name]}#{info[:file]}#{info[:line]}") end @data[:frames].inject(Hash.new) do |hash, (frame, info)| info = hash[id2hash[frame.to_s]] = info.dup info[:edges] = info[:edges].inject(Hash.new){ |edges, (edge, weight)| edges[id2hash[edge.to_s]] = weight; edges } if info[:edges] hash end end def version @data[:version] end def modeline "#{@data[:mode]}(#{@data[:interval]})" end def overall_samples @data[:samples] end def max_samples @data[:max_samples] ||= @data[:frames].values.max_by{ |frame| frame[:samples] }[:samples] end def files @data[:files] ||= @data[:frames].inject(Hash.new) do |hash, (addr, frame)| if file = frame[:file] and lines = frame[:lines] hash[file] ||= Hash.new lines.each do |line, weight| hash[file][line] = add_lines(hash[file][line], weight) end end hash end end def add_lines(a, b) return b if a.nil? return a+b if a.is_a? Integer return [ a[0], a[1]+b ] if b.is_a? 
Integer [ a[0]+b[0], a[1]+b[1] ] end def print_debug pp @data end def print_dump(f=STDOUT) f.puts Marshal.dump(@data.reject{|k,v| k == :files }) end def print_json(f=STDOUT) require "json" f.puts JSON.generate(@data, max_nesting: false) end def print_stackcollapse raise "profile does not include raw samples (add `raw: true` to collecting StackProf.run)" unless raw = data[:raw] while len = raw.shift frames = raw.slice!(0, len) weight = raw.shift print frames.map{ |a| data[:frames][a][:name] }.join(';') puts " #{weight}" end end def print_timeline_flamegraph(f=STDOUT, skip_common=true) print_flamegraph(f, skip_common, false) end def print_alphabetical_flamegraph(f=STDOUT, skip_common=true) print_flamegraph(f, skip_common, true) end def print_flamegraph(f, skip_common, alphabetical=false) raise "profile does not include raw samples (add `raw: true` to collecting StackProf.run)" unless raw = data[:raw] stacks, max_x, max_y = flamegraph_stacks(raw) stacks.sort! if alphabetical f.puts 'flamegraph([' max_y.times do |y| row_prev = nil row_width = 0 x = 0 stacks.each do |stack| weight = stack.last cell = stack[y] unless y == stack.length-1 if cell.nil? if row_prev flamegraph_row(f, x - row_width, y, row_width, row_prev) end row_prev = nil x += weight next end if row_prev.nil? 
# start new row with this cell row_width = weight row_prev = cell x += weight elsif row_prev == cell # grow current row along x-axis row_width += weight x += weight else # end current row and start new row flamegraph_row(f, x - row_width, y, row_width, row_prev) x += weight row_prev = cell row_width = weight end row_prev = cell end if row_prev next if skip_common && row_width == max_x flamegraph_row(f, x - row_width, y, row_width, row_prev) end end f.puts '])' end def flamegraph_stacks(raw) stacks = [] max_x = 0 max_y = 0 idx = 0 while len = raw[idx] idx += 1 max_y = len if len > max_y stack = raw.slice(idx, len+1) idx += len+1 stacks << stack max_x += stack.last end return stacks, max_x, max_y end def flamegraph_row(f, x, y, weight, addr) frame = @data[:frames][addr] f.print ',' if @rows_started @rows_started = true f.puts %{{"x":#{x},"y":#{y},"width":#{weight},"frame_id":#{addr},"frame":#{frame[:name].dump},"file":#{frame[:file].dump}}} end def convert_to_d3_flame_graph_format(name, stacks, depth) weight = 0 children = [] stacks.chunk do |stack| if depth == stack.length - 1 :leaf else stack[depth] end end.each do |val, child_stacks| if val == :leaf child_stacks.each do |stack| weight += stack.last end else frame = @data[:frames][val] child_name = "#{ frame[:name] } : #{ frame[:file] } : #{ frame[:line] }" child_data = convert_to_d3_flame_graph_format(child_name, child_stacks, depth + 1) weight += child_data["value"] children << child_data end end { "name" => name, "value" => weight, "children" => children, } end def print_d3_flamegraph(f=STDOUT, skip_common=true) raise "profile does not include raw samples (add `raw: true` to collecting StackProf.run)" unless raw = data[:raw] stacks, * = flamegraph_stacks(raw) # d3-flame-grpah supports only alphabetical flamegraph stacks.sort! require "json" json = JSON.generate(convert_to_d3_flame_graph_format("", stacks, 0), max_nesting: false) # This html code is almost copied from d3-flame-graph sample code. 
# (Apache License 2.0) # https://github.com/spiermar/d3-flame-graph/blob/gh-pages/index.html f.print <<-END stackprof (mode: #{ data[:mode] })

stackprof (mode: #{ data[:mode] })

powered by d3-flame-graph

END end def print_graphviz(options = {}, f = STDOUT) if filter = options[:filter] mark_stack = [] list = frames(true) list.each{ |addr, frame| mark_stack << addr if frame[:name] =~ filter } while addr = mark_stack.pop frame = list[addr] unless frame[:marked] mark_stack += frame[:edges].map{ |addr, weight| addr if list[addr][:total_samples] <= weight*1.2 }.compact if frame[:edges] frame[:marked] = true end end list = list.select{ |addr, frame| frame[:marked] } list.each{ |addr, frame| frame[:edges] && frame[:edges].delete_if{ |k,v| list[k].nil? } } list else list = frames(true) end limit = options[:limit] fraction = options[:node_fraction] included_nodes = {} node_minimum = fraction ? (fraction * overall_samples).ceil : 0 f.puts "digraph profile {" f.puts "Legend [shape=box,fontsize=24,shape=plaintext,label=\"" f.print "Total samples: #{overall_samples}\\l" f.print "Showing top #{limit} nodes\\l" if limit f.print "Dropped nodes with < #{node_minimum} samples\\l" if fraction f.puts "\"];" list.each_with_index do |(frame, info), index| call, total = info.values_at(:samples, :total_samples) break if total < node_minimum || (limit && index >= limit) sample = ''.dup sample << "#{call} (%2.1f%%)\\rof " % (call*100.0/overall_samples) if call < total sample << "#{total} (%2.1f%%)\\r" % (total*100.0/overall_samples) fontsize = (1.0 * call / max_samples) * 28 + 10 size = (1.0 * total / overall_samples) * 2.0 + 0.5 f.puts " \"#{frame}\" [size=#{size}] [fontsize=#{fontsize}] [penwidth=\"#{size}\"] [shape=box] [label=\"#{info[:name]}\\n#{sample}\"];" included_nodes[frame] = true end list.each do |frame, info| next unless included_nodes[frame] if edges = info[:edges] edges.each do |edge, weight| next unless included_nodes[edge] size = (1.0 * weight / overall_samples) * 2.0 + 0.5 f.puts " \"#{frame}\" -> \"#{edge}\" [label=\"#{weight}\"] [weight=\"#{weight}\"] [penwidth=\"#{size}\"];" end end end f.puts "}" end def print_text(sort_by_total=false, limit=nil, select_files= nil, 
reject_files=nil, select_names=nil, reject_names=nil, f = STDOUT) f.puts "==================================" f.printf " Mode: #{modeline}\n" f.printf " Samples: #{@data[:samples]} (%.2f%% miss rate)\n", 100.0*@data[:missed_samples]/(@data[:missed_samples]+@data[:samples]) f.printf " GC: #{@data[:gc_samples]} (%.2f%%)\n", 100.0*@data[:gc_samples]/@data[:samples] f.puts "==================================" f.printf "% 10s (pct) % 10s (pct) FRAME\n" % ["TOTAL", "SAMPLES"] list = frames(sort_by_total) list.select!{|_, info| select_files.any?{|path| info[:file].start_with?(path)}} if select_files list.select!{|_, info| select_names.any?{|reg| info[:name] =~ reg}} if select_names list.reject!{|_, info| reject_files.any?{|path| info[:file].start_with?(path)}} if reject_files list.reject!{|_, info| reject_names.any?{|reg| info[:name] =~ reg}} if reject_names list = list.first(limit) if limit list.each do |frame, info| call, total = info.values_at(:samples, :total_samples) f.printf "% 10d % 8s % 10d % 8s %s\n", total, "(%2.1f%%)" % (total*100.0/overall_samples), call, "(%2.1f%%)" % (call*100.0/overall_samples), info[:name] end end def print_callgrind(f = STDOUT) f.puts "version: 1" f.puts "creator: stackprof" f.puts "pid: 0" f.puts "cmd: ruby" f.puts "part: 1" f.puts "desc: mode: #{modeline}" f.puts "desc: missed: #{@data[:missed_samples]})" f.puts "positions: line" f.puts "events: Instructions" f.puts "summary: #{@data[:samples]}" list = frames list.each do |addr, frame| f.puts "fl=#{frame[:file]}" f.puts "fn=#{frame[:name]}" frame[:lines].each do |line, weight| f.puts "#{line} #{weight.is_a?(Array) ? 
weight[1] : weight}" end if frame[:lines] frame[:edges].each do |edge, weight| oframe = list[edge] f.puts "cfl=#{oframe[:file]}" unless oframe[:file] == frame[:file] f.puts "cfn=#{oframe[:name]}" f.puts "calls=#{weight} #{frame[:line] || 0}\n#{oframe[:line] || 0} #{weight}" end if frame[:edges] f.puts end f.puts "totals: #{@data[:samples]}" end def print_method(name, f = STDOUT) name = /#{name}/ unless Regexp === name frames.each do |frame, info| next unless info[:name] =~ name file, line = info.values_at(:file, :line) line ||= 1 lines = info[:lines] maxline = lines ? lines.keys.max : line + 5 f.printf "%s (%s:%d)\n", info[:name], file, line f.printf " samples: % 5d self (%2.1f%%) / % 5d total (%2.1f%%)\n", info[:samples], 100.0*info[:samples]/overall_samples, info[:total_samples], 100.0*info[:total_samples]/overall_samples if (callers = callers_for(frame)).any? f.puts " callers:" callers = callers.sort_by(&:last).reverse callers.each do |name, weight| f.printf " % 5d (% 8s) %s\n", weight, "%3.1f%%" % (100.0*weight/info[:total_samples]), name end end if callees = info[:edges] f.printf " callees (%d total):\n", info[:total_samples]-info[:samples] callees = callees.map{ |k, weight| [data[:frames][k][:name], weight] }.sort_by{ |k,v| -v } callees.each do |name, weight| f.printf " % 5d (% 8s) %s\n", weight, "%3.1f%%" % (100.0*weight/(info[:total_samples]-info[:samples])), name end end f.puts " code:" source_display(f, file, lines, line-1..maxline) end end # Walk up and down the stack from a given starting point (name). 
Loops # until `:exit` is selected def walk_method(name) method_choice = /#{Regexp.escape name}/ invalid_choice = false # Continue walking up and down the stack until the users selects "exit" while method_choice != :exit print_method method_choice unless invalid_choice STDOUT.puts "\n\n" # Determine callers and callees for the current frame new_frames = frames.select {|_, info| info[:name] =~ method_choice } new_choices = new_frames.map {|frame, info| [ callers_for(frame).sort_by(&:last).reverse.map(&:first), (info[:edges] || []).map{ |k, w| [data[:frames][k][:name], w] }.sort_by{ |k,v| -v }.map(&:first) ]}.flatten + [:exit] # Print callers and callees for selection STDOUT.puts "Select next method:" new_choices.each_with_index do |method, index| STDOUT.printf "%2d) %s\n", index + 1, method.to_s end # Pick selection STDOUT.printf "> " selection = STDIN.gets.chomp.to_i - 1 STDOUT.puts "\n\n\n" # Determine if it was a valid choice # (if not, don't re-run .print_method) if new_choice = new_choices[selection] invalid_choice = false method_choice = new_choice == :exit ? :exit : %r/^#{Regexp.escape new_choice}$/ else invalid_choice = true STDOUT.puts "Invalid choice. Please select again..." end end end def print_files(sort_by_total=false, limit=nil, f = STDOUT) list = files.map{ |file, vals| [file, vals.values.inject([0,0]){ |sum, n| add_lines(sum, n) }] } list = list.sort_by{ |file, samples| -samples[1] } list = list.first(limit) if limit list.each do |file, vals| total_samples, samples = *vals f.printf "% 5d (%5.1f%%) / % 5d (%5.1f%%) %s\n", total_samples, (100.0*total_samples/overall_samples), samples, (100.0*samples/overall_samples), file end end def print_file(filter, f = STDOUT) filter = /#{Regexp.escape filter}/ unless Regexp === filter list = files.select{ |name, lines| name =~ filter } list.sort_by{ |file, vals| -vals.values.inject(0){ |sum, n| sum + (n.is_a?(Array) ? 
n[1] : n) } }.each do |file, lines| source_display(f, file, lines) end end def +(other) raise ArgumentError, "cannot combine #{other.class}" unless self.class == other.class raise ArgumentError, "cannot combine #{modeline} with #{other.modeline}" unless modeline == other.modeline raise ArgumentError, "cannot combine v#{version} with v#{other.version}" unless version == other.version f1, f2 = normalized_frames, other.normalized_frames frames = (f1.keys + f2.keys).uniq.inject(Hash.new) do |hash, id| if f1[id].nil? hash[id] = f2[id] elsif f2[id] hash[id] = f1[id] hash[id][:total_samples] += f2[id][:total_samples] hash[id][:samples] += f2[id][:samples] if f2[id][:edges] edges = hash[id][:edges] ||= {} f2[id][:edges].each do |edge, weight| edges[edge] ||= 0 edges[edge] += weight end end if f2[id][:lines] lines = hash[id][:lines] ||= {} f2[id][:lines].each do |line, weight| lines[line] = add_lines(lines[line], weight) end end else hash[id] = f1[id] end hash end d1, d2 = data, other.data data = { version: version, mode: d1[:mode], interval: d1[:interval], samples: d1[:samples] + d2[:samples], gc_samples: d1[:gc_samples] + d2[:gc_samples], missed_samples: d1[:missed_samples] + d2[:missed_samples], frames: frames } self.class.new(data) end private def root_frames frames.select{ |addr, frame| callers_for(addr).size == 0 } end def callers_for(addr) @callers_for ||= {} @callers_for[addr] ||= data[:frames].map{ |id, other| [other[:name], other[:edges][addr]] if other[:edges] && other[:edges].include?(addr) }.compact end def source_display(f, file, lines, range=nil) File.readlines(file).each_with_index do |code, i| next unless range.nil? 
|| range.include?(i) if lines and lineinfo = lines[i+1] total_samples, samples = lineinfo if version == 1.0 samples = total_samples f.printf "% 5d % 7s | % 5d | %s", samples, "(%2.1f%%)" % (100.0*samples/overall_samples), i+1, code elsif samples > 0 f.printf "% 5d % 8s / % 5d % 7s | % 5d | %s", total_samples, "(%2.1f%%)" % (100.0*total_samples/overall_samples), samples, "(%2.1f%%)" % (100.0*samples/overall_samples), i+1, code else f.printf "% 5d % 8s | % 5d | %s", total_samples, "(%3.1f%%)" % (100.0*total_samples/overall_samples), i+1, code end else if version == 1.0 f.printf " | % 5d | %s", i+1, code else f.printf " | % 5d | %s", i+1, code end end end rescue SystemCallError f.puts " SOURCE UNAVAILABLE" end end end stackprof-0.2.26/lib/stackprof/truffleruby.rb0000644000004100000410000000121414605430231021261 0ustar www-datawww-datamodule StackProf # Define the same methods as stackprof.c class << self def running? false end def run(*args) unimplemented end def start(*args) unimplemented end def stop unimplemented end def results(*args) unimplemented end def sample unimplemented end def use_postponed_job! # noop end private def unimplemented raise "Use --cpusampler=flamegraph or --cpusampler instead of StackProf on TruffleRuby.\n" \ "See https://www.graalvm.org/tools/profiling/ and `ruby --help:cpusampler` for more details." 
end end end stackprof-0.2.26/lib/stackprof/autorun.rb0000644000004100000410000000110614605430231020405 0ustar www-datawww-datarequire "stackprof" options = {} options[:mode] = ENV["STACKPROF_MODE"].to_sym if ENV.key?("STACKPROF_MODE") options[:interval] = Integer(ENV["STACKPROF_INTERVAL"]) if ENV.key?("STACKPROF_INTERVAL") options[:raw] = true if ENV["STACKPROF_RAW"] options[:ignore_gc] = true if ENV["STACKPROF_IGNORE_GC"] at_exit do StackProf.stop output_path = ENV.fetch("STACKPROF_OUT") do require "tempfile" Tempfile.create(["stackprof", ".dump"]).path end StackProf.results(output_path) $stderr.puts("StackProf results dumped at: #{output_path}") end StackProf.start(**options) stackprof-0.2.26/lib/stackprof/middleware.rb0000644000004100000410000000355214605430231021034 0ustar www-datawww-datarequire 'fileutils' module StackProf class Middleware def initialize(app, options = {}) @app = app @options = options @num_reqs = options[:save_every] || nil Middleware.mode = options[:mode] || :cpu Middleware.interval = options[:interval] || 1000 Middleware.raw = options[:raw] || false Middleware.enabled = options[:enabled] options[:path] = 'tmp/' if options[:path].to_s.empty? 
Middleware.path = options[:path] Middleware.metadata = options[:metadata] || {} at_exit{ Middleware.save } if options[:save_at_exit] end def call(env) enabled = Middleware.enabled?(env) StackProf.start( mode: Middleware.mode, interval: Middleware.interval, raw: Middleware.raw, metadata: Middleware.metadata, ) if enabled @app.call(env) ensure if enabled StackProf.stop if @num_reqs && (@num_reqs-=1) == 0 @num_reqs = @options[:save_every] Middleware.save end end end class << self attr_accessor :enabled, :mode, :interval, :raw, :path, :metadata def enabled?(env) if enabled.respond_to?(:call) enabled.call(env) else enabled end end def save if results = StackProf.results path = Middleware.path is_directory = path != path.chomp('/') if is_directory filename = "stackprof-#{results[:mode]}-#{Process.pid}-#{Time.now.to_i}.dump" else filename = File.basename(path) path = File.dirname(path) end FileUtils.mkdir_p(path) File.open(File.join(path, filename), 'wb') do |f| f.write Marshal.dump(results) end filename end end end end end stackprof-0.2.26/lib/stackprof/flamegraph/0000755000004100000410000000000014605430231020473 5ustar www-datawww-datastackprof-0.2.26/lib/stackprof/flamegraph/flamegraph.js0000644000004100000410000006507314605430231023152 0ustar www-datawww-dataif (typeof Element.prototype.matches !== 'function') { Element.prototype.matches = Element.prototype.msMatchesSelector || Element.prototype.mozMatchesSelector || Element.prototype.webkitMatchesSelector || function matches(selector) { var element = this var elements = (element.document || element.ownerDocument).querySelectorAll(selector) var index = 0 while (elements[index] && elements[index] !== element) { ++index } return Boolean(elements[index]) } } if (typeof Element.prototype.closest !== 'function') { Element.prototype.closest = function closest(selector) { var element = this while (element && element.nodeType === 1) { if (element.matches(selector)) { return element } element = element.parentNode } return null 
} } if (typeof Object.assign !== 'function') { (function() { Object.assign = function(target) { 'use strict' // We must check against these specific cases. if (target === undefined || target === null) { throw new TypeError('Cannot convert undefined or null to object') } var output = Object(target) for (var index = 1; index < arguments.length; index++) { var source = arguments[index] if (source !== undefined && source !== null) { for (var nextKey in source) { if (source.hasOwnProperty(nextKey)) { output[nextKey] = source[nextKey] } } } } return output } })() } function EventSource() { var self = this self.eventListeners = {} } EventSource.prototype.on = function(name, callback) { var self = this var listeners = self.eventListeners[name] if (!listeners) listeners = self.eventListeners[name] = [] listeners.push(callback) } EventSource.prototype.dispatch = function(name, data) { var self = this var listeners = self.eventListeners[name] || [] listeners.forEach(function(c) { requestAnimationFrame(function() { c(data) }) }) } function CanvasView(canvas) { var self = this self.canvas = canvas } CanvasView.prototype.setDimensions = function(width, height) { var self = this if (self.resizeRequestID) cancelAnimationFrame(self.resizeRequestID) self.resizeRequestID = requestAnimationFrame(self.setDimensionsNow.bind(self, width, height)) } CanvasView.prototype.setDimensionsNow = function(width, height) { var self = this if (width === self.width && height === self.height) return self.width = width self.height = height self.canvas.style.width = width self.canvas.style.height = height var ratio = window.devicePixelRatio || 1 self.canvas.width = width * ratio self.canvas.height = height * ratio var ctx = self.canvas.getContext('2d') ctx.setTransform(1, 0, 0, 1, 0, 0) ctx.scale(ratio, ratio) self.repaintNow() } CanvasView.prototype.paint = function() { } CanvasView.prototype.scheduleRepaint = function() { var self = this if (self.repaintRequestID) return self.repaintRequestID = 
requestAnimationFrame(function() { self.repaintRequestID = null self.repaintNow() }) } CanvasView.prototype.repaintNow = function() { var self = this self.canvas.getContext('2d').clearRect(0, 0, self.width, self.height) self.paint() if (self.repaintRequestID) { cancelAnimationFrame(self.repaintRequestID) self.repaintRequestID = null } } function Flamechart(canvas, data, dataRange, info) { var self = this CanvasView.call(self, canvas) EventSource.call(self) self.canvas = canvas self.data = data self.dataRange = dataRange self.info = info self.viewport = { x: dataRange.minX, y: dataRange.minY, width: dataRange.maxX - dataRange.minX, height: dataRange.maxY - dataRange.minY, } } Flamechart.prototype = Object.create(CanvasView.prototype) Flamechart.prototype.constructor = Flamechart Object.assign(Flamechart.prototype, EventSource.prototype) Flamechart.prototype.xScale = function(x) { var self = this return self.widthScale(x - self.viewport.x) } Flamechart.prototype.yScale = function(y) { var self = this return self.heightScale(y - self.viewport.y) } Flamechart.prototype.widthScale = function(width) { var self = this return width * self.width / self.viewport.width } Flamechart.prototype.heightScale = function(height) { var self = this return height * self.height / self.viewport.height } Flamechart.prototype.frameRect = function(f) { return { x: f.x, y: f.y, width: f.width, height: 1, } } Flamechart.prototype.dataToCanvas = function(r) { var self = this return { x: self.xScale(r.x), y: self.yScale(r.y), width: self.widthScale(r.width), height: self.heightScale(r.height), } } Flamechart.prototype.setViewport = function(viewport) { var self = this if (self.viewport.x === viewport.x && self.viewport.y === viewport.y && self.viewport.width === viewport.width && self.viewport.height === viewport.height) return self.viewport = viewport self.scheduleRepaint() self.dispatch('viewportchanged', { current: viewport }) } Flamechart.prototype.paint = function(opacity, frames, gemName) 
{ var self = this var ctx = self.canvas.getContext('2d') ctx.strokeStyle = 'rgba(0, 0, 0, 0.2)' if (self.showLabels) { ctx.textBaseline = 'middle' ctx.font = '11px ' + getComputedStyle(this.canvas).fontFamily // W tends to be one of the widest characters (and if the font is truly // fixed-width then any character will do). var characterWidth = ctx.measureText('WWWW').width / 4 } if (typeof opacity === 'undefined') opacity = 1 frames = frames || self.data var blocksByColor = {} frames.forEach(function(f) { if (gemName && f.gemName !== gemName) return var r = self.dataToCanvas(self.frameRect(f)) if (r.x >= self.width || r.y >= self.height || (r.x + r.width) <= 0 || (r.y + r.height) <= 0) { return } var i = self.info[f.frame_id] var color = colorString(i.color, opacity) var colorBlocks = blocksByColor[color] if (!colorBlocks) colorBlocks = blocksByColor[color] = [] colorBlocks.push({ rect: r, text: f.frame }) }) var textBlocks = [] Object.keys(blocksByColor).forEach(function(color) { ctx.fillStyle = color blocksByColor[color].forEach(function(block) { if (opacity < 1) ctx.clearRect(block.rect.x, block.rect.y, block.rect.width, block.rect.height) ctx.fillRect(block.rect.x, block.rect.y, block.rect.width, block.rect.height) if (block.rect.width > 4 && block.rect.height > 4) ctx.strokeRect(block.rect.x, block.rect.y, block.rect.width, block.rect.height) if (!self.showLabels || block.rect.width / characterWidth < 4) return textBlocks.push(block) }) }) ctx.fillStyle = '#000' textBlocks.forEach(function(block) { var text = block.text var textRect = Object.assign({}, block.rect) textRect.x += 1 textRect.width -= 2 if (textRect.width < text.length * characterWidth * 0.75) text = centerTruncate(block.text, Math.floor(textRect.width / characterWidth)) ctx.fillText(text, textRect.x, textRect.y + textRect.height / 2, textRect.width) }) } Flamechart.prototype.frameAtPoint = function(x, y) { var self = this return self.data.find(function(d) { var r = 
self.dataToCanvas(self.frameRect(d)) return r.x <= x && r.x + r.width >= x && r.y <= y && r.y + r.height >= y }) } function MainFlamechart(canvas, data, dataRange, info) { var self = this Flamechart.call(self, canvas, data, dataRange, info) self.showLabels = true self.canvas.addEventListener('mousedown', self.onMouseDown.bind(self)) self.canvas.addEventListener('mousemove', self.onMouseMove.bind(self)) self.canvas.addEventListener('mouseout', self.onMouseOut.bind(self)) self.canvas.addEventListener('wheel', self.onWheel.bind(self)) } MainFlamechart.prototype = Object.create(Flamechart.prototype) MainFlamechart.prototype.setDimensionsNow = function(width, height) { var self = this var viewport = Object.assign({}, self.viewport) viewport.height = height / 16 self.setViewport(viewport) CanvasView.prototype.setDimensionsNow.call(self, width, height) } MainFlamechart.prototype.onMouseDown = function(e) { var self = this if (e.button !== 0) return captureMouse({ mouseup: self.onMouseUp.bind(self), mousemove: self.onMouseMove.bind(self), }) var clientRect = self.canvas.getBoundingClientRect() var currentX = e.clientX - clientRect.left var currentY = e.clientY - clientRect.top self.dragging = true self.dragInfo = { mouse: { x: currentX, y: currentY }, viewport: { x: self.viewport.x, y: self.viewport.y }, } e.preventDefault() } MainFlamechart.prototype.onMouseUp = function(e) { var self = this if (!self.dragging) return releaseCapture() self.dragging = false e.preventDefault() } MainFlamechart.prototype.onMouseMove = function(e) { var self = this var clientRect = self.canvas.getBoundingClientRect() var currentX = e.clientX - clientRect.left var currentY = e.clientY - clientRect.top if (self.dragging) { var viewport = Object.assign({}, self.viewport) viewport.x = self.dragInfo.viewport.x - (currentX - self.dragInfo.mouse.x) * viewport.width / self.width viewport.y = self.dragInfo.viewport.y - (currentY - self.dragInfo.mouse.y) * viewport.height / self.height viewport.x = 
Math.min(self.dataRange.maxX - viewport.width, Math.max(self.dataRange.minX, viewport.x)) viewport.y = Math.min(self.dataRange.maxY - viewport.height, Math.max(self.dataRange.minY, viewport.y)) self.setViewport(viewport) return } var frame = self.frameAtPoint(currentX, currentY) self.setHoveredFrame(frame) } MainFlamechart.prototype.onMouseOut = function() { var self = this if (self.dragging) return self.setHoveredFrame(null) } MainFlamechart.prototype.onWheel = function(e) { var self = this var deltaX = e.deltaX var deltaY = e.deltaY if (e.deltaMode == WheelEvent.prototype.DOM_DELTA_LINE) { deltaX *= 11 deltaY *= 11 } if (e.shiftKey) { if ('webkitDirectionInvertedFromDevice' in e) { if (e.webkitDirectionInvertedFromDevice) deltaY *= -1 } else if (/Mac OS X/.test(navigator.userAgent)) { // Assume that most Mac users have "Scroll direction: Natural" enabled. deltaY *= -1 } var mouseWheelZoomSpeed = 1 / 120 self.handleZoomGesture(Math.pow(1.2, -(deltaY || deltaX) * mouseWheelZoomSpeed), e.offsetX) e.preventDefault() return } var viewport = Object.assign({}, self.viewport) viewport.x += deltaX * viewport.width / (self.dataRange.maxX - self.dataRange.minX) viewport.x = Math.min(self.dataRange.maxX - viewport.width, Math.max(self.dataRange.minX, viewport.x)) viewport.y += (deltaY / 8) * viewport.height / (self.dataRange.maxY - self.dataRange.minY) viewport.y = Math.min(self.dataRange.maxY - viewport.height, Math.max(self.dataRange.minY, viewport.y)) self.setViewport(viewport) e.preventDefault() } MainFlamechart.prototype.handleZoomGesture = function(zoom, originX) { var self = this var viewport = Object.assign({}, self.viewport) var ratioX = originX / self.width var newWidth = Math.min(viewport.width / zoom, self.dataRange.maxX - self.dataRange.minX) viewport.x = Math.max(self.dataRange.minX, viewport.x + (viewport.width - newWidth) * ratioX) viewport.width = Math.min(newWidth, self.dataRange.maxX - viewport.x) self.setViewport(viewport) } 
MainFlamechart.prototype.setHoveredFrame = function(frame) { var self = this if (frame === self.hoveredFrame) return var previous = self.hoveredFrame self.hoveredFrame = frame self.dispatch('hoveredframechanged', { previous: previous, current: self.hoveredFrame }) } function OverviewFlamechart(container, viewportOverlay, data, dataRange, info) { var self = this Flamechart.call(self, container.querySelector('.overview'), data, dataRange, info) self.container = container self.showLabels = false self.viewportOverlay = viewportOverlay self.canvas.addEventListener('mousedown', self.onMouseDown.bind(self)) self.viewportOverlay.addEventListener('mousedown', self.onOverlayMouseDown.bind(self)) } OverviewFlamechart.prototype = Object.create(Flamechart.prototype) OverviewFlamechart.prototype.setViewportOverlayRect = function(r) { var self = this self.viewportOverlayRect = r r = self.dataToCanvas(r) r.width = Math.max(2, r.width) r.height = Math.max(2, r.height) if ('transform' in self.viewportOverlay.style) { self.viewportOverlay.style.transform = 'translate(' + r.x + 'px, ' + r.y + 'px) scale(' + r.width + ', ' + r.height + ')' } else { self.viewportOverlay.style.left = r.x self.viewportOverlay.style.top = r.y self.viewportOverlay.style.width = r.width self.viewportOverlay.style.height = r.height } } OverviewFlamechart.prototype.onMouseDown = function(e) { var self = this captureMouse({ mouseup: self.onMouseUp.bind(self), mousemove: self.onMouseMove.bind(self), }) self.dragging = true self.dragStartX = e.clientX - self.canvas.getBoundingClientRect().left self.handleDragGesture(e) e.preventDefault() } OverviewFlamechart.prototype.onMouseUp = function(e) { var self = this if (!self.dragging) return releaseCapture() self.dragging = false self.handleDragGesture(e) e.preventDefault() } OverviewFlamechart.prototype.onMouseMove = function(e) { var self = this if (!self.dragging) return self.handleDragGesture(e) e.preventDefault() } OverviewFlamechart.prototype.handleDragGesture = 
function(e) { var self = this var clientRect = self.canvas.getBoundingClientRect() var currentX = e.clientX - clientRect.left var currentY = e.clientY - clientRect.top if (self.dragCurrentX === currentX) return self.dragCurrentX = currentX var minX = Math.min(self.dragStartX, self.dragCurrentX) var maxX = Math.max(self.dragStartX, self.dragCurrentX) var rect = Object.assign({}, self.viewportOverlayRect) rect.x = minX / self.width * self.viewport.width + self.viewport.x rect.width = Math.max(self.viewport.width / 1000, (maxX - minX) / self.width * self.viewport.width) rect.y = Math.max(self.viewport.y, Math.min(self.viewport.height - self.viewport.y, currentY / self.height * self.viewport.height + self.viewport.y - rect.height / 2)) self.setViewportOverlayRect(rect) self.dispatch('overlaychanged', { current: self.viewportOverlayRect }) } OverviewFlamechart.prototype.onOverlayMouseDown = function(e) { var self = this captureMouse({ mouseup: self.onOverlayMouseUp.bind(self), mousemove: self.onOverlayMouseMove.bind(self), }) self.overlayDragging = true self.overlayDragInfo = { mouse: { x: e.clientX, y: e.clientY }, rect: Object.assign({}, self.viewportOverlayRect), } self.viewportOverlay.classList.add('moving') self.handleOverlayDragGesture(e) e.preventDefault() } OverviewFlamechart.prototype.onOverlayMouseUp = function(e) { var self = this if (!self.overlayDragging) return releaseCapture() self.overlayDragging = false self.viewportOverlay.classList.remove('moving') self.handleOverlayDragGesture(e) e.preventDefault() } OverviewFlamechart.prototype.onOverlayMouseMove = function(e) { var self = this if (!self.overlayDragging) return self.handleOverlayDragGesture(e) e.preventDefault() } OverviewFlamechart.prototype.handleOverlayDragGesture = function(e) { var self = this var deltaX = (e.clientX - self.overlayDragInfo.mouse.x) / self.width * self.viewport.width var deltaY = (e.clientY - self.overlayDragInfo.mouse.y) / self.height * self.viewport.height var rect = 
Object.assign({}, self.overlayDragInfo.rect) rect.x += deltaX rect.y += deltaY rect.x = Math.max(self.viewport.x, Math.min(self.viewport.x + self.viewport.width - rect.width, rect.x)) rect.y = Math.max(self.viewport.y, Math.min(self.viewport.y + self.viewport.height - rect.height, rect.y)) self.setViewportOverlayRect(rect) self.dispatch('overlaychanged', { current: self.viewportOverlayRect }) } function FlamegraphView(data, info, sortedGems) { var self = this self.data = data self.info = info self.dataRange = self.computeDataRange() self.mainChart = new MainFlamechart(document.querySelector('.flamegraph'), data, self.dataRange, info) self.overview = new OverviewFlamechart(document.querySelector('.overview-container'), document.querySelector('.overview-viewport-overlay'), data, self.dataRange, info) self.infoElement = document.querySelector('.info') self.mainChart.on('hoveredframechanged', self.onHoveredFrameChanged.bind(self)) self.mainChart.on('viewportchanged', self.onViewportChanged.bind(self)) self.overview.on('overlaychanged', self.onOverlayChanged.bind(self)) var legend = document.querySelector('.legend') self.renderLegend(legend, sortedGems) legend.addEventListener('mousemove', self.onLegendMouseMove.bind(self)) legend.addEventListener('mouseout', self.onLegendMouseOut.bind(self)) window.addEventListener('resize', self.updateDimensions.bind(self)) self.updateDimensions() } FlamegraphView.prototype.updateDimensions = function() { var self = this var margin = {top: 10, right: 10, bottom: 10, left: 10} var width = window.innerWidth - 200 - margin.left - margin.right var mainChartHeight = Math.ceil(window.innerHeight * 0.80) - margin.top - margin.bottom var overviewHeight = Math.floor(window.innerHeight * 0.20) - 60 - margin.top - margin.bottom self.mainChart.setDimensions(width + margin.left + margin.right, mainChartHeight + margin.top + margin.bottom) self.overview.setDimensions(width + margin.left + margin.right, overviewHeight + margin.top + margin.bottom) 
self.overview.setViewportOverlayRect(self.mainChart.viewport) } FlamegraphView.prototype.computeDataRange = function() { var self = this var range = { minX: Infinity, minY: Infinity, maxX: -Infinity, maxY: -Infinity } self.data.forEach(function(d) { range.minX = Math.min(range.minX, d.x) range.minY = Math.min(range.minY, d.y) range.maxX = Math.max(range.maxX, d.x + d.width) range.maxY = Math.max(range.maxY, d.y + 1) }) return range } FlamegraphView.prototype.onHoveredFrameChanged = function(data) { var self = this self.updateInfo(data.current) if (data.previous) self.repaintFrames(1, self.info[data.previous.frame_id].frames) if (data.current) self.repaintFrames(0.5, self.info[data.current.frame_id].frames) } FlamegraphView.prototype.repaintFrames = function(opacity, frames) { var self = this self.mainChart.paint(opacity, frames) self.overview.paint(opacity, frames) } FlamegraphView.prototype.updateInfo = function(frame) { var self = this if (!frame) { self.infoElement.style.backgroundColor = '' self.infoElement.querySelector('.frame').textContent = '' self.infoElement.querySelector('.file').textContent = '' self.infoElement.querySelector('.samples').textContent = '' self.infoElement.querySelector('.exclusive').textContent = '' return } var i = self.info[frame.frame_id] var shortFile = frame.file.replace(/^.+\/(gems|app|lib|config|jobs)/, '$1') var sData = self.samplePercentRaw(i.samples.length, frame.topFrame ? 
frame.topFrame.exclusiveCount : 0) self.infoElement.style.backgroundColor = colorString(i.color, 1) self.infoElement.querySelector('.frame').textContent = frame.frame self.infoElement.querySelector('.file').textContent = shortFile self.infoElement.querySelector('.samples').textContent = sData[0] + ' samples (' + sData[1] + '%)' if (sData[3]) self.infoElement.querySelector('.exclusive').textContent = sData[2] + ' exclusive (' + sData[3] + '%)' else self.infoElement.querySelector('.exclusive').textContent = '' } FlamegraphView.prototype.samplePercentRaw = function(samples, exclusive) { var self = this var ret = [samples, ((samples / self.dataRange.maxX) * 100).toFixed(2)] if (exclusive) ret = ret.concat([exclusive, ((exclusive / self.dataRange.maxX) * 100).toFixed(2)]) return ret } FlamegraphView.prototype.onViewportChanged = function(data) { var self = this self.overview.setViewportOverlayRect(data.current) } FlamegraphView.prototype.onOverlayChanged = function(data) { var self = this self.mainChart.setViewport(data.current) } FlamegraphView.prototype.renderLegend = function(element, sortedGems) { var self = this var fragment = document.createDocumentFragment() sortedGems.forEach(function(gem) { var sData = self.samplePercentRaw(gem.samples.length) var node = document.createElement('div') node.className = 'legend-gem' node.setAttribute('data-gem-name', gem.name) node.style.backgroundColor = colorString(gem.color, 1) var span = document.createElement('span') span.style.float = 'right' span.textContent = sData[0] + 'x' span.appendChild(document.createElement('br')) span.appendChild(document.createTextNode(sData[1] + '%')) node.appendChild(span) var name = document.createElement('div') name.className = 'name' name.textContent = gem.name name.appendChild(document.createElement('br')) name.appendChild(document.createTextNode('\u00a0')) node.appendChild(name) fragment.appendChild(node) }) element.appendChild(fragment) } FlamegraphView.prototype.onLegendMouseMove = 
function(e) { var self = this var gemElement = e.target.closest('.legend-gem') var gemName = gemElement.getAttribute('data-gem-name') if (self.hoveredGemName === gemName) return if (self.hoveredGemName) { self.mainChart.paint(1, null, self.hoveredGemName) self.overview.paint(1, null, self.hoveredGemName) } self.hoveredGemName = gemName self.mainChart.paint(0.5, null, self.hoveredGemName) self.overview.paint(0.5, null, self.hoveredGemName) } FlamegraphView.prototype.onLegendMouseOut = function() { var self = this if (!self.hoveredGemName) return self.mainChart.paint(1, null, self.hoveredGemName) self.overview.paint(1, null, self.hoveredGemName) self.hoveredGemName = null } var capturingListeners = null function captureMouse(listeners) { if (capturingListeners) releaseCapture() for (var name in listeners) document.addEventListener(name, listeners[name], true) capturingListeners = listeners } function releaseCapture() { if (!capturingListeners) return for (var name in capturingListeners) document.removeEventListener(name, capturingListeners[name], true) capturingListeners = null } function guessGem(frame) { var split = frame.split('/gems/') if (split.length === 1) { split = frame.split('/app/') if (split.length === 1) { split = frame.split('/lib/') } else { return split[split.length - 1].split('/')[0] } split = split[Math.max(split.length - 2, 0)].split('/') return split[split.length - 1].split(':')[0] } else { return split[split.length - 1].split('/')[0].split('-', 2)[0] } } function color() { var r = parseInt(205 + Math.random() * 50) var g = parseInt(Math.random() * 230) var b = parseInt(Math.random() * 55) return [r, g, b] } // http://stackoverflow.com/a/7419630 function rainbow(numOfSteps, step) { // This function generates vibrant, "evenly spaced" colours (i.e. no clustering). This is ideal for creating easily distiguishable vibrant markers in Google Maps and other apps. 
// Adam Cole, 2011-Sept-14 // HSV to RBG adapted from: http://mjijackson.com/2008/02/rgb-to-hsl-and-rgb-to-hsv-color-model-conversion-algorithms-in-javascript var r, g, b var h = step / numOfSteps var i = ~~(h * 6) var f = h * 6 - i var q = 1 - f switch (i % 6) { case 0: r = 1, g = f, b = 0; break case 1: r = q, g = 1, b = 0; break case 2: r = 0, g = 1, b = f; break case 3: r = 0, g = q, b = 1; break case 4: r = f, g = 0, b = 1; break case 5: r = 1, g = 0, b = q; break } return [Math.floor(r * 255), Math.floor(g * 255), Math.floor(b * 255)] } function colorString(color, opacity) { if (typeof opacity === 'undefined') opacity = 1 return 'rgba(' + color.join(',') + ',' + opacity + ')' } // http://stackoverflow.com/questions/1960473/unique-values-in-an-array function getUnique(orig) { var o = {} for (var i = 0; i < orig.length; i++) o[orig[i]] = 1 return Object.keys(o) } function centerTruncate(text, maxLength) { var charactersToKeep = maxLength - 1 if (charactersToKeep <= 0) return '' if (text.length <= charactersToKeep) return text var prefixLength = Math.ceil(charactersToKeep / 2) var suffixLength = charactersToKeep - prefixLength var prefix = text.substr(0, prefixLength) var suffix = suffixLength > 0 ? 
text.substr(-suffixLength) : '' return [prefix, '\u2026', suffix].join('') } function flamegraph(data) { var info = {} data.forEach(function(d) { var i = info[d.frame_id] if (!i) info[d.frame_id] = i = {frames: [], samples: [], color: color()} i.frames.push(d) for (var j = 0; j < d.width; j++) { i.samples.push(d.x + j) } }) // Samples may overlap on the same line for (var r in info) { if (info[r].samples) { info[r].samples = getUnique(info[r].samples) } } // assign some colors, analyze samples per gem var gemStats = {} var topFrames = {} var lastFrame = {frame: 'd52e04d-df28-41ed-a215-b6ec840a8ea5', x: -1} data.forEach(function(d) { var gem = guessGem(d.file) var stat = gemStats[gem] d.gemName = gem if (!stat) { gemStats[gem] = stat = {name: gem, samples: [], frames: []} } stat.frames.push(d.frame_id) for (var j = 0; j < d.width; j++) { stat.samples.push(d.x + j) } // This assumes the traversal is in order if (lastFrame.x !== d.x) { var topFrame = topFrames[lastFrame.frame_id] if (!topFrame) { topFrames[lastFrame.frame_id] = topFrame = {exclusiveCount: 0} } topFrame.exclusiveCount += 1 lastFrame.topFrame = topFrame } lastFrame = d }) var topFrame = topFrames[lastFrame.frame_id] if (!topFrame) { topFrames[lastFrame.frame_id] = topFrame = {exclusiveCount: 0} } topFrame.exclusiveCount += 1 lastFrame.topFrame = topFrame var totalGems = 0 for (var k in gemStats) { totalGems++ gemStats[k].samples = getUnique(gemStats[k].samples) } var gemsSorted = Object.keys(gemStats).map(function(k) { return gemStats[k] }) gemsSorted.sort(function(a, b) { return b.samples.length - a.samples.length }) var currentIndex = 0 gemsSorted.forEach(function(stat) { stat.color = rainbow(totalGems, currentIndex) currentIndex += 1 for (var x = 0; x < stat.frames.length; x++) { info[stat.frames[x]].color = stat.color } }) new FlamegraphView(data, info, gemsSorted) } stackprof-0.2.26/lib/stackprof/flamegraph/viewer.html0000644000004100000410000000366614605430231022675 0ustar www-datawww-data 
flamegraph
stackprof-0.2.26/lib/stackprof.rb0000644000004100000410000000136414605430231016716 0ustar www-datawww-dataif RUBY_ENGINE == 'truffleruby' require "stackprof/truffleruby" else require "stackprof/stackprof" end if defined?(RubyVM::YJIT) && RubyVM::YJIT.enabled? if RUBY_VERSION < "3.3" # On 3.3 we don't need postponed jobs: # https://github.com/ruby/ruby/commit/a1dc1a3de9683daf5a543d6f618e17aabfcb8708 StackProf.use_postponed_job! end elsif RUBY_VERSION == "3.2.0" # 3.2.0 crash is the signal is received at the wrong time. # Fixed in https://github.com/ruby/ruby/pull/7116 # The fix is backported in 3.2.1: https://bugs.ruby-lang.org/issues/19336 StackProf.use_postponed_job! end module StackProf VERSION = '0.2.26' end StackProf.autoload :Report, "stackprof/report.rb" StackProf.autoload :Middleware, "stackprof/middleware.rb" stackprof-0.2.26/test/0000755000004100000410000000000014605430231014602 5ustar www-datawww-datastackprof-0.2.26/test/test_middleware.rb0000644000004100000410000000540114605430231020303 0ustar www-datawww-data$:.unshift File.expand_path('../../lib', __FILE__) require 'stackprof' require 'stackprof/middleware' require 'minitest/autorun' require 'tmpdir' class StackProf::MiddlewareTest < Minitest::Test def test_path_default StackProf::Middleware.new(Object.new) assert_equal 'tmp/', StackProf::Middleware.path end def test_path_custom StackProf::Middleware.new(Object.new, { path: 'foo/' }) assert_equal 'foo/', StackProf::Middleware.path end def test_save_default middleware = StackProf::Middleware.new(->(env) { 100.times { Object.new } }, save_every: 1, enabled: true) Dir.mktmpdir do |dir| Dir.chdir(dir) { middleware.call({}) } dir = File.join(dir, "tmp") assert File.directory? 
dir profile = Dir.entries(dir).reject { |x| File.directory?(x) }.first assert profile assert_equal "stackprof", profile.split("-")[0] assert_equal "cpu", profile.split("-")[1] assert_equal Process.pid.to_s, profile.split("-")[2] end end def test_save_custom middleware = StackProf::Middleware.new(->(env) { 100.times { Object.new } }, path: "foo/", save_every: 1, enabled: true) Dir.mktmpdir do |dir| Dir.chdir(dir) { middleware.call({}) } dir = File.join(dir, "foo") assert File.directory? dir profile = Dir.entries(dir).reject { |x| File.directory?(x) }.first assert profile assert_equal "stackprof", profile.split("-")[0] assert_equal "cpu", profile.split("-")[1] assert_equal Process.pid.to_s, profile.split("-")[2] end end def test_enabled_should_use_a_proc_if_passed env = {} StackProf::Middleware.new(Object.new, enabled: Proc.new{ false }) refute StackProf::Middleware.enabled?(env) StackProf::Middleware.new(Object.new, enabled: Proc.new{ true }) assert StackProf::Middleware.enabled?(env) end def test_enabled_should_use_a_proc_if_passed_and_use_the_request_env enable_proc = Proc.new {|env| env['PROFILE'] } env = Hash.new { false } StackProf::Middleware.new(Object.new, enabled: enable_proc) refute StackProf::Middleware.enabled?(env) env = Hash.new { true } StackProf::Middleware.new(Object.new, enabled: enable_proc) assert StackProf::Middleware.enabled?(env) end def test_raw StackProf::Middleware.new(Object.new, raw: true) assert StackProf::Middleware.raw end def test_metadata metadata = { key: 'value' } StackProf::Middleware.new(Object.new, metadata: metadata) assert_equal metadata, StackProf::Middleware.metadata end end unless RUBY_ENGINE == 'truffleruby' stackprof-0.2.26/test/fixtures/0000755000004100000410000000000014605430231016453 5ustar www-datawww-datastackprof-0.2.26/test/fixtures/profile.dump0000644000004100000410000000002514605430231020777 0ustar www-datawww-data{: 
modeI"cpu:ETstackprof-0.2.26/test/fixtures/profile.json0000644000004100000410000000002214605430231021000 0ustar www-datawww-data{ "mode": "cpu" } stackprof-0.2.26/test/test_stackprof.rb0000644000004100000410000002070714605430231020170 0ustar www-datawww-data$:.unshift File.expand_path('../../lib', __FILE__) require 'stackprof' require 'minitest/autorun' require 'tempfile' require 'pathname' class StackProfTest < Minitest::Test def setup Object.new # warm some caches to avoid flakiness end def test_info profile = StackProf.run{} assert_equal 1.2, profile[:version] assert_equal :wall, profile[:mode] assert_equal 1000, profile[:interval] assert_equal 0, profile[:samples] end def test_running assert_equal false, StackProf.running? StackProf.run{ assert_equal true, StackProf.running? } end def test_start_stop_results assert_nil StackProf.results assert_equal true, StackProf.start assert_equal false, StackProf.start assert_equal true, StackProf.running? assert_nil StackProf.results assert_equal true, StackProf.stop assert_equal false, StackProf.stop assert_equal false, StackProf.running? 
assert_kind_of Hash, StackProf.results assert_nil StackProf.results end def test_object_allocation profile_base_line = __LINE__+1 profile = StackProf.run(mode: :object) do Object.new Object.new end assert_equal :object, profile[:mode] assert_equal 1, profile[:interval] if RUBY_VERSION >= '3' assert_equal 4, profile[:samples] else assert_equal 2, profile[:samples] end frame = profile[:frames].values.first assert_includes frame[:name], "StackProfTest#test_object_allocation" assert_equal 2, frame[:samples] assert_includes [profile_base_line - 2, profile_base_line], frame[:line] if RUBY_VERSION >= '3' assert_equal [2, 1], frame[:lines][profile_base_line+1] assert_equal [2, 1], frame[:lines][profile_base_line+2] else assert_equal [1, 1], frame[:lines][profile_base_line+1] assert_equal [1, 1], frame[:lines][profile_base_line+2] end frame = profile[:frames].values[1] if RUBY_VERSION < '2.3' if RUBY_VERSION >= '3' assert_equal [4, 0], frame[:lines][profile_base_line] else assert_equal [2, 0], frame[:lines][profile_base_line] end end def test_object_allocation_interval profile = StackProf.run(mode: :object, interval: 10) do 100.times { Object.new } end assert_equal 10, profile[:samples] end def test_cputime profile = StackProf.run(mode: :cpu, interval: 500) do math end assert_operator profile[:samples], :>=, 1 if RUBY_VERSION >= '3' assert profile[:frames].values.take(2).map { |f| f[:name].include? "StackProfTest#math" }.any? 
else frame = profile[:frames].values.first assert_includes frame[:name], "StackProfTest#math" end end def test_walltime GC.disable profile = StackProf.run(mode: :wall) do idle end frame = profile[:frames].values.first if RUBY_VERSION >= '3' assert_equal "IO.select", frame[:name] else assert_equal "StackProfTest#idle", frame[:name] end assert_in_delta 200, frame[:samples], 25 ensure GC.enable end def test_custom profile_base_line = __LINE__+1 profile = StackProf.run(mode: :custom) do 10.times do StackProf.sample end end assert_equal :custom, profile[:mode] assert_equal 10, profile[:samples] offset = RUBY_VERSION >= '3' ? 1 : 0 frame = profile[:frames].values[offset] assert_includes frame[:name], "StackProfTest#test_custom" assert_includes [profile_base_line-2, profile_base_line+1], frame[:line] if RUBY_VERSION >= '3' assert_equal [10, 0], frame[:lines][profile_base_line+2] else assert_equal [10, 10], frame[:lines][profile_base_line+2] end end def test_raw before_monotonic = Process.clock_gettime(Process::CLOCK_MONOTONIC, :microsecond) profile = StackProf.run(mode: :custom, raw: true) do 10.times do StackProf.sample sleep 0.0001 end end after_monotonic = Process.clock_gettime(Process::CLOCK_MONOTONIC, :microsecond) raw = profile[:raw] raw_lines = profile[:raw_lines] assert_equal 10, raw[-1] assert_equal raw[0] + 2, raw.size assert_equal 10, raw_lines[-1] # seen 10 times offset = RUBY_VERSION >= '3' ? 
-3 : -2 assert_equal 140, raw_lines[offset] # sample caller is on 140 assert_includes profile[:frames][raw[offset]][:name], 'StackProfTest#test_raw' assert_equal 10, profile[:raw_sample_timestamps].size profile[:raw_sample_timestamps].each_cons(2) do |t1, t2| assert_operator t1, :>, before_monotonic assert_operator t2, :>=, t1 assert_operator t2, :<, after_monotonic end assert_equal 10, profile[:raw_timestamp_deltas].size total_duration = after_monotonic - before_monotonic assert_operator profile[:raw_timestamp_deltas].inject(&:+), :<, total_duration profile[:raw_timestamp_deltas].each do |delta| assert_operator delta, :>, 0 end end def test_metadata metadata = { path: '/foo/bar', revision: '5c0b01f1522ae8c194510977ae29377296dd236b', } profile = StackProf.run(mode: :cpu, metadata: metadata) do math end assert_equal metadata, profile[:metadata] end def test_empty_metadata profile = StackProf.run(mode: :cpu) do math end assert_equal({}, profile[:metadata]) end def test_raises_if_metadata_is_not_a_hash exception = assert_raises ArgumentError do StackProf.run(mode: :cpu, metadata: 'foobar') do math end end assert_equal 'metadata should be a hash', exception.message end def test_fork StackProf.run do pid = fork do exit! StackProf.running?? 1 : 0 end Process.wait(pid) assert_equal 0, $?.exitstatus assert_equal true, StackProf.running? 
end end def foo(n = 10) if n == 0 StackProf.sample return end foo(n - 1) end def test_recursive_total_samples profile = StackProf.run(mode: :cpu, raw: true) do 10.times do foo end end frame = profile[:frames].values.find do |frame| frame[:name] == "StackProfTest#foo" end assert_equal 10, frame[:total_samples] end def test_gc profile = StackProf.run(interval: 100, raw: true) do 5.times do GC.start end end gc_frame = profile[:frames].values.find{ |f| f[:name] == "(garbage collection)" } marking_frame = profile[:frames].values.find{ |f| f[:name] == "(marking)" } sweeping_frame = profile[:frames].values.find{ |f| f[:name] == "(sweeping)" } assert gc_frame assert marking_frame assert sweeping_frame # We can't guarantee a certain number of GCs to run, so just assert # that it's within some kind of delta assert_in_delta gc_frame[:total_samples], profile[:gc_samples], 2 # Lazy marking / sweeping can cause this math to not add up, so also use a delta assert_in_delta profile[:gc_samples], [gc_frame, marking_frame, sweeping_frame].map{|x| x[:samples] }.inject(:+), 2 assert_operator profile[:gc_samples], :>, 0 assert_operator profile[:missed_samples], :<=, 25 end def test_out tmpfile = Tempfile.new('stackprof-out') ret = StackProf.run(mode: :custom, out: tmpfile) do StackProf.sample end assert_equal tmpfile, ret tmpfile.rewind profile = Marshal.load(tmpfile.read) refute_empty profile[:frames] end def test_out_to_path_string tmpfile = Tempfile.new('stackprof-out') ret = StackProf.run(mode: :custom, out: tmpfile.path) do StackProf.sample end refute_equal tmpfile, ret assert_equal tmpfile.path, ret.path tmpfile.rewind profile = Marshal.load(tmpfile.read) refute_empty profile[:frames] end def test_pathname_out tmpfile = Tempfile.new('stackprof-out') pathname = Pathname.new(tmpfile.path) ret = StackProf.run(mode: :custom, out: pathname) do StackProf.sample end assert_equal tmpfile.path, ret.path tmpfile.rewind profile = Marshal.load(tmpfile.read) refute_empty profile[:frames] end 
def test_min_max_interval [-1, 0, 1_000_000, 1_000_001].each do |invalid_interval| err = assert_raises(ArgumentError, "invalid interval #{invalid_interval}") do StackProf.run(interval: invalid_interval, debug: true) {} end assert_match(/microseconds/, err.message) end end def math 250_000.times do 2 ** 10 end end def idle r, w = IO.pipe IO.select([r], nil, nil, 0.2) ensure r.close w.close end end unless RUBY_ENGINE == 'truffleruby' stackprof-0.2.26/test/test_report.rb0000644000004100000410000000214014605430231017476 0ustar www-datawww-data$:.unshift File.expand_path('../../lib', __FILE__) require 'stackprof' require 'minitest/autorun' class ReportDumpTest < Minitest::Test require 'stringio' def test_dump_to_stdout data = {} report = StackProf::Report.new(data) out, _err = capture_subprocess_io do report.print_dump end assert_dump data, out end def test_dump_to_file data = {} f = StringIO.new report = StackProf::Report.new(data) report.print_dump(f) assert_dump data, f.string end private def assert_dump(expected, marshal_data) assert_equal expected, Marshal.load(marshal_data) end end class ReportReadTest < Minitest::Test require 'pathname' def test_from_file_read_json file = fixture("profile.json") report = StackProf::Report.from_file(file) assert_equal({ mode: "cpu" }, report.data) end def test_from_file_read_marshal file = fixture("profile.dump") report = StackProf::Report.from_file(file) assert_equal({ mode: "cpu" }, report.data) end private def fixture(name) Pathname.new(__dir__).join("fixtures", name) end end stackprof-0.2.26/test/test_truffleruby.rb0000644000004100000410000000067414605430231020546 0ustar www-datawww-data$:.unshift File.expand_path('../../lib', __FILE__) require 'stackprof' require 'minitest/autorun' if RUBY_ENGINE == 'truffleruby' class StackProfTruffleRubyTest < Minitest::Test def test_error error = assert_raises RuntimeError do StackProf.run(mode: :cpu) do unreacheable end end assert_match(/TruffleRuby/, error.message) 
assert_match(/--cpusampler/, error.message) end end end stackprof-0.2.26/Rakefile0000644000004100000410000000073114605430231015271 0ustar www-datawww-datarequire "bundler/gem_tasks" require "rake/testtask" Rake::TestTask.new(:test) do |t| t.libs << "test" t.libs << "lib" t.test_files = FileList["test/**/test_*.rb"] end if RUBY_ENGINE == "truffleruby" task :compile do # noop end task :clean do # noop end else require "rake/extensiontask" Rake::ExtensionTask.new("stackprof") do |ext| ext.ext_dir = "ext/stackprof" ext.lib_dir = "lib/stackprof" end end task default: %i(compile test) stackprof-0.2.26/Gemfile0000644000004100000410000000004614605430231015116 0ustar www-datawww-datasource 'https://rubygems.org' gemspec stackprof-0.2.26/LICENSE0000644000004100000410000000207214605430231014631 0ustar www-datawww-dataThe MIT License (MIT) Copyright (c) 2013-2015 Aman Gupta Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
stackprof-0.2.26/ext/0000755000004100000410000000000014605430231014423 5ustar www-datawww-datastackprof-0.2.26/ext/stackprof/0000755000004100000410000000000014605430231016417 5ustar www-datawww-datastackprof-0.2.26/ext/stackprof/stackprof.c0000644000004100000410000007056414605430231020573 0ustar www-datawww-data/********************************************************************** stackprof.c - Sampling call-stack frame profiler for MRI. vim: noexpandtab shiftwidth=4 tabstop=8 softtabstop=4 **********************************************************************/ #include #include #include #include #include #include #include #include #include #include #include #define BUF_SIZE 2048 #define MICROSECONDS_IN_SECOND 1000000 #define NANOSECONDS_IN_SECOND 1000000000 #define FAKE_FRAME_GC INT2FIX(0) #define FAKE_FRAME_MARK INT2FIX(1) #define FAKE_FRAME_SWEEP INT2FIX(2) static const char *fake_frame_cstrs[] = { "(garbage collection)", "(marking)", "(sweeping)", }; static int stackprof_use_postponed_job = 1; static int ruby_vm_running = 0; #define TOTAL_FAKE_FRAMES (sizeof(fake_frame_cstrs) / sizeof(char *)) #ifdef _POSIX_MONOTONIC_CLOCK #define timestamp_t timespec typedef struct timestamp_t timestamp_t; static void capture_timestamp(timestamp_t *ts) { clock_gettime(CLOCK_MONOTONIC, ts); } static int64_t delta_usec(timestamp_t *start, timestamp_t *end) { int64_t result = MICROSECONDS_IN_SECOND * (end->tv_sec - start->tv_sec); if (end->tv_nsec < start->tv_nsec) { result -= MICROSECONDS_IN_SECOND; result += (NANOSECONDS_IN_SECOND + end->tv_nsec - start->tv_nsec) / 1000; } else { result += (end->tv_nsec - start->tv_nsec) / 1000; } return result; } static uint64_t timestamp_usec(timestamp_t *ts) { return (MICROSECONDS_IN_SECOND * ts->tv_sec) + (ts->tv_nsec / 1000); } #else #define timestamp_t timeval typedef struct timestamp_t timestamp_t; static void capture_timestamp(timestamp_t *ts) { gettimeofday(ts, NULL); } static int64_t delta_usec(timestamp_t *start, timestamp_t *end) { 
struct timeval diff; timersub(end, start, &diff); return (MICROSECONDS_IN_SECOND * diff.tv_sec) + diff.tv_usec; } /* Convert an absolute timeval to microseconds. Note: ts is a pointer, and the local `diff` above is not in scope here. */ static uint64_t timestamp_usec(timestamp_t *ts) { return (MICROSECONDS_IN_SECOND * ts->tv_sec) + ts->tv_usec; } #endif typedef struct { size_t total_samples; size_t caller_samples; size_t seen_at_sample_number; st_table *edges; st_table *lines; } frame_data_t; typedef struct { uint64_t timestamp_usec; int64_t delta_usec; } sample_time_t; static struct { int running; int raw; int aggregate; VALUE mode; VALUE interval; VALUE out; VALUE metadata; int ignore_gc; uint64_t *raw_samples; size_t raw_samples_len; size_t raw_samples_capa; size_t raw_sample_index; struct timestamp_t last_sample_at; sample_time_t *raw_sample_times; size_t raw_sample_times_len; size_t raw_sample_times_capa; size_t overall_signals; size_t overall_samples; size_t during_gc; size_t unrecorded_gc_samples; size_t unrecorded_gc_marking_samples; size_t unrecorded_gc_sweeping_samples; st_table *frames; timestamp_t gc_start_timestamp; VALUE fake_frame_names[TOTAL_FAKE_FRAMES]; VALUE empty_string; int buffer_count; sample_time_t buffer_time; VALUE frames_buffer[BUF_SIZE]; int lines_buffer[BUF_SIZE]; pthread_t target_thread; } _stackprof; static VALUE sym_object, sym_wall, sym_cpu, sym_custom, sym_name, sym_file, sym_line; static VALUE sym_samples, sym_total_samples, sym_missed_samples, sym_edges, sym_lines; static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_raw_lines, sym_metadata, sym_frames, sym_ignore_gc, sym_out; static VALUE sym_aggregate, sym_raw_sample_timestamps, sym_raw_timestamp_deltas, sym_state, sym_marking, sym_sweeping; static VALUE sym_gc_samples, objtracer; static VALUE gc_hook; static VALUE rb_mStackProf; static void stackprof_newobj_handler(VALUE, void*); static void stackprof_signal_handler(int sig, siginfo_t* sinfo, void* ucontext); static VALUE stackprof_start(int argc, VALUE *argv, VALUE self) { struct sigaction sa; struct itimerval timer; VALUE opts = Qnil, 
mode = Qnil, interval = Qnil, metadata = rb_hash_new(), out = Qfalse; int ignore_gc = 0; int raw = 0, aggregate = 1; VALUE metadata_val; if (_stackprof.running) return Qfalse; rb_scan_args(argc, argv, "0:", &opts); if (RTEST(opts)) { mode = rb_hash_aref(opts, sym_mode); interval = rb_hash_aref(opts, sym_interval); out = rb_hash_aref(opts, sym_out); if (RTEST(rb_hash_aref(opts, sym_ignore_gc))) { ignore_gc = 1; } metadata_val = rb_hash_aref(opts, sym_metadata); if (RTEST(metadata_val)) { if (!RB_TYPE_P(metadata_val, T_HASH)) rb_raise(rb_eArgError, "metadata should be a hash"); metadata = metadata_val; } if (RTEST(rb_hash_aref(opts, sym_raw))) raw = 1; if (rb_hash_lookup2(opts, sym_aggregate, Qundef) == Qfalse) aggregate = 0; } if (!RTEST(mode)) mode = sym_wall; if (!NIL_P(interval) && (NUM2INT(interval) < 1 || NUM2INT(interval) >= MICROSECONDS_IN_SECOND)) { rb_raise(rb_eArgError, "interval is a number of microseconds between 1 and 1 million"); } if (!_stackprof.frames) { _stackprof.frames = st_init_numtable(); _stackprof.overall_signals = 0; _stackprof.overall_samples = 0; _stackprof.during_gc = 0; } if (mode == sym_object) { if (!RTEST(interval)) interval = INT2FIX(1); objtracer = rb_tracepoint_new(Qnil, RUBY_INTERNAL_EVENT_NEWOBJ, stackprof_newobj_handler, 0); rb_tracepoint_enable(objtracer); } else if (mode == sym_wall || mode == sym_cpu) { if (!RTEST(interval)) interval = INT2FIX(1000); sa.sa_sigaction = stackprof_signal_handler; sa.sa_flags = SA_RESTART | SA_SIGINFO; sigemptyset(&sa.sa_mask); sigaction(mode == sym_wall ? SIGALRM : SIGPROF, &sa, NULL); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = NUM2LONG(interval); timer.it_value = timer.it_interval; setitimer(mode == sym_wall ? 
ITIMER_REAL : ITIMER_PROF, &timer, 0); } else if (mode == sym_custom) { /* sampled manually */ interval = Qnil; } else { rb_raise(rb_eArgError, "unknown profiler mode"); } _stackprof.running = 1; _stackprof.raw = raw; _stackprof.aggregate = aggregate; _stackprof.mode = mode; _stackprof.interval = interval; _stackprof.ignore_gc = ignore_gc; _stackprof.metadata = metadata; _stackprof.out = out; _stackprof.target_thread = pthread_self(); if (raw) { capture_timestamp(&_stackprof.last_sample_at); } return Qtrue; } static VALUE stackprof_stop(VALUE self) { struct sigaction sa; struct itimerval timer; if (!_stackprof.running) return Qfalse; _stackprof.running = 0; if (_stackprof.mode == sym_object) { rb_tracepoint_disable(objtracer); } else if (_stackprof.mode == sym_wall || _stackprof.mode == sym_cpu) { memset(&timer, 0, sizeof(timer)); setitimer(_stackprof.mode == sym_wall ? ITIMER_REAL : ITIMER_PROF, &timer, 0); sa.sa_handler = SIG_IGN; sa.sa_flags = SA_RESTART; sigemptyset(&sa.sa_mask); sigaction(_stackprof.mode == sym_wall ? 
SIGALRM : SIGPROF, &sa, NULL); } else if (_stackprof.mode == sym_custom) { /* sampled manually */ } else { rb_raise(rb_eArgError, "unknown profiler mode"); } return Qtrue; } #if SIZEOF_VOIDP == SIZEOF_LONG # define PTR2NUM(x) (LONG2NUM((long)(x))) #else # define PTR2NUM(x) (LL2NUM((LONG_LONG)(x))) #endif static int frame_edges_i(st_data_t key, st_data_t val, st_data_t arg) { VALUE edges = (VALUE)arg; intptr_t weight = (intptr_t)val; rb_hash_aset(edges, PTR2NUM(key), INT2FIX(weight)); return ST_CONTINUE; } static int frame_lines_i(st_data_t key, st_data_t val, st_data_t arg) { VALUE lines = (VALUE)arg; size_t weight = (size_t)val; size_t total = weight & (~(size_t)0 << (8*SIZEOF_SIZE_T/2)); weight -= total; total = total >> (8*SIZEOF_SIZE_T/2); rb_hash_aset(lines, INT2FIX(key), rb_ary_new3(2, ULONG2NUM(total), ULONG2NUM(weight))); return ST_CONTINUE; } static int frame_i(st_data_t key, st_data_t val, st_data_t arg) { VALUE frame = (VALUE)key; frame_data_t *frame_data = (frame_data_t *)val; VALUE results = (VALUE)arg; VALUE details = rb_hash_new(); VALUE name, file, edges, lines; VALUE line; rb_hash_aset(results, PTR2NUM(frame), details); if (FIXNUM_P(frame)) { name = _stackprof.fake_frame_names[FIX2INT(frame)]; file = _stackprof.empty_string; line = INT2FIX(0); } else { name = rb_profile_frame_full_label(frame); file = rb_profile_frame_absolute_path(frame); if (NIL_P(file)) file = rb_profile_frame_path(frame); line = rb_profile_frame_first_lineno(frame); } rb_hash_aset(details, sym_name, name); rb_hash_aset(details, sym_file, file); if (line != INT2FIX(0)) { rb_hash_aset(details, sym_line, line); } rb_hash_aset(details, sym_total_samples, SIZET2NUM(frame_data->total_samples)); rb_hash_aset(details, sym_samples, SIZET2NUM(frame_data->caller_samples)); if (frame_data->edges) { edges = rb_hash_new(); rb_hash_aset(details, sym_edges, edges); st_foreach(frame_data->edges, frame_edges_i, (st_data_t)edges); st_free_table(frame_data->edges); frame_data->edges = NULL; } if 
(frame_data->lines) { lines = rb_hash_new(); rb_hash_aset(details, sym_lines, lines); st_foreach(frame_data->lines, frame_lines_i, (st_data_t)lines); st_free_table(frame_data->lines); frame_data->lines = NULL; } xfree(frame_data); return ST_DELETE; } static VALUE stackprof_results(int argc, VALUE *argv, VALUE self) { VALUE results, frames; if (!_stackprof.frames || _stackprof.running) return Qnil; results = rb_hash_new(); rb_hash_aset(results, sym_version, DBL2NUM(1.2)); rb_hash_aset(results, sym_mode, _stackprof.mode); rb_hash_aset(results, sym_interval, _stackprof.interval); rb_hash_aset(results, sym_samples, SIZET2NUM(_stackprof.overall_samples)); rb_hash_aset(results, sym_gc_samples, SIZET2NUM(_stackprof.during_gc)); rb_hash_aset(results, sym_missed_samples, SIZET2NUM(_stackprof.overall_signals - _stackprof.overall_samples)); rb_hash_aset(results, sym_metadata, _stackprof.metadata); _stackprof.metadata = Qnil; frames = rb_hash_new(); rb_hash_aset(results, sym_frames, frames); st_foreach(_stackprof.frames, frame_i, (st_data_t)frames); st_free_table(_stackprof.frames); _stackprof.frames = NULL; if (_stackprof.raw && _stackprof.raw_samples_len) { size_t len, n, o; VALUE raw_sample_timestamps, raw_timestamp_deltas; VALUE raw_samples = rb_ary_new_capa(_stackprof.raw_samples_len); VALUE raw_lines = rb_ary_new_capa(_stackprof.raw_samples_len); for (n = 0; n < _stackprof.raw_samples_len; n++) { len = (size_t)_stackprof.raw_samples[n]; rb_ary_push(raw_samples, SIZET2NUM(len)); rb_ary_push(raw_lines, SIZET2NUM(len)); for (o = 0, n++; o < len; n++, o++) { // Line is in the upper 16 bits rb_ary_push(raw_lines, INT2NUM(_stackprof.raw_samples[n] >> 48)); VALUE frame = _stackprof.raw_samples[n] & ~((uint64_t)0xFFFF << 48); rb_ary_push(raw_samples, PTR2NUM(frame)); } rb_ary_push(raw_samples, SIZET2NUM((size_t)_stackprof.raw_samples[n])); rb_ary_push(raw_lines, SIZET2NUM((size_t)_stackprof.raw_samples[n])); } free(_stackprof.raw_samples); _stackprof.raw_samples = NULL; 
_stackprof.raw_samples_len = 0; _stackprof.raw_samples_capa = 0; _stackprof.raw_sample_index = 0; rb_hash_aset(results, sym_raw, raw_samples); rb_hash_aset(results, sym_raw_lines, raw_lines); raw_sample_timestamps = rb_ary_new_capa(_stackprof.raw_sample_times_len); raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_sample_times_len); for (n = 0; n < _stackprof.raw_sample_times_len; n++) { rb_ary_push(raw_sample_timestamps, ULL2NUM(_stackprof.raw_sample_times[n].timestamp_usec)); rb_ary_push(raw_timestamp_deltas, LL2NUM(_stackprof.raw_sample_times[n].delta_usec)); } free(_stackprof.raw_sample_times); _stackprof.raw_sample_times = NULL; _stackprof.raw_sample_times_len = 0; _stackprof.raw_sample_times_capa = 0; rb_hash_aset(results, sym_raw_sample_timestamps, raw_sample_timestamps); rb_hash_aset(results, sym_raw_timestamp_deltas, raw_timestamp_deltas); _stackprof.raw = 0; } if (argc == 1) _stackprof.out = argv[0]; if (RTEST(_stackprof.out)) { VALUE file; if (rb_respond_to(_stackprof.out, rb_intern("to_io"))) { file = rb_io_check_io(_stackprof.out); } else { file = rb_file_open_str(_stackprof.out, "w"); } rb_marshal_dump(results, file); rb_io_flush(file); _stackprof.out = Qnil; return file; } else { return results; } } static VALUE stackprof_run(int argc, VALUE *argv, VALUE self) { rb_need_block(); stackprof_start(argc, argv, self); rb_ensure(rb_yield, Qundef, stackprof_stop, self); return stackprof_results(0, 0, self); } static VALUE stackprof_running_p(VALUE self) { return _stackprof.running ? 
Qtrue : Qfalse; } static inline frame_data_t * sample_for(VALUE frame) { st_data_t key = (st_data_t)frame, val = 0; frame_data_t *frame_data; if (st_lookup(_stackprof.frames, key, &val)) { frame_data = (frame_data_t *)val; } else { frame_data = ALLOC_N(frame_data_t, 1); MEMZERO(frame_data, frame_data_t, 1); val = (st_data_t)frame_data; st_insert(_stackprof.frames, key, val); } return frame_data; } static int numtable_increment_callback(st_data_t *key, st_data_t *value, st_data_t arg, int existing) { size_t *weight = (size_t *)value; size_t increment = (size_t)arg; if (existing) (*weight) += increment; else *weight = increment; return ST_CONTINUE; } void st_numtable_increment(st_table *table, st_data_t key, size_t increment) { st_update(table, key, numtable_increment_callback, (st_data_t)increment); } void stackprof_record_sample_for_stack(int num, uint64_t sample_timestamp, int64_t timestamp_delta) { int i, n; VALUE prev_frame = Qnil; _stackprof.overall_samples++; if (_stackprof.raw && num > 0) { int found = 0; /* If there's no sample buffer allocated, then allocate one. The buffer * format is the number of frames (num), then the list of frames (from * `_stackprof.raw_samples`), followed by the number of times this * particular stack has been seen in a row. Each "new" stack is added * to the end of the buffer, but if the previous stack is the same as * the current stack, the counter will be incremented. */ if (!_stackprof.raw_samples) { _stackprof.raw_samples_capa = num * 100; _stackprof.raw_samples = malloc(sizeof(VALUE) * _stackprof.raw_samples_capa); } /* If we can't fit all the samples in the buffer, double the buffer size. */ while (_stackprof.raw_samples_capa <= _stackprof.raw_samples_len + (num + 2)) { _stackprof.raw_samples_capa *= 2; _stackprof.raw_samples = realloc(_stackprof.raw_samples, sizeof(VALUE) * _stackprof.raw_samples_capa); } /* If we've seen this stack before in the last sample, then increment the "seen" count. 
*/ if (_stackprof.raw_samples_len > 0 && _stackprof.raw_samples[_stackprof.raw_sample_index] == (VALUE)num) { /* The number of samples could have been the same, but the stack * might be different, so we need to check the stack here. Stacks * in the raw buffer are stored in the opposite direction of stacks * in the frames buffer that came from Ruby. */ for (i = num-1, n = 0; i >= 0; i--, n++) { VALUE frame = _stackprof.frames_buffer[i]; int line = _stackprof.lines_buffer[i]; // Encode the line in to the upper 16 bits. uint64_t key = ((uint64_t)line << 48) | (uint64_t)frame; if (_stackprof.raw_samples[_stackprof.raw_sample_index + 1 + n] != key) break; } if (i == -1) { _stackprof.raw_samples[_stackprof.raw_samples_len-1] += 1; found = 1; } } /* If we haven't seen the stack, then add it to the buffer along with * the length of the stack and a 1 for the "seen" count */ if (!found) { /* Bump the `raw_sample_index` up so that the next iteration can * find the previously recorded stack size. */ _stackprof.raw_sample_index = _stackprof.raw_samples_len; _stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)num; for (i = num-1; i >= 0; i--) { VALUE frame = _stackprof.frames_buffer[i]; int line = _stackprof.lines_buffer[i]; // Encode the line in to the upper 16 bits. 
uint64_t key = ((uint64_t)line << 48) | (uint64_t)frame; _stackprof.raw_samples[_stackprof.raw_samples_len++] = key; } _stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)1; } /* If there's no timestamp delta buffer, allocate one */ if (!_stackprof.raw_sample_times) { _stackprof.raw_sample_times_capa = 100; _stackprof.raw_sample_times = malloc(sizeof(sample_time_t) * _stackprof.raw_sample_times_capa); _stackprof.raw_sample_times_len = 0; } /* Double the buffer size if it's too small */ while (_stackprof.raw_sample_times_capa <= _stackprof.raw_sample_times_len + 1) { _stackprof.raw_sample_times_capa *= 2; _stackprof.raw_sample_times = realloc(_stackprof.raw_sample_times, sizeof(sample_time_t) * _stackprof.raw_sample_times_capa); } /* Store the time delta (which is the amount of microseconds between samples). */ _stackprof.raw_sample_times[_stackprof.raw_sample_times_len++] = (sample_time_t) { .timestamp_usec = sample_timestamp, .delta_usec = timestamp_delta, }; } for (i = 0; i < num; i++) { int line = _stackprof.lines_buffer[i]; VALUE frame = _stackprof.frames_buffer[i]; frame_data_t *frame_data = sample_for(frame); if (frame_data->seen_at_sample_number != _stackprof.overall_samples) { frame_data->total_samples++; } frame_data->seen_at_sample_number = _stackprof.overall_samples; if (i == 0) { frame_data->caller_samples++; } else if (_stackprof.aggregate) { if (!frame_data->edges) frame_data->edges = st_init_numtable(); st_numtable_increment(frame_data->edges, (st_data_t)prev_frame, 1); } if (_stackprof.aggregate && line > 0) { size_t half = (size_t)1<<(8*SIZEOF_SIZE_T/2); size_t increment = i == 0 ? 
half + 1 : half; if (!frame_data->lines) frame_data->lines = st_init_numtable(); st_numtable_increment(frame_data->lines, (st_data_t)line, increment); } prev_frame = frame; } if (_stackprof.raw) { capture_timestamp(&_stackprof.last_sample_at); } } // buffer the current profile frames // This must be async-signal-safe // Returns immediately if another set of frames are already in the buffer void stackprof_buffer_sample(void) { uint64_t start_timestamp = 0; int64_t timestamp_delta = 0; int num; if (_stackprof.buffer_count > 0) { // Another sample is already pending return; } if (_stackprof.raw) { struct timestamp_t t; capture_timestamp(&t); start_timestamp = timestamp_usec(&t); timestamp_delta = delta_usec(&_stackprof.last_sample_at, &t); } num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer) / sizeof(VALUE), _stackprof.frames_buffer, _stackprof.lines_buffer); _stackprof.buffer_count = num; _stackprof.buffer_time.timestamp_usec = start_timestamp; _stackprof.buffer_time.delta_usec = timestamp_delta; } // Postponed job void stackprof_record_gc_samples(void) { int64_t delta_to_first_unrecorded_gc_sample = 0; uint64_t start_timestamp = 0; size_t i; if (_stackprof.raw) { struct timestamp_t t = _stackprof.gc_start_timestamp; start_timestamp = timestamp_usec(&t); // We don't know when the GC samples were actually marked, so let's // assume that they were marked at a perfectly regular interval. delta_to_first_unrecorded_gc_sample = delta_usec(&_stackprof.last_sample_at, &t) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval); if (delta_to_first_unrecorded_gc_sample < 0) { delta_to_first_unrecorded_gc_sample = 0; } } for (i = 0; i < _stackprof.unrecorded_gc_samples; i++) { int64_t timestamp_delta = i == 0 ? 
delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval); if (_stackprof.unrecorded_gc_marking_samples) { _stackprof.frames_buffer[0] = FAKE_FRAME_MARK; _stackprof.lines_buffer[0] = 0; _stackprof.frames_buffer[1] = FAKE_FRAME_GC; _stackprof.lines_buffer[1] = 0; _stackprof.unrecorded_gc_marking_samples--; stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta); } else if (_stackprof.unrecorded_gc_sweeping_samples) { _stackprof.frames_buffer[0] = FAKE_FRAME_SWEEP; _stackprof.lines_buffer[0] = 0; _stackprof.frames_buffer[1] = FAKE_FRAME_GC; _stackprof.lines_buffer[1] = 0; _stackprof.unrecorded_gc_sweeping_samples--; stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta); } else { _stackprof.frames_buffer[0] = FAKE_FRAME_GC; _stackprof.lines_buffer[0] = 0; stackprof_record_sample_for_stack(1, start_timestamp, timestamp_delta); } } _stackprof.during_gc += _stackprof.unrecorded_gc_samples; _stackprof.unrecorded_gc_samples = 0; _stackprof.unrecorded_gc_marking_samples = 0; _stackprof.unrecorded_gc_sweeping_samples = 0; } // record the sample previously buffered by stackprof_buffer_sample static void stackprof_record_buffer(void) { stackprof_record_sample_for_stack(_stackprof.buffer_count, _stackprof.buffer_time.timestamp_usec, _stackprof.buffer_time.delta_usec); // reset the buffer _stackprof.buffer_count = 0; } static void stackprof_sample_and_record(void) { stackprof_buffer_sample(); stackprof_record_buffer(); } static void stackprof_job_record_gc(void *data) { if (!_stackprof.running) return; stackprof_record_gc_samples(); } static void stackprof_job_sample_and_record(void *data) { if (!_stackprof.running) return; stackprof_sample_and_record(); } static void stackprof_job_record_buffer(void *data) { if (!_stackprof.running) return; stackprof_record_buffer(); } static void stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext) { static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; _stackprof.overall_signals++; 
if (!_stackprof.running) return; // There's a possibility that the signal handler is invoked *after* the Ruby // VM has been shut down (e.g. after ruby_cleanup(0)). In this case, things // that rely on global VM state (e.g. rb_during_gc) will segfault. if (!ruby_vm_running) return; if (_stackprof.mode == sym_wall) { // In "wall" mode, the SIGALRM signal will arrive at an arbitrary thread. // In order to provide more useful results, especially under threaded web // servers, we want to forward this signal to the original thread // StackProf was started from. // According to POSIX.1-2008 TC1 pthread_kill and pthread_self should be // async-signal-safe. if (pthread_self() != _stackprof.target_thread) { pthread_kill(_stackprof.target_thread, sig); return; } } else { if (!ruby_native_thread_p()) return; } if (pthread_mutex_trylock(&lock)) return; if (!_stackprof.ignore_gc && rb_during_gc()) { VALUE mode = rb_gc_latest_gc_info(sym_state); if (mode == sym_marking) { _stackprof.unrecorded_gc_marking_samples++; } else if (mode == sym_sweeping) { _stackprof.unrecorded_gc_sweeping_samples++; } if(!_stackprof.unrecorded_gc_samples) { // record start capture_timestamp(&_stackprof.gc_start_timestamp); } _stackprof.unrecorded_gc_samples++; rb_postponed_job_register_one(0, stackprof_job_record_gc, (void*)0); } else { if (stackprof_use_postponed_job) { rb_postponed_job_register_one(0, stackprof_job_sample_and_record, (void*)0); } else { // Buffer a sample immediately, if an existing sample exists this will // return immediately stackprof_buffer_sample(); // Enqueue a job to record the sample rb_postponed_job_register_one(0, stackprof_job_record_buffer, (void*)0); } } pthread_mutex_unlock(&lock); } static void stackprof_newobj_handler(VALUE tpval, void *data) { _stackprof.overall_signals++; if (RTEST(_stackprof.interval) && _stackprof.overall_signals % NUM2LONG(_stackprof.interval)) return; stackprof_sample_and_record(); } static VALUE stackprof_sample(VALUE self) { if 
(!_stackprof.running) return Qfalse; _stackprof.overall_signals++; stackprof_sample_and_record(); return Qtrue; } static int frame_mark_i(st_data_t key, st_data_t val, st_data_t arg) { VALUE frame = (VALUE)key; rb_gc_mark(frame); return ST_CONTINUE; } static void stackprof_gc_mark(void *data) { if (RTEST(_stackprof.metadata)) rb_gc_mark(_stackprof.metadata); if (RTEST(_stackprof.out)) rb_gc_mark(_stackprof.out); if (_stackprof.frames) st_foreach(_stackprof.frames, frame_mark_i, 0); int i; for (i = 0; i < _stackprof.buffer_count; i++) { rb_gc_mark(_stackprof.frames_buffer[i]); } } static size_t stackprof_memsize(const void *data) { return sizeof(_stackprof); } static void stackprof_atfork_prepare(void) { struct itimerval timer; if (_stackprof.running) { if (_stackprof.mode == sym_wall || _stackprof.mode == sym_cpu) { memset(&timer, 0, sizeof(timer)); setitimer(_stackprof.mode == sym_wall ? ITIMER_REAL : ITIMER_PROF, &timer, 0); } } } static void stackprof_atfork_parent(void) { struct itimerval timer; if (_stackprof.running) { if (_stackprof.mode == sym_wall || _stackprof.mode == sym_cpu) { timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = NUM2LONG(_stackprof.interval); timer.it_value = timer.it_interval; setitimer(_stackprof.mode == sym_wall ? 
ITIMER_REAL : ITIMER_PROF, &timer, 0); } } } static void stackprof_atfork_child(void) { stackprof_stop(rb_mStackProf); } static VALUE stackprof_use_postponed_job_l(VALUE self) { stackprof_use_postponed_job = 1; return Qnil; } static void stackprof_at_exit(ruby_vm_t* vm) { ruby_vm_running = 0; } static const rb_data_type_t stackprof_type = { "StackProf", { stackprof_gc_mark, NULL, stackprof_memsize, } }; void Init_stackprof(void) { size_t i; /* * As of Ruby 3.0, it should be safe to read stack frames at any time, unless YJIT is enabled * See https://github.com/ruby/ruby/commit/0e276dc458f94d9d79a0f7c7669bde84abe80f21 */ stackprof_use_postponed_job = RUBY_API_VERSION_MAJOR < 3; ruby_vm_running = 1; ruby_vm_at_exit(stackprof_at_exit); #define S(name) sym_##name = ID2SYM(rb_intern(#name)); S(object); S(custom); S(wall); S(cpu); S(name); S(file); S(line); S(total_samples); S(gc_samples); S(missed_samples); S(samples); S(edges); S(lines); S(version); S(mode); S(interval); S(raw); S(raw_lines); S(raw_sample_timestamps); S(raw_timestamp_deltas); S(out); S(metadata); S(ignore_gc); S(frames); S(aggregate); S(state); S(marking); S(sweeping); #undef S /* Need to run this to warm the symbol table before we call this during GC */ rb_gc_latest_gc_info(sym_state); rb_global_variable(&gc_hook); gc_hook = TypedData_Wrap_Struct(rb_cObject, &stackprof_type, &_stackprof); _stackprof.raw_samples = NULL; _stackprof.raw_samples_len = 0; _stackprof.raw_samples_capa = 0; _stackprof.raw_sample_index = 0; _stackprof.raw_sample_times = NULL; _stackprof.raw_sample_times_len = 0; _stackprof.raw_sample_times_capa = 0; _stackprof.empty_string = rb_str_new_cstr(""); rb_global_variable(&_stackprof.empty_string); for (i = 0; i < TOTAL_FAKE_FRAMES; i++) { _stackprof.fake_frame_names[i] = rb_str_new_cstr(fake_frame_cstrs[i]); rb_global_variable(&_stackprof.fake_frame_names[i]); } rb_mStackProf = rb_define_module("StackProf"); rb_define_singleton_method(rb_mStackProf, "running?", stackprof_running_p, 
0); rb_define_singleton_method(rb_mStackProf, "run", stackprof_run, -1); rb_define_singleton_method(rb_mStackProf, "start", stackprof_start, -1); rb_define_singleton_method(rb_mStackProf, "stop", stackprof_stop, 0); rb_define_singleton_method(rb_mStackProf, "results", stackprof_results, -1); rb_define_singleton_method(rb_mStackProf, "sample", stackprof_sample, 0); rb_define_singleton_method(rb_mStackProf, "use_postponed_job!", stackprof_use_postponed_job_l, 0); pthread_atfork(stackprof_atfork_prepare, stackprof_atfork_parent, stackprof_atfork_child); } stackprof-0.2.26/ext/stackprof/extconf.rb0000644000004100000410000000060114605430231020407 0ustar www-datawww-datarequire 'mkmf' if RUBY_ENGINE == 'truffleruby' File.write('Makefile', dummy_makefile($srcdir).join("")) return end if have_func('rb_postponed_job_register_one') && have_func('rb_profile_frames') && have_func('rb_tracepoint_new') && have_const('RUBY_INTERNAL_EVENT_NEWOBJ') create_makefile('stackprof/stackprof') else fail 'missing API: are you using ruby 2.1+?' end stackprof-0.2.26/README.md0000644000004100000410000003021514605430231015103 0ustar www-datawww-data# Stackprof A sampling call-stack profiler for Ruby. Inspired heavily by [gperftools](https://code.google.com/p/gperftools/), and written as a replacement for [perftools.rb](https://github.com/tmm1/perftools.rb). ## Requirements * Ruby 2.2+ * Linux-based OS ## Getting Started ### Install In your Gemfile add: ```ruby gem 'stackprof' ``` Then run `$ bundle install`. Alternatively you can run `$ gem install stackprof`. ### Run in ruby: ``` ruby StackProf.run(mode: :cpu, out: 'tmp/stackprof-cpu-myapp.dump') do #... 
end ``` via rack: ``` ruby use StackProf::Middleware, enabled: true, mode: :cpu, interval: 1000, save_every: 5 ``` reporting: ``` $ stackprof tmp/stackprof-cpu-*.dump --text --limit 1 ================================== Mode: cpu(1000) Samples: 60395 (1.09% miss rate) GC: 2851 (4.72%) ================================== TOTAL (pct) SAMPLES (pct) FRAME 1660 (2.7%) 1595 (2.6%) String#blank? $ stackprof tmp/stackprof-cpu-*.dump --method 'String#blank?' String#blank? (gems/activesupport-2.3.14.github30/lib/active_support/core_ext/object/blank.rb:80) samples: 1595 self (2.6%) / 1660 total (2.7%) callers: 373 ( 41.0%) ApplicationHelper#current_user 192 ( 21.1%) ApplicationHelper#current_repository callers: 803 ( 48.4%) Object#present? code: | 80 | def blank? 1225 (2.0%) / 1225 (2.0%) | 81 | self !~ /[^[:space:]]/ | 82 | end $ stackprof tmp/stackprof-cpu-*.dump --method 'Object#present?' Object#present? (gems/activesupport-2.3.14.github30/lib/active_support/core_ext/object/blank.rb:20) samples: 59 self (0.1%) / 910 total (1.5%) callees (851 total): 803 ( 94.4%) String#blank? 32 ( 3.8%) Object#blank? 16 ( 1.9%) NilClass#blank? code: | 20 | def present? 910 (1.5%) / 59 (0.1%) | 21 | !blank? | 22 | end ``` For an experimental version of WebUI reporting of stackprof, see [stackprof-webnav](https://github.com/alisnic/stackprof-webnav) To generate flamegraphs with Stackprof, additional data must be collected using the `raw: true` flag. 
Once you've collected results with this flag enabled, generate a flamegraph with: ``` $ stackprof --flamegraph tmp/stackprof-cpu-myapp.dump > tmp/flamegraph ``` After the flamegraph has been generated, you can generate a viewer command with: ``` $ stackprof --flamegraph-viewer=tmp/flamegraph ``` The `--flamegraph-viewer` command will output the exact shell command you need to run in order to open the `tmp/flamegraph` you generated with the built-in stackprof flamegraph viewer: ![Flamegraph Viewer](http://i.imgur.com/EwndrgD.png) Alternatively, you can generate a flamegraph that uses [d3-flame-graph](https://github.com/spiermar/d3-flame-graph): ``` $ stackprof --d3-flamegraph tmp/stackprof-cpu-myapp.dump > flamegraph.html ``` And just open the result by your browser. ## Sampling Four sampling modes are supported: - `:wall` (using `ITIMER_REAL` and `SIGALRM`) [default mode] - `:cpu` (using `ITIMER_PROF` and `SIGPROF`) - `:object` (using `RUBY_INTERNAL_EVENT_NEWOBJ`) - `:custom` (user-defined via `StackProf.sample`) Samplers have a tuneable interval which can be used to reduce overhead or increase granularity: - Wall time: sample every _interval_ microseconds of wallclock time (default: 1000) ```ruby StackProf.run(mode: :wall, out: 'tmp/stackprof.dump', interval: 1000) do #... end ``` - CPU time: sample every _interval_ microseconds of CPU activity (default: 1000 = 1 millisecond) ```ruby StackProf.run(mode: :cpu, out: 'tmp/stackprof.dump', interval: 1000) do #... end ``` - Object allocation: sample every _interval_ allocations (default: 1) ```ruby StackProf.run(mode: :object, out: 'tmp/stackprof.dump', interval: 1) do #... end ``` By default, samples taken during garbage collection will show as garbage collection frames including both mark and sweep phases. For longer traces, these can leave gaps in a flamegraph that are hard to follow. They can be disabled by setting the `ignore_gc` option to true. 
Garbage collection time will still be present in the profile but not explicitly marked with its own frame. Samples are taken using a combination of three new C-APIs in ruby 2.1: - Signal handlers enqueue a sampling job using `rb_postponed_job_register_one`. this ensures callstack samples can be taken safely, in case the VM is garbage collecting or in some other inconsistent state during the interruption. - Stack frames are collected via `rb_profile_frames`, which provides low-overhead C-API access to the VM's call stack. No object allocations occur in this path, allowing stackprof to collect callstacks in allocation mode. - In allocation mode, samples are taken via `rb_tracepoint_new(RUBY_INTERNAL_EVENT_NEWOBJ)`, which provides a notification every time the VM allocates a new object. ## Aggregation Each sample consists of N stack frames, where a frame looks something like `MyClass#method` or `block in MySingleton.method`. For each of these frames in the sample, the profiler collects a few pieces of metadata: - `samples`: Number of samples where this was the topmost frame - `total_samples`: Samples where this frame was in the stack - `lines`: Samples per line number in this frame - `edges`: Samples per callee frame (methods invoked by this frame) The aggregation algorithm is roughly equivalent to the following pseudo code: ``` ruby trap('PROF') do top, *rest = caller top.samples += 1 top.lines[top.lineno] += 1 top.total_samples += 1 prev = top rest.each do |frame| frame.edges[prev] += 1 frame.total_samples += 1 prev = frame end end ``` This technique builds up an incremental call graph from the samples. On any given frame, the sum of the outbound edge weights is equal to total samples collected on that frame (`frame.total_samples == frame.edges.values.sum`). 
## Reporting Multiple reporting modes are supported: - Text - Dotgraph - Source annotation ### `StackProf::Report.new(data).print_text` ``` TOTAL (pct) SAMPLES (pct) FRAME 91 (48.4%) 91 (48.4%) A#pow 58 (30.9%) 58 (30.9%) A.newobj 34 (18.1%) 34 (18.1%) block in A#math 188 (100.0%) 3 (1.6%) block (2 levels) in
185 (98.4%) 1 (0.5%) A#initialize 35 (18.6%) 1 (0.5%) A#math 188 (100.0%) 0 (0.0%)
188 (100.0%) 0 (0.0%) block in
188 (100.0%) 0 (0.0%)
``` ### `StackProf::Report.new(data).print_graphviz` ``` digraph profile { 70346498324780 [size=23.5531914893617] [fontsize=23.5531914893617] [shape=box] [label="A#pow\n91 (48.4%)\r"]; 70346498324680 [size=18.638297872340424] [fontsize=18.638297872340424] [shape=box] [label="A.newobj\n58 (30.9%)\r"]; 70346498324480 [size=15.063829787234042] [fontsize=15.063829787234042] [shape=box] [label="block in A#math\n34 (18.1%)\r"]; 70346498324220 [size=10.446808510638299] [fontsize=10.446808510638299] [shape=box] [label="block (2 levels) in
\n3 (1.6%)\rof 188 (100.0%)\r"]; 70346498324220 -> 70346498324900 [label="185"]; 70346498324900 [size=10.148936170212766] [fontsize=10.148936170212766] [shape=box] [label="A#initialize\n1 (0.5%)\rof 185 (98.4%)\r"]; 70346498324900 -> 70346498324780 [label="91"]; 70346498324900 -> 70346498324680 [label="58"]; 70346498324900 -> 70346498324580 [label="35"]; 70346498324580 [size=10.148936170212766] [fontsize=10.148936170212766] [shape=box] [label="A#math\n1 (0.5%)\rof 35 (18.6%)\r"]; 70346498324580 -> 70346498324480 [label="34"]; 70346497983360 [size=10.0] [fontsize=10.0] [shape=box] [label="
\n0 (0.0%)\rof 188 (100.0%)\r"]; 70346497983360 -> 70346498325080 [label="188"]; 70346498324300 [size=10.0] [fontsize=10.0] [shape=box] [label="block in
\n0 (0.0%)\rof 188 (100.0%)\r"]; 70346498324300 -> 70346498324220 [label="188"]; 70346498325080 [size=10.0] [fontsize=10.0] [shape=box] [label="
\n0 (0.0%)\rof 188 (100.0%)\r"]; 70346498325080 -> 70346498324300 [label="188"]; } ``` ### `StackProf::Report.new(data).print_method(/pow|newobj|math/)` ``` A#pow (/Users/tmm1/code/stackprof/sample.rb:11) | 11 | def pow 91 (48.4% / 100.0%) | 12 | 2 ** 100 | 13 | end A.newobj (/Users/tmm1/code/stackprof/sample.rb:15) | 15 | def self.newobj 33 (17.6% / 56.9%) | 16 | Object.new 25 (13.3% / 43.1%) | 17 | Object.new | 18 | end A#math (/Users/tmm1/code/stackprof/sample.rb:20) | 20 | def math 1 (0.5% / 100.0%) | 21 | 2.times do | 22 | 2 + 3 * 4 ^ 5 / 6 block in A#math (/Users/tmm1/code/stackprof/sample.rb:21) | 21 | 2.times do 34 (18.1% / 100.0%) | 22 | 2 + 3 * 4 ^ 5 / 6 | 23 | end ``` ## Usage The profiler is compiled as a C-extension and exposes a simple api: `StackProf.run(mode: [:cpu|:wall|:object])`. The `run` method takes a block of code and returns a profile as a simple hash. ``` ruby # sample after every 1ms of cpu activity profile = StackProf.run(mode: :cpu, interval: 1000) do MyCode.execute end ``` This profile data structure is part of the public API, and is intended to be saved (as json/marshal for example) for later processing. The reports above can be generated by passing this structure into `StackProf::Report.new`. The format itself is very simple. It contains a header and a list of frames. Each frame has a unique ID and identifying information such as its name, file, and line. The frame also contains sampling data, including per-line samples, and a list of relationships to other frames represented as weighted edges. 
``` ruby {:version=>1.0, :mode=>:cpu, :interval=>1000, :samples=>188, :missed_samples=>0, :frames=> {70346498324780=> {:name=>"A#pow", :file=>"/Users/tmm1/code/stackprof/sample.rb", :line=>11, :total_samples=>91, :samples=>91, :lines=>{12=>91}}, 70346498324900=> {:name=>"A#initialize", :file=>"/Users/tmm1/code/stackprof/sample.rb", :line=>5, :total_samples=>185, :samples=>1, :edges=>{70346498324780=>91, 70346498324680=>58, 70346498324580=>35}, :lines=>{8=>1}}, ``` Above, `A#pow` was involved in 91 samples, and in all cases it was at the top of the stack on line 12. `A#initialize` was in 185 samples, but it was at the top of the stack in only 1 sample. The rest of the samples are divided up between its callee edges. All 91 calls to `A#pow` came from `A#initialize`, as seen by the edge numbered `70346498324780`. ## Advanced usage The profiler can be started and stopped manually. Results are accumulated until retrieval, across multiple `start`/`stop` invocations. ``` ruby StackProf.running? # => false StackProf.start(mode: :cpu) StackProf.running? # => true StackProf.stop StackProf.results('/tmp/some.file') ``` ## All options `StackProf.run` accepts an options hash. Currently, the following options are recognized: Option | Meaning ------- | --------- `mode` | Mode of sampling: `:cpu`, `:wall`, `:object`, or `:custom` [c.f.](#sampling) `out` | The target file, which will be overwritten `interval` | Mode-relative sample rate [c.f.](#sampling) `ignore_gc` | Ignore garbage collection frames `aggregate` | Defaults: `true` - if `false` disables [aggregation](#aggregation) `raw` | Defaults `false` - if `true` collects the extra data required by the `--flamegraph` and `--stackcollapse` report types `metadata` | Defaults to `{}`. Must be a `Hash`. 
metadata associated with this profile `save_every`| (Rack middleware only) write the target file after this many requests ## Todo * file/iseq blacklist * restore signal handlers on stop stackprof-0.2.26/CHANGELOG.md0000644000004100000410000000066314605430231015441 0ustar www-datawww-data# 0.2.25 * Fix GC marking # 0.2.16 * [flamegraph.pl] Update to latest version * Add option to ignore GC frames * Handle source code not being available * Freeze strings in report.rb * Use a cursor object instead of array slicing * ArgumentError on interval <1 or >1m * fix variable name. * Fix default mode comment in readme # 0.2.15 * Mark the metadata object before the GC is invoked to prevent it from being garbage collected. stackprof-0.2.26/sample.rb0000644000004100000410000000122614605430231015432 0ustar www-datawww-data$:.unshift File.expand_path('../lib', __FILE__) require 'stackprof' class A def initialize pow self.class.newobj math end def pow 2 ** 100 end def self.newobj Object.new Object.new end def math 2.times do 2 + 3 * 4 ^ 5 / 6 end end end #profile = StackProf.run(mode: :object, interval: 1) do #profile = StackProf.run(mode: :wall, interval: 1000) do profile = StackProf.run(mode: :cpu, interval: 1000) do 1_000_000.times do A.new end end result = StackProf::Report.new(profile) puts result.print_method(/pow|newobj|math/) puts result.print_text puts result.print_graphviz puts result.print_debug stackprof-0.2.26/vendor/0000755000004100000410000000000014605430231015120 5ustar www-datawww-datastackprof-0.2.26/vendor/FlameGraph/0000755000004100000410000000000014605430231017126 5ustar www-datawww-datastackprof-0.2.26/vendor/FlameGraph/flamegraph.pl0000755000004100000410000010411614605430231021577 0ustar www-datawww-data#!/usr/bin/perl -w # # flamegraph.pl flame stack grapher. # # This takes stack samples and renders a call graph, allowing hot functions # and codepaths to be quickly identified. 
Stack samples can be generated using # tools such as DTrace, perf, SystemTap, and Instruments. # # USAGE: ./flamegraph.pl [options] input.txt > graph.svg # # grep funcA input.txt | ./flamegraph.pl [options] > graph.svg # # Then open the resulting .svg in a web browser, for interactivity: mouse-over # frames for info, click to zoom, and ctrl-F to search. # # Options are listed in the usage message (--help). # # The input is stack frames and sample counts formatted as single lines. Each # frame in the stack is semicolon separated, with a space and count at the end # of the line. These can be generated for Linux perf script output using # stackcollapse-perf.pl, for DTrace using stackcollapse.pl, and for other tools # using the other stackcollapse programs. Example input: # # swapper;start_kernel;rest_init;cpu_idle;default_idle;native_safe_halt 1 # # An optional extra column of counts can be provided to generate a differential # flame graph of the counts, colored red for more, and blue for less. This # can be useful when using flame graphs for non-regression testing. # See the header comment in the difffolded.pl program for instructions. # # The input functions can optionally have annotations at the end of each # function name, following a precedent by some tools (Linux perf's _[k]): # _[k] for kernel # _[i] for inlined # _[j] for jit # _[w] for waker # Some of the stackcollapse programs support adding these annotations, eg, # stackcollapse-perf.pl --kernel --jit. They are used merely for colors by # some palettes, eg, flamegraph.pl --color=java. # # The output flame graph shows relative presence of functions in stack samples. # The ordering on the x-axis has no meaning; since the data is samples, time # order of events is not known. The order used sorts function names # alphabetically. # # While intended to process stack samples, this can also process stack traces. # For example, tracing stacks for memory allocation, or resource usage. 
You # can use --title to set the title to reflect the content, and --countname # to change "samples" to "bytes" etc. # # There are a few different palettes, selectable using --color. By default, # the colors are selected at random (except for differentials). Functions # called "-" will be printed gray, which can be used for stack separators (eg, # between user and kernel stacks). # # HISTORY # # This was inspired by Neelakanth Nadgir's excellent function_call_graph.rb # program, which visualized function entry and return trace events. As Neel # wrote: "The output displayed is inspired by Roch's CallStackAnalyzer which # was in turn inspired by the work on vftrace by Jan Boerhout". See: # https://blogs.oracle.com/realneel/entry/visualizing_callstacks_via_dtrace_and # # Copyright 2016 Netflix, Inc. # Copyright 2011 Joyent, Inc. All rights reserved. # Copyright 2011 Brendan Gregg. All rights reserved. # # CDDL HEADER START # # The contents of this file are subject to the terms of the # Common Development and Distribution License (the "License"). # You may not use this file except in compliance with the License. # # You can obtain a copy of the license at docs/cddl1.txt or # http://opensource.org/licenses/CDDL-1.0. # See the License for the specific language governing permissions # and limitations under the License. # # When distributing Covered Code, include this CDDL HEADER in each # file and include the License file at docs/cddl1.txt. # If applicable, add the following below this CDDL HEADER, with the # fields enclosed by brackets "[]" replaced with your own identifying # information: Portions Copyright [yyyy] [name of copyright owner] # # CDDL HEADER END # # 11-Oct-2014 Adrien Mahieux Added zoom. # 21-Nov-2013 Shawn Sterling Added consistent palette file option # 17-Mar-2013 Tim Bunce Added options and more tunables. # 15-Dec-2011 Dave Pacheco Support for frames with whitespace. # 10-Sep-2011 Brendan Gregg Created this. 
use strict; use Getopt::Long; use open qw(:std :utf8); # tunables my $encoding; my $fonttype = "Verdana"; my $imagewidth = 1200; # max width, pixels my $frameheight = 16; # max height is dynamic my $fontsize = 12; # base text size my $fontwidth = 0.59; # avg width relative to fontsize my $minwidth = 0.1; # min function width, pixels my $nametype = "Function:"; # what are the names in the data? my $countname = "samples"; # what are the counts in the data? my $colors = "hot"; # color theme my $bgcolors = ""; # background color theme my $nameattrfile; # file holding function attributes my $timemax; # (override the) sum of the counts my $factor = 1; # factor to scale counts by my $hash = 0; # color by function name my $palette = 0; # if we use consistent palettes (default off) my %palette_map; # palette map hash my $pal_file = "palette.map"; # palette map file name my $stackreverse = 0; # reverse stack order, switching merge end my $inverted = 0; # icicle graph my $flamechart = 0; # produce a flame chart (sort by time, do not merge stacks) my $negate = 0; # switch differential hues my $titletext = ""; # centered heading my $titledefault = "Flame Graph"; # overwritten by --title my $titleinverted = "Icicle Graph"; # " " my $searchcolor = "rgb(230,0,230)"; # color for search highlighting my $notestext = ""; # embedded notes in SVG my $subtitletext = ""; # second level title (optional) my $help = 0; sub usage { die < outfile.svg\n --title TEXT # change title text --subtitle TEXT # second level title (optional) --width NUM # width of image (default 1200) --height NUM # height of each frame (default 16) --minwidth NUM # omit smaller functions (default 0.1 pixels) --fonttype FONT # font type (default "Verdana") --fontsize NUM # font size (default 12) --countname TEXT # count type label (default "samples") --nametype TEXT # name type label (default "Function:") --colors PALETTE # set color palette. 
choices are: hot (default), mem, # io, wakeup, chain, java, js, perl, red, green, blue, # aqua, yellow, purple, orange --bgcolors COLOR # set background colors. gradient choices are yellow # (default), blue, green, grey; flat colors use "#rrggbb" --hash # colors are keyed by function name hash --cp # use consistent palette (palette.map) --reverse # generate stack-reversed flame graph --inverted # icicle graph --flamechart # produce a flame chart (sort by time, do not merge stacks) --negate # switch differential hues (blue<->red) --notes TEXT # add notes comment in SVG (for debugging) --help # this message eg, $0 --title="Flame Graph: malloc()" trace.txt > graph.svg USAGE_END } GetOptions( 'fonttype=s' => \$fonttype, 'width=i' => \$imagewidth, 'height=i' => \$frameheight, 'encoding=s' => \$encoding, 'fontsize=f' => \$fontsize, 'fontwidth=f' => \$fontwidth, 'minwidth=f' => \$minwidth, 'title=s' => \$titletext, 'subtitle=s' => \$subtitletext, 'nametype=s' => \$nametype, 'countname=s' => \$countname, 'nameattr=s' => \$nameattrfile, 'total=s' => \$timemax, 'factor=f' => \$factor, 'colors=s' => \$colors, 'bgcolors=s' => \$bgcolors, 'hash' => \$hash, 'cp' => \$palette, 'reverse' => \$stackreverse, 'inverted' => \$inverted, 'flamechart' => \$flamechart, 'negate' => \$negate, 'notes=s' => \$notestext, 'help' => \$help, ) or usage(); $help && usage(); # internals my $ypad1 = $fontsize * 3; # pad top, include title my $ypad2 = $fontsize * 2 + 10; # pad bottom, include labels my $ypad3 = $fontsize * 2; # pad top, include subtitle (optional) my $xpad = 10; # pad lefm and right my $framepad = 1; # vertical padding for frames my $depthmax = 0; my %Events; my %nameattr; if ($flamechart && $titletext eq "") { $titletext = "Flame Chart"; } if ($titletext eq "") { unless ($inverted) { $titletext = $titledefault; } else { $titletext = $titleinverted; } } if ($nameattrfile) { # The name-attribute file format is a function name followed by a tab then # a sequence of tab separated 
name=value pairs. open my $attrfh, $nameattrfile or die "Can't read $nameattrfile: $!\n"; while (<$attrfh>) { chomp; my ($funcname, $attrstr) = split /\t/, $_, 2; die "Invalid format in $nameattrfile" unless defined $attrstr; $nameattr{$funcname} = { map { split /=/, $_, 2 } split /\t/, $attrstr }; } } if ($notestext =~ /[<>]/) { die "Notes string can't contain < or >" } # background colors: # - yellow gradient: default (hot, java, js, perl) # - green gradient: mem # - blue gradient: io, wakeup, chain # - gray gradient: flat colors (red, green, blue, ...) if ($bgcolors eq "") { # choose a default if ($colors eq "mem") { $bgcolors = "green"; } elsif ($colors =~ /^(io|wakeup|chain)$/) { $bgcolors = "blue"; } elsif ($colors =~ /^(red|green|blue|aqua|yellow|purple|orange)$/) { $bgcolors = "grey"; } else { $bgcolors = "yellow"; } } my ($bgcolor1, $bgcolor2); if ($bgcolors eq "yellow") { $bgcolor1 = "#eeeeee"; # background color gradient start $bgcolor2 = "#eeeeb0"; # background color gradient stop } elsif ($bgcolors eq "blue") { $bgcolor1 = "#eeeeee"; $bgcolor2 = "#e0e0ff"; } elsif ($bgcolors eq "green") { $bgcolor1 = "#eef2ee"; $bgcolor2 = "#e0ffe0"; } elsif ($bgcolors eq "grey") { $bgcolor1 = "#f8f8f8"; $bgcolor2 = "#e8e8e8"; } elsif ($bgcolors =~ /^#......$/) { $bgcolor1 = $bgcolor2 = $bgcolors; } else { die "Unrecognized bgcolor option \"$bgcolors\"" } # SVG functions { package SVG; sub new { my $class = shift; my $self = {}; bless ($self, $class); return $self; } sub header { my ($self, $w, $h) = @_; my $enc_attr = ''; if (defined $encoding) { $enc_attr = qq{ encoding="$encoding"}; } $self->{svg} .= < SVG } sub include { my ($self, $content) = @_; $self->{svg} .= $content; } sub colorAllocate { my ($self, $r, $g, $b) = @_; return "rgb($r,$g,$b)"; } sub group_start { my ($self, $attr) = @_; my @g_attr = map { exists $attr->{$_} ? 
sprintf(qq/$_="%s"/, $attr->{$_}) : () } qw(id class); push @g_attr, $attr->{g_extra} if $attr->{g_extra}; if ($attr->{href}) { my @a_attr; push @a_attr, sprintf qq/xlink:href="%s"/, $attr->{href} if $attr->{href}; # default target=_top else links will open within SVG push @a_attr, sprintf qq/target="%s"/, $attr->{target} || "_top"; push @a_attr, $attr->{a_extra} if $attr->{a_extra}; $self->{svg} .= sprintf qq/\n/, join(' ', (@a_attr, @g_attr)); } else { $self->{svg} .= sprintf qq/\n/, join(' ', @g_attr); } $self->{svg} .= sprintf qq/%s<\/title>/, $attr->{title} if $attr->{title}; # should be first element within g container } sub group_end { my ($self, $attr) = @_; $self->{svg} .= $attr->{href} ? qq/<\/a>\n/ : qq/<\/g>\n/; } sub filledRectangle { my ($self, $x1, $y1, $x2, $y2, $fill, $extra) = @_; $x1 = sprintf "%0.1f", $x1; $x2 = sprintf "%0.1f", $x2; my $w = sprintf "%0.1f", $x2 - $x1; my $h = sprintf "%0.1f", $y2 - $y1; $extra = defined $extra ? $extra : ""; $self->{svg} .= qq/\n/; } sub stringTTF { my ($self, $id, $x, $y, $str, $extra) = @_; $x = sprintf "%0.2f", $x; $id = defined $id ? qq/id="$id"/ : ""; $extra ||= ""; $self->{svg} .= qq/$str<\/text>\n/; } sub svg { my $self = shift; return "$self->{svg}\n"; } 1; } sub namehash { # Generate a vector hash for the name string, weighting early over # later characters. We want to pick the same colors for function # names across different flame graphs. 
my $name = shift; my $vector = 0; my $weight = 1; my $max = 1; my $mod = 10; # if module name present, trunc to 1st char $name =~ s/.(.*?)`//; foreach my $c (split //, $name) { my $i = (ord $c) % $mod; $vector += ($i / ($mod++ - 1)) * $weight; $max += 1 * $weight; $weight *= 0.70; last if $mod > 12; } return (1 - $vector / $max) } sub color { my ($type, $hash, $name) = @_; my ($v1, $v2, $v3); if ($hash) { $v1 = namehash($name); $v2 = $v3 = namehash(scalar reverse $name); } else { $v1 = rand(1); $v2 = rand(1); $v3 = rand(1); } # theme palettes if (defined $type and $type eq "hot") { my $r = 205 + int(50 * $v3); my $g = 0 + int(230 * $v1); my $b = 0 + int(55 * $v2); return "rgb($r,$g,$b)"; } if (defined $type and $type eq "mem") { my $r = 0; my $g = 190 + int(50 * $v2); my $b = 0 + int(210 * $v1); return "rgb($r,$g,$b)"; } if (defined $type and $type eq "io") { my $r = 80 + int(60 * $v1); my $g = $r; my $b = 190 + int(55 * $v2); return "rgb($r,$g,$b)"; } # multi palettes if (defined $type and $type eq "java") { # Handle both annotations (_[j], _[i], ...; which are # accurate), as well as input that lacks any annotations, as # best as possible. Without annotations, we get a little hacky # and match on java|org|com, etc. 
if ($name =~ m:_\[j\]$:) { # jit annotation $type = "green"; } elsif ($name =~ m:_\[i\]$:) { # inline annotation $type = "aqua"; } elsif ($name =~ m:^L?(java|javax|jdk|net|org|com|io|sun)/:) { # Java $type = "green"; } elsif ($name =~ m:_\[k\]$:) { # kernel annotation $type = "orange"; } elsif ($name =~ /::/) { # C++ $type = "yellow"; } else { # system $type = "red"; } # fall-through to color palettes } if (defined $type and $type eq "perl") { if ($name =~ /::/) { # C++ $type = "yellow"; } elsif ($name =~ m:Perl: or $name =~ m:\.pl:) { # Perl $type = "green"; } elsif ($name =~ m:_\[k\]$:) { # kernel $type = "orange"; } else { # system $type = "red"; } # fall-through to color palettes } if (defined $type and $type eq "js") { # Handle both annotations (_[j], _[i], ...; which are # accurate), as well as input that lacks any annotations, as # best as possible. Without annotations, we get a little hacky, # and match on a "/" with a ".js", etc. if ($name =~ m:_\[j\]$:) { # jit annotation if ($name =~ m:/:) { $type = "green"; # source } else { $type = "aqua"; # builtin } } elsif ($name =~ /::/) { # C++ $type = "yellow"; } elsif ($name =~ m:/.*\.js:) { # JavaScript (match "/" in path) $type = "green"; } elsif ($name =~ m/:/) { # JavaScript (match ":" in builtin) $type = "aqua"; } elsif ($name =~ m/^ $/) { # Missing symbol $type = "green"; } elsif ($name =~ m:_\[k\]:) { # kernel $type = "orange"; } else { # system $type = "red"; } # fall-through to color palettes } if (defined $type and $type eq "wakeup") { $type = "aqua"; # fall-through to color palettes } if (defined $type and $type eq "chain") { if ($name =~ m:_\[w\]:) { # waker $type = "aqua" } else { # off-CPU $type = "blue"; } # fall-through to color palettes } # color palettes if (defined $type and $type eq "red") { my $r = 200 + int(55 * $v1); my $x = 50 + int(80 * $v1); return "rgb($r,$x,$x)"; } if (defined $type and $type eq "green") { my $g = 200 + int(55 * $v1); my $x = 50 + int(60 * $v1); return 
"rgb($x,$g,$x)"; } if (defined $type and $type eq "blue") { my $b = 205 + int(50 * $v1); my $x = 80 + int(60 * $v1); return "rgb($x,$x,$b)"; } if (defined $type and $type eq "yellow") { my $x = 175 + int(55 * $v1); my $b = 50 + int(20 * $v1); return "rgb($x,$x,$b)"; } if (defined $type and $type eq "purple") { my $x = 190 + int(65 * $v1); my $g = 80 + int(60 * $v1); return "rgb($x,$g,$x)"; } if (defined $type and $type eq "aqua") { my $r = 50 + int(60 * $v1); my $g = 165 + int(55 * $v1); my $b = 165 + int(55 * $v1); return "rgb($r,$g,$b)"; } if (defined $type and $type eq "orange") { my $r = 190 + int(65 * $v1); my $g = 90 + int(65 * $v1); return "rgb($r,$g,0)"; } return "rgb(0,0,0)"; } sub color_scale { my ($value, $max) = @_; my ($r, $g, $b) = (255, 255, 255); $value = -$value if $negate; if ($value > 0) { $g = $b = int(210 * ($max - $value) / $max); } elsif ($value < 0) { $r = $g = int(210 * ($max + $value) / $max); } return "rgb($r,$g,$b)"; } sub color_map { my ($colors, $func) = @_; if (exists $palette_map{$func}) { return $palette_map{$func}; } else { $palette_map{$func} = color($colors, $hash, $func); return $palette_map{$func}; } } sub write_palette { open(FILE, ">$pal_file"); foreach my $key (sort keys %palette_map) { print FILE $key."->".$palette_map{$key}."\n"; } close(FILE); } sub read_palette { if (-e $pal_file) { open(FILE, $pal_file) or die "can't open file $pal_file: $!"; while ( my $line = ) { chomp($line); (my $key, my $value) = split("->",$line); $palette_map{$key}=$value; } close(FILE) } } my %Node; # Hash of merged frame data my %Tmp; # flow() merges two stacks, storing the merged frames and value data in %Node. 
sub flow { my ($last, $this, $v, $d) = @_; my $len_a = @$last - 1; my $len_b = @$this - 1; my $i = 0; my $len_same; for (; $i <= $len_a; $i++) { last if $i > $len_b; last if $last->[$i] ne $this->[$i]; } $len_same = $i; for ($i = $len_a; $i >= $len_same; $i--) { my $k = "$last->[$i];$i"; # a unique ID is constructed from "func;depth;etime"; # func-depth isn't unique, it may be repeated later. $Node{"$k;$v"}->{stime} = delete $Tmp{$k}->{stime}; if (defined $Tmp{$k}->{delta}) { $Node{"$k;$v"}->{delta} = delete $Tmp{$k}->{delta}; } delete $Tmp{$k}; } for ($i = $len_same; $i <= $len_b; $i++) { my $k = "$this->[$i];$i"; $Tmp{$k}->{stime} = $v; if (defined $d) { $Tmp{$k}->{delta} += $i == $len_b ? $d : 0; } } return $this; } # parse input my @Data; my @SortedData; my $last = []; my $time = 0; my $delta = undef; my $ignored = 0; my $line; my $maxdelta = 1; # reverse if needed foreach (<>) { chomp; $line = $_; if ($stackreverse) { # there may be an extra samples column for differentials # XXX todo: redo these REs as one. It's repeated below. my($stack, $samples) = (/^(.*)\s+?(\d+(?:\.\d*)?)$/); my $samples2 = undef; if ($stack =~ /^(.*)\s+?(\d+(?:\.\d*)?)$/) { $samples2 = $samples; ($stack, $samples) = $stack =~ (/^(.*)\s+?(\d+(?:\.\d*)?)$/); unshift @Data, join(";", reverse split(";", $stack)) . " $samples $samples2"; } else { unshift @Data, join(";", reverse split(";", $stack)) . " $samples"; } } else { unshift @Data, $line; } } if ($flamechart) { # In flame chart mode, just reverse the data so time moves from left to right. 
@SortedData = reverse @Data; } else { @SortedData = sort @Data; } # process and merge frames foreach (@SortedData) { chomp; # process: folded_stack count # eg: func_a;func_b;func_c 31 my ($stack, $samples) = (/^(.*)\s+?(\d+(?:\.\d*)?)$/); unless (defined $samples and defined $stack) { ++$ignored; next; } # there may be an extra samples column for differentials: my $samples2 = undef; if ($stack =~ /^(.*)\s+?(\d+(?:\.\d*)?)$/) { $samples2 = $samples; ($stack, $samples) = $stack =~ (/^(.*)\s+?(\d+(?:\.\d*)?)$/); } $delta = undef; if (defined $samples2) { $delta = $samples2 - $samples; $maxdelta = abs($delta) if abs($delta) > $maxdelta; } # for chain graphs, annotate waker frames with "_[w]", for later # coloring. This is a hack, but has a precedent ("_[k]" from perf). if ($colors eq "chain") { my @parts = split ";--;", $stack; my @newparts = (); $stack = shift @parts; $stack .= ";--;"; foreach my $part (@parts) { $part =~ s/;/_[w];/g; $part .= "_[w]"; push @newparts, $part; } $stack .= join ";--;", @parts; } # merge frames and populate %Node: $last = flow($last, [ '', split ";", $stack ], $time, $delta); if (defined $samples2) { $time += $samples2; } else { $time += $samples; } } flow($last, [], $time, $delta); warn "Ignored $ignored lines with invalid format\n" if $ignored; unless ($time) { warn "ERROR: No stack counts found\n"; my $im = SVG->new(); # emit an error message SVG, for tools automating flamegraph use my $imageheight = $fontsize * 5; $im->header($imagewidth, $imageheight); $im->stringTTF(undef, int($imagewidth / 2), $fontsize * 2, "ERROR: No valid input provided to flamegraph.pl."); print $im->svg; exit 2; } if ($timemax and $timemax < $time) { warn "Specified --total $timemax is less than actual total $time, so ignored\n" if $timemax/$time > 0.02; # only warn is significant (e.g., not rounding etc) undef $timemax; } $timemax ||= $time; my $widthpertime = ($imagewidth - 2 * $xpad) / $timemax; my $minwidth_time = $minwidth / $widthpertime; # prune blocks 
that are too narrow and determine max depth while (my ($id, $node) = each %Node) { my ($func, $depth, $etime) = split ";", $id; my $stime = $node->{stime}; die "missing start for $id" if not defined $stime; if (($etime-$stime) < $minwidth_time) { delete $Node{$id}; next; } $depthmax = $depth if $depth > $depthmax; } # draw canvas, and embed interactive JavaScript program my $imageheight = (($depthmax + 1) * $frameheight) + $ypad1 + $ypad2; $imageheight += $ypad3 if $subtitletext ne ""; my $titlesize = $fontsize + 5; my $im = SVG->new(); my ($black, $vdgrey, $dgrey) = ( $im->colorAllocate(0, 0, 0), $im->colorAllocate(160, 160, 160), $im->colorAllocate(200, 200, 200), ); $im->header($imagewidth, $imageheight); my $inc = < INC $im->include($inc); $im->filledRectangle(0, 0, $imagewidth, $imageheight, 'url(#background)'); $im->stringTTF("title", int($imagewidth / 2), $fontsize * 2, $titletext); $im->stringTTF("subtitle", int($imagewidth / 2), $fontsize * 4, $subtitletext) if $subtitletext ne ""; $im->stringTTF("details", $xpad, $imageheight - ($ypad2 / 2), " "); $im->stringTTF("unzoom", $xpad, $fontsize * 2, "Reset Zoom", 'class="hide"'); $im->stringTTF("search", $imagewidth - $xpad - 100, $fontsize * 2, "Search"); $im->stringTTF("matched", $imagewidth - $xpad - 100, $imageheight - ($ypad2 / 2), " "); if ($palette) { read_palette(); } # draw frames $im->group_start({id => "frames"}); while (my ($id, $node) = each %Node) { my ($func, $depth, $etime) = split ";", $id; my $stime = $node->{stime}; my $delta = $node->{delta}; $etime = $timemax if $func eq "" and $depth == 0; my $x1 = $xpad + $stime * $widthpertime; my $x2 = $xpad + $etime * $widthpertime; my ($y1, $y2); unless ($inverted) { $y1 = $imageheight - $ypad2 - ($depth + 1) * $frameheight + $framepad; $y2 = $imageheight - $ypad2 - $depth * $frameheight; } else { $y1 = $ypad1 + $depth * $frameheight; $y2 = $ypad1 + ($depth + 1) * $frameheight - $framepad; } my $samples = sprintf "%.0f", ($etime - $stime) * $factor; 
(my $samples_txt = $samples) # add commas per perlfaq5 =~ s/(^[-+]?\d+?(?=(?>(?:\d{3})+)(?!\d))|\G\d{3}(?=\d))/$1,/g; my $info; if ($func eq "" and $depth == 0) { $info = "all ($samples_txt $countname, 100%)"; } else { my $pct = sprintf "%.2f", ((100 * $samples) / ($timemax * $factor)); my $escaped_func = $func; # clean up SVG breaking characters: $escaped_func =~ s/&/&/g; $escaped_func =~ s//>/g; $escaped_func =~ s/"/"/g; $escaped_func =~ s/_\[[kwij]\]$//; # strip any annotation unless (defined $delta) { $info = "$escaped_func ($samples_txt $countname, $pct%)"; } else { my $d = $negate ? -$delta : $delta; my $deltapct = sprintf "%.2f", ((100 * $d) / ($timemax * $factor)); $deltapct = $d > 0 ? "+$deltapct" : $deltapct; $info = "$escaped_func ($samples_txt $countname, $pct%; $deltapct%)"; } } my $nameattr = { %{ $nameattr{$func}||{} } }; # shallow clone $nameattr->{title} ||= $info; $im->group_start($nameattr); my $color; if ($func eq "--") { $color = $vdgrey; } elsif ($func eq "-") { $color = $dgrey; } elsif (defined $delta) { $color = color_scale($delta, $maxdelta); } elsif ($palette) { $color = color_map($colors, $func); } else { $color = color($colors, $hash, $func); } $im->filledRectangle($x1, $y1, $x2, $y2, $color, 'rx="2" ry="2"'); my $chars = int( ($x2 - $x1) / ($fontsize * $fontwidth)); my $text = ""; if ($chars >= 3) { # room for one char plus two dots $func =~ s/_\[[kwij]\]$//; # strip any annotation $text = substr $func, 0, $chars; substr($text, -2, 2) = ".." if $chars < length $func; $text =~ s/&/&/g; $text =~ s//>/g; } $im->stringTTF(undef, $x1 + 3, 3 + ($y1 + $y2) / 2, $text); $im->group_end($nameattr); } $im->group_end(); print $im->svg; if ($palette) { write_palette(); } # vim: ts=8 sts=8 sw=8 noexpandtab stackprof-0.2.26/vendor/FlameGraph/README0000644000004100000410000001305614605430231020013 0ustar www-datawww-dataFlame Graphs visualize profiled code-paths. 
Website: http://www.brendangregg.com/flamegraphs.html CPU profiling using DTrace, perf_events, SystemTap, or ktap: http://www.brendangregg.com/FlameGraphs/cpuflamegraphs.html CPU profiling using XCode Instruments: http://schani.wordpress.com/2012/11/16/flame-graphs-for-instruments/ CPU profiling using Xperf.exe: http://randomascii.wordpress.com/2013/03/26/summarizing-xperf-cpu-usage-with-flame-graphs/ Memory profiling: http://www.brendangregg.com/FlameGraphs/memoryflamegraphs.html These can be created in three steps: 1. Capture stacks 2. Fold stacks 3. flamegraph.pl 1. Capture stacks ================= Stack samples can be captured using DTrace, perf_events or SystemTap. Using DTrace to capture 60 seconds of kernel stacks at 997 Hertz: # dtrace -x stackframes=100 -n 'profile-997 /arg0/ { @[stack()] = count(); } tick-60s { exit(0); }' -o out.kern_stacks Using DTrace to capture 60 seconds of user-level stacks for PID 12345 at 97 Hertz: # dtrace -x ustackframes=100 -n 'profile-97 /pid == 12345 && arg1/ { @[ustack()] = count(); } tick-60s { exit(0); }' -o out.user_stacks Using DTrace to capture 60 seconds of user-level stacks, including while time is spent in the kernel, for PID 12345 at 97 Hertz: # dtrace -x ustackframes=100 -n 'profile-97 /pid == 12345/ { @[ustack()] = count(); } tick-60s { exit(0); }' -o out.user_stacks Switch ustack() for jstack() if the application has a ustack helper to include translated frames (eg, node.js frames; see: http://dtrace.org/blogs/dap/2012/01/05/where-does-your-node-program-spend-its-time/). The rate for user-level stack collection is deliberately slower than kernel, which is especially important when using jstack() as it performs additional work to translate frames. 2. Fold stacks ============== Use the stackcollapse programs to fold stack samples into single lines. 
The programs provided are: - stackcollapse.pl: for DTrace stacks - stackcollapse-perf.pl: for perf_events "perf script" output - stackcollapse-stap.pl: for SystemTap stacks - stackcollapse-instruments.pl: for XCode Instruments Usage example: $ ./stackcollapse.pl out.kern_stacks > out.kern_folded The output looks like this: unix`_sys_sysenter_post_swapgs 1401 unix`_sys_sysenter_post_swapgs;genunix`close 5 unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf 85 unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;c2audit`audit_closef 26 unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;c2audit`audit_setf 5 unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;genunix`audit_getstate 6 unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;genunix`audit_unfalloc 2 unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;genunix`closef 48 [...] 3. flamegraph.pl ================ Use flamegraph.pl to render a SVG. $ ./flamegraph.pl out.kern_folded > kernel.svg An advantage of having the folded input file (and why this is separate to flamegraph.pl) is that you can use grep for functions of interest. Eg: $ grep cpuid out.kern_folded | ./flamegraph.pl > cpuid.svg Provided Example ================ An example output from DTrace is included, both the captured stacks and the resulting Flame Graph. You can generate it yourself using: $ ./stackcollapse.pl example-stacks.txt | ./flamegraph.pl > example.svg This was from a particular performance investigation: the Flame Graph identified that CPU time was spent in the lofs module, and quantified that time. 
Options ======= See the USAGE message (--help) for options: USAGE: ./flamegraph.pl [options] infile > outfile.svg --titletext # change title text --width # width of image (default 1200) --height # height of each frame (default 16) --minwidth # omit smaller functions (default 0.1 pixels) --fonttype # font type (default "Verdana") --fontsize # font size (default 12) --countname # count type label (default "samples") --nametype # name type label (default "Function:") --colors # "hot", "mem", "io" palette (default "hot") --hash # colors are keyed by function name hash --cp # use consistent palette (palette.map) eg, ./flamegraph.pl --titletext="Flame Graph: malloc()" trace.txt > graph.svg As suggested in the example, flame graphs can process traces of any event, such as malloc()s, provided stack traces are gathered. Consistent Palette ================== If you use the --cp option, it will use the $colors selection and randomly generate the palette like normal. Any future flamegraphs created using the --cp option will use the same palette map. Any new symbols from future flamegraphs will have their colors randomly generated using the $colors selection. If you don't like the palette, just delete the palette.map file. This allows your to change your colorscheme between flamegraphs to make the differences REALLY stand out. Example: Say we have 2 captures, one with a problem, and one when it was working (whatever "it" is): cat working.folded | ./flamegraph.pl --cp > working.svg # this generates a palette.map, as per the normal random generated look. cat broken.folded | ./flamegraph.pl --cp --colors mem > broken.svg # this svg will use the same palette.map for the same events, but a very # different colorscheme for any new events. 
Take a look at the demo directory for an example: palette-example-working.svg palette-example-broken.svg stackprof-0.2.26/vendor/gprof2dot/0000755000004100000410000000000014605430231017026 5ustar www-datawww-datastackprof-0.2.26/vendor/gprof2dot/hotshotmain.py0000755000004100000410000000432714605430231021746 0ustar www-datawww-data#!/usr/bin/env python # # Copyright 2007 Jose Fonseca # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU Lesser General Public License as published # by the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # def run(statement, filename=None, sort=-1): import os, tempfile, hotshot, hotshot.stats logfd, logfn = tempfile.mkstemp() prof = hotshot.Profile(logfn) try: prof = prof.run(statement) except SystemExit: pass try: try: prof = prof.run(statement) except SystemExit: pass prof.close() finally: stats = hotshot.stats.load(logfn) stats.strip_dirs() stats.sort_stats(sort) if filename is not None: result = stats.dump_stats(filename) else: result = stats.print_stats() os.unlink(logfn) return result def main(): import os, sys from optparse import OptionParser usage = "hotshotmain.py [-o output_file_path] [-s sort] scriptfile [arg] ..." 
parser = OptionParser(usage=usage) parser.allow_interspersed_args = False parser.add_option('-o', '--outfile', dest="outfile", help="Save stats to ", default=None) parser.add_option('-s', '--sort', dest="sort", help="Sort order when printing to stdout, based on pstats.Stats class", default=-1) if not sys.argv[1:]: parser.print_usage() sys.exit(2) (options, args) = parser.parse_args() sys.argv[:] = args if (len(sys.argv) > 0): sys.path.insert(0, os.path.dirname(sys.argv[0])) run('execfile(%r)' % (sys.argv[0],), options.outfile, options.sort) else: parser.print_usage() return parser if __name__ == "__main__": main() stackprof-0.2.26/vendor/gprof2dot/gprof2dot.py0000755000004100000410000031745114605430231021324 0ustar www-datawww-data#!/usr/bin/env python # # Copyright 2008-2009 Jose Fonseca # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU Lesser General Public License as published # by the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . 
# """Generate a dot graph from the output of several profilers.""" __author__ = "Jose Fonseca et al" import sys import math import os.path import re import textwrap import optparse import xml.parsers.expat import collections import locale # Python 2.x/3.x compatibility if sys.version_info[0] >= 3: PYTHON_3 = True def compat_iteritems(x): return x.items() # No iteritems() in Python 3 def compat_itervalues(x): return x.values() # No itervalues() in Python 3 def compat_keys(x): return list(x.keys()) # keys() is a generator in Python 3 basestring = str # No class basestring in Python 3 unichr = chr # No unichr in Python 3 xrange = range # No xrange in Python 3 else: PYTHON_3 = False def compat_iteritems(x): return x.iteritems() def compat_itervalues(x): return x.itervalues() def compat_keys(x): return x.keys() try: # Debugging helper module import debug except ImportError: pass MULTIPLICATION_SIGN = unichr(0xd7) def times(x): return "%u%s" % (x, MULTIPLICATION_SIGN) def percentage(p): return "%.02f%%" % (p*100.0,) def add(a, b): return a + b def equal(a, b): if a == b: return a else: return None def fail(a, b): assert False tol = 2 ** -23 def ratio(numerator, denominator): try: ratio = float(numerator)/float(denominator) except ZeroDivisionError: # 0/0 is undefined, but 1.0 yields more useful results return 1.0 if ratio < 0.0: if ratio < -tol: sys.stderr.write('warning: negative ratio (%s/%s)\n' % (numerator, denominator)) return 0.0 if ratio > 1.0: if ratio > 1.0 + tol: sys.stderr.write('warning: ratio greater than one (%s/%s)\n' % (numerator, denominator)) return 1.0 return ratio class UndefinedEvent(Exception): """Raised when attempting to get an event which is undefined.""" def __init__(self, event): Exception.__init__(self) self.event = event def __str__(self): return 'unspecified event %s' % self.event.name class Event(object): """Describe a kind of event, and its basic operations.""" def __init__(self, name, null, aggregator, formatter = str): self.name = name 
self._null = null self._aggregator = aggregator self._formatter = formatter def __eq__(self, other): return self is other def __hash__(self): return id(self) def null(self): return self._null def aggregate(self, val1, val2): """Aggregate two event values.""" assert val1 is not None assert val2 is not None return self._aggregator(val1, val2) def format(self, val): """Format an event value.""" assert val is not None return self._formatter(val) CALLS = Event("Calls", 0, add, times) SAMPLES = Event("Samples", 0, add, times) SAMPLES2 = Event("Samples", 0, add, times) # Count of samples where a given function was either executing or on the stack. # This is used to calculate the total time ratio according to the # straightforward method described in Mike Dunlavey's answer to # stackoverflow.com/questions/1777556/alternatives-to-gprof, item 4 (the myth # "that recursion is a tricky confusing issue"), last edited 2012-08-30: it's # just the ratio of TOTAL_SAMPLES over the number of samples in the profile. 
# # Used only when totalMethod == callstacks TOTAL_SAMPLES = Event("Samples", 0, add, times) TIME = Event("Time", 0.0, add, lambda x: '(' + str(x) + ')') TIME_RATIO = Event("Time ratio", 0.0, add, lambda x: '(' + percentage(x) + ')') TOTAL_TIME = Event("Total time", 0.0, fail) TOTAL_TIME_RATIO = Event("Total time ratio", 0.0, fail, percentage) totalMethod = 'callratios' class Object(object): """Base class for all objects in profile which can store events.""" def __init__(self, events=None): if events is None: self.events = {} else: self.events = events def __hash__(self): return id(self) def __eq__(self, other): return self is other def __contains__(self, event): return event in self.events def __getitem__(self, event): try: return self.events[event] except KeyError: raise UndefinedEvent(event) def __setitem__(self, event, value): if value is None: if event in self.events: del self.events[event] else: self.events[event] = value class Call(Object): """A call between functions. There should be at most one call object for every pair of functions. 
""" def __init__(self, callee_id): Object.__init__(self) self.callee_id = callee_id self.ratio = None self.weight = None class Function(Object): """A function.""" def __init__(self, id, name): Object.__init__(self) self.id = id self.name = name self.module = None self.process = None self.calls = {} self.called = None self.weight = None self.cycle = None def add_call(self, call): if call.callee_id in self.calls: sys.stderr.write('warning: overwriting call from function %s to %s\n' % (str(self.id), str(call.callee_id))) self.calls[call.callee_id] = call def get_call(self, callee_id): if not callee_id in self.calls: call = Call(callee_id) call[SAMPLES] = 0 call[SAMPLES2] = 0 call[CALLS] = 0 self.calls[callee_id] = call return self.calls[callee_id] _parenthesis_re = re.compile(r'\([^()]*\)') _angles_re = re.compile(r'<[^<>]*>') _const_re = re.compile(r'\s+const$') def stripped_name(self): """Remove extraneous information from C++ demangled function names.""" name = self.name # Strip function parameters from name by recursively removing paired parenthesis while True: name, n = self._parenthesis_re.subn('', name) if not n: break # Strip const qualifier name = self._const_re.sub('', name) # Strip template parameters from name by recursively removing paired angles while True: name, n = self._angles_re.subn('', name) if not n: break return name # TODO: write utility functions def __repr__(self): return self.name class Cycle(Object): """A cycle made from recursive function calls.""" def __init__(self): Object.__init__(self) # XXX: Do cycles need an id? self.functions = set() def add_function(self, function): assert function not in self.functions self.functions.add(function) # XXX: Aggregate events? 
if function.cycle is not None: for other in function.cycle.functions: if function not in self.functions: self.add_function(other) function.cycle = self class Profile(Object): """The whole profile.""" def __init__(self): Object.__init__(self) self.functions = {} self.cycles = [] def add_function(self, function): if function.id in self.functions: sys.stderr.write('warning: overwriting function %s (id %s)\n' % (function.name, str(function.id))) self.functions[function.id] = function def add_cycle(self, cycle): self.cycles.append(cycle) def validate(self): """Validate the edges.""" for function in compat_itervalues(self.functions): for callee_id in compat_keys(function.calls): assert function.calls[callee_id].callee_id == callee_id if callee_id not in self.functions: sys.stderr.write('warning: call to undefined function %s from function %s\n' % (str(callee_id), function.name)) del function.calls[callee_id] def find_cycles(self): """Find cycles using Tarjan's strongly connected components algorithm.""" # Apply the Tarjan's algorithm successively until all functions are visited visited = set() for function in compat_itervalues(self.functions): if function not in visited: self._tarjan(function, 0, [], {}, {}, visited) cycles = [] for function in compat_itervalues(self.functions): if function.cycle is not None and function.cycle not in cycles: cycles.append(function.cycle) self.cycles = cycles if 0: for cycle in cycles: sys.stderr.write("Cycle:\n") for member in cycle.functions: sys.stderr.write("\tFunction %s\n" % member.name) def prune_root(self, root): visited = set() frontier = set([root]) while len(frontier) > 0: node = frontier.pop() visited.add(node) f = self.functions[node] newNodes = f.calls.keys() frontier = frontier.union(set(newNodes) - visited) subtreeFunctions = {} for n in visited: subtreeFunctions[n] = self.functions[n] self.functions = subtreeFunctions def prune_leaf(self, leaf): edgesUp = collections.defaultdict(set) for f in self.functions.keys(): for n 
in self.functions[f].calls.keys(): edgesUp[n].add(f) # build the tree up visited = set() frontier = set([leaf]) while len(frontier) > 0: node = frontier.pop() visited.add(node) frontier = frontier.union(edgesUp[node] - visited) downTree = set(self.functions.keys()) upTree = visited path = downTree.intersection(upTree) pathFunctions = {} for n in path: f = self.functions[n] newCalls = {} for c in f.calls.keys(): if c in path: newCalls[c] = f.calls[c] f.calls = newCalls pathFunctions[n] = f self.functions = pathFunctions def getFunctionId(self, funcName): for f in self.functions: if self.functions[f].name == funcName: return f return False def _tarjan(self, function, order, stack, orders, lowlinks, visited): """Tarjan's strongly connected components algorithm. See also: - http://en.wikipedia.org/wiki/Tarjan's_strongly_connected_components_algorithm """ visited.add(function) orders[function] = order lowlinks[function] = order order += 1 pos = len(stack) stack.append(function) for call in compat_itervalues(function.calls): callee = self.functions[call.callee_id] # TODO: use a set to optimize lookup if callee not in orders: order = self._tarjan(callee, order, stack, orders, lowlinks, visited) lowlinks[function] = min(lowlinks[function], lowlinks[callee]) elif callee in stack: lowlinks[function] = min(lowlinks[function], orders[callee]) if lowlinks[function] == orders[function]: # Strongly connected component found members = stack[pos:] del stack[pos:] if len(members) > 1: cycle = Cycle() for member in members: cycle.add_function(member) return order def call_ratios(self, event): # Aggregate for incoming calls cycle_totals = {} for cycle in self.cycles: cycle_totals[cycle] = 0.0 function_totals = {} for function in compat_itervalues(self.functions): function_totals[function] = 0.0 # Pass 1: function_total gets the sum of call[event] for all # incoming arrows. Same for cycle_total for all arrows # that are coming into the *cycle* but are not part of it. 
for function in compat_itervalues(self.functions): for call in compat_itervalues(function.calls): if call.callee_id != function.id: callee = self.functions[call.callee_id] if event in call.events: function_totals[callee] += call[event] if callee.cycle is not None and callee.cycle is not function.cycle: cycle_totals[callee.cycle] += call[event] else: sys.stderr.write("call_ratios: No data for " + function.name + " call to " + callee.name + "\n") # Pass 2: Compute the ratios. Each call[event] is scaled by the # function_total of the callee. Calls into cycles use the # cycle_total, but not calls within cycles. for function in compat_itervalues(self.functions): for call in compat_itervalues(function.calls): assert call.ratio is None if call.callee_id != function.id: callee = self.functions[call.callee_id] if event in call.events: if callee.cycle is not None and callee.cycle is not function.cycle: total = cycle_totals[callee.cycle] else: total = function_totals[callee] call.ratio = ratio(call[event], total) else: # Warnings here would only repeat those issued above. call.ratio = 0.0 def integrate(self, outevent, inevent): """Propagate function time ratio along the function calls. Must be called after finding the cycles. 
See also: - http://citeseer.ist.psu.edu/graham82gprof.html """ # Sanity checking assert outevent not in self for function in compat_itervalues(self.functions): assert outevent not in function assert inevent in function for call in compat_itervalues(function.calls): assert outevent not in call if call.callee_id != function.id: assert call.ratio is not None # Aggregate the input for each cycle for cycle in self.cycles: total = inevent.null() for function in compat_itervalues(self.functions): total = inevent.aggregate(total, function[inevent]) self[inevent] = total # Integrate along the edges total = inevent.null() for function in compat_itervalues(self.functions): total = inevent.aggregate(total, function[inevent]) self._integrate_function(function, outevent, inevent) self[outevent] = total def _integrate_function(self, function, outevent, inevent): if function.cycle is not None: return self._integrate_cycle(function.cycle, outevent, inevent) else: if outevent not in function: total = function[inevent] for call in compat_itervalues(function.calls): if call.callee_id != function.id: total += self._integrate_call(call, outevent, inevent) function[outevent] = total return function[outevent] def _integrate_call(self, call, outevent, inevent): assert outevent not in call assert call.ratio is not None callee = self.functions[call.callee_id] subtotal = call.ratio *self._integrate_function(callee, outevent, inevent) call[outevent] = subtotal return subtotal def _integrate_cycle(self, cycle, outevent, inevent): if outevent not in cycle: # Compute the outevent for the whole cycle total = inevent.null() for member in cycle.functions: subtotal = member[inevent] for call in compat_itervalues(member.calls): callee = self.functions[call.callee_id] if callee.cycle is not cycle: subtotal += self._integrate_call(call, outevent, inevent) total += subtotal cycle[outevent] = total # Compute the time propagated to callers of this cycle callees = {} for function in 
compat_itervalues(self.functions): if function.cycle is not cycle: for call in compat_itervalues(function.calls): callee = self.functions[call.callee_id] if callee.cycle is cycle: try: callees[callee] += call.ratio except KeyError: callees[callee] = call.ratio for member in cycle.functions: member[outevent] = outevent.null() for callee, call_ratio in compat_iteritems(callees): ranks = {} call_ratios = {} partials = {} self._rank_cycle_function(cycle, callee, 0, ranks) self._call_ratios_cycle(cycle, callee, ranks, call_ratios, set()) partial = self._integrate_cycle_function(cycle, callee, call_ratio, partials, ranks, call_ratios, outevent, inevent) assert partial == max(partials.values()) assert not total or abs(1.0 - partial/(call_ratio*total)) <= 0.001 return cycle[outevent] def _rank_cycle_function(self, cycle, function, rank, ranks): if function not in ranks or ranks[function] > rank: ranks[function] = rank for call in compat_itervalues(function.calls): if call.callee_id != function.id: callee = self.functions[call.callee_id] if callee.cycle is cycle: self._rank_cycle_function(cycle, callee, rank + 1, ranks) def _call_ratios_cycle(self, cycle, function, ranks, call_ratios, visited): if function not in visited: visited.add(function) for call in compat_itervalues(function.calls): if call.callee_id != function.id: callee = self.functions[call.callee_id] if callee.cycle is cycle: if ranks[callee] > ranks[function]: call_ratios[callee] = call_ratios.get(callee, 0.0) + call.ratio self._call_ratios_cycle(cycle, callee, ranks, call_ratios, visited) def _integrate_cycle_function(self, cycle, function, partial_ratio, partials, ranks, call_ratios, outevent, inevent): if function not in partials: partial = partial_ratio*function[inevent] for call in compat_itervalues(function.calls): if call.callee_id != function.id: callee = self.functions[call.callee_id] if callee.cycle is not cycle: assert outevent in call partial += partial_ratio*call[outevent] else: if ranks[callee] > 
ranks[function]: callee_partial = self._integrate_cycle_function(cycle, callee, partial_ratio, partials, ranks, call_ratios, outevent, inevent) call_ratio = ratio(call.ratio, call_ratios[callee]) call_partial = call_ratio*callee_partial try: call[outevent] += call_partial except UndefinedEvent: call[outevent] = call_partial partial += call_partial partials[function] = partial try: function[outevent] += partial except UndefinedEvent: function[outevent] = partial return partials[function] def aggregate(self, event): """Aggregate an event for the whole profile.""" total = event.null() for function in compat_itervalues(self.functions): try: total = event.aggregate(total, function[event]) except UndefinedEvent: return self[event] = total def ratio(self, outevent, inevent): assert outevent not in self assert inevent in self for function in compat_itervalues(self.functions): assert outevent not in function assert inevent in function function[outevent] = ratio(function[inevent], self[inevent]) for call in compat_itervalues(function.calls): assert outevent not in call if inevent in call: call[outevent] = ratio(call[inevent], self[inevent]) self[outevent] = 1.0 def prune(self, node_thres, edge_thres): """Prune the profile""" # compute the prune ratios for function in compat_itervalues(self.functions): try: function.weight = function[TOTAL_TIME_RATIO] except UndefinedEvent: pass for call in compat_itervalues(function.calls): callee = self.functions[call.callee_id] if TOTAL_TIME_RATIO in call: # handle exact cases first call.weight = call[TOTAL_TIME_RATIO] else: try: # make a safe estimate call.weight = min(function[TOTAL_TIME_RATIO], callee[TOTAL_TIME_RATIO]) except UndefinedEvent: pass # prune the nodes for function_id in compat_keys(self.functions): function = self.functions[function_id] if function.weight is not None: if function.weight < node_thres: del self.functions[function_id] # prune the egdes for function in compat_itervalues(self.functions): for callee_id in 
compat_keys(function.calls): call = function.calls[callee_id] if callee_id not in self.functions or call.weight is not None and call.weight < edge_thres: del function.calls[callee_id] def dump(self): for function in compat_itervalues(self.functions): sys.stderr.write('Function %s:\n' % (function.name,)) self._dump_events(function.events) for call in compat_itervalues(function.calls): callee = self.functions[call.callee_id] sys.stderr.write(' Call %s:\n' % (callee.name,)) self._dump_events(call.events) for cycle in self.cycles: sys.stderr.write('Cycle:\n') self._dump_events(cycle.events) for function in cycle.functions: sys.stderr.write(' Function %s\n' % (function.name,)) def _dump_events(self, events): for event, value in compat_iteritems(events): sys.stderr.write(' %s: %s\n' % (event.name, event.format(value))) class Struct: """Masquerade a dictionary with a structure-like behavior.""" def __init__(self, attrs = None): if attrs is None: attrs = {} self.__dict__['_attrs'] = attrs def __getattr__(self, name): try: return self._attrs[name] except KeyError: raise AttributeError(name) def __setattr__(self, name, value): self._attrs[name] = value def __str__(self): return str(self._attrs) def __repr__(self): return repr(self._attrs) class ParseError(Exception): """Raised when parsing to signal mismatches.""" def __init__(self, msg, line): self.msg = msg # TODO: store more source line information self.line = line def __str__(self): return '%s: %r' % (self.msg, self.line) class Parser: """Parser interface.""" stdinInput = True multipleInput = False def __init__(self): pass def parse(self): raise NotImplementedError class LineParser(Parser): """Base class for parsers that read line-based formats.""" def __init__(self, stream): Parser.__init__(self) self._stream = stream self.__line = None self.__eof = False self.line_no = 0 def readline(self): line = self._stream.readline() if not line: self.__line = '' self.__eof = True else: self.line_no += 1 line = line.rstrip('\r\n') 
if not PYTHON_3: encoding = self._stream.encoding if encoding is None: encoding = locale.getpreferredencoding() line = line.decode(encoding) self.__line = line def lookahead(self): assert self.__line is not None return self.__line def consume(self): assert self.__line is not None line = self.__line self.readline() return line def eof(self): assert self.__line is not None return self.__eof XML_ELEMENT_START, XML_ELEMENT_END, XML_CHARACTER_DATA, XML_EOF = range(4) class XmlToken: def __init__(self, type, name_or_data, attrs = None, line = None, column = None): assert type in (XML_ELEMENT_START, XML_ELEMENT_END, XML_CHARACTER_DATA, XML_EOF) self.type = type self.name_or_data = name_or_data self.attrs = attrs self.line = line self.column = column def __str__(self): if self.type == XML_ELEMENT_START: return '<' + self.name_or_data + ' ...>' if self.type == XML_ELEMENT_END: return '' if self.type == XML_CHARACTER_DATA: return self.name_or_data if self.type == XML_EOF: return 'end of file' assert 0 class XmlTokenizer: """Expat based XML tokenizer.""" def __init__(self, fp, skip_ws = True): self.fp = fp self.tokens = [] self.index = 0 self.final = False self.skip_ws = skip_ws self.character_pos = 0, 0 self.character_data = '' self.parser = xml.parsers.expat.ParserCreate() self.parser.StartElementHandler = self.handle_element_start self.parser.EndElementHandler = self.handle_element_end self.parser.CharacterDataHandler = self.handle_character_data def handle_element_start(self, name, attributes): self.finish_character_data() line, column = self.pos() token = XmlToken(XML_ELEMENT_START, name, attributes, line, column) self.tokens.append(token) def handle_element_end(self, name): self.finish_character_data() line, column = self.pos() token = XmlToken(XML_ELEMENT_END, name, None, line, column) self.tokens.append(token) def handle_character_data(self, data): if not self.character_data: self.character_pos = self.pos() self.character_data += data def finish_character_data(self): 
if self.character_data: if not self.skip_ws or not self.character_data.isspace(): line, column = self.character_pos token = XmlToken(XML_CHARACTER_DATA, self.character_data, None, line, column) self.tokens.append(token) self.character_data = '' def next(self): size = 16*1024 while self.index >= len(self.tokens) and not self.final: self.tokens = [] self.index = 0 data = self.fp.read(size) self.final = len(data) < size try: self.parser.Parse(data, self.final) except xml.parsers.expat.ExpatError as e: #if e.code == xml.parsers.expat.errors.XML_ERROR_NO_ELEMENTS: if e.code == 3: pass else: raise e if self.index >= len(self.tokens): line, column = self.pos() token = XmlToken(XML_EOF, None, None, line, column) else: token = self.tokens[self.index] self.index += 1 return token def pos(self): return self.parser.CurrentLineNumber, self.parser.CurrentColumnNumber class XmlTokenMismatch(Exception): def __init__(self, expected, found): self.expected = expected self.found = found def __str__(self): return '%u:%u: %s expected, %s found' % (self.found.line, self.found.column, str(self.expected), str(self.found)) class XmlParser(Parser): """Base XML document parser.""" def __init__(self, fp): Parser.__init__(self) self.tokenizer = XmlTokenizer(fp) self.consume() def consume(self): self.token = self.tokenizer.next() def match_element_start(self, name): return self.token.type == XML_ELEMENT_START and self.token.name_or_data == name def match_element_end(self, name): return self.token.type == XML_ELEMENT_END and self.token.name_or_data == name def element_start(self, name): while self.token.type == XML_CHARACTER_DATA: self.consume() if self.token.type != XML_ELEMENT_START: raise XmlTokenMismatch(XmlToken(XML_ELEMENT_START, name), self.token) if self.token.name_or_data != name: raise XmlTokenMismatch(XmlToken(XML_ELEMENT_START, name), self.token) attrs = self.token.attrs self.consume() return attrs def element_end(self, name): while self.token.type == XML_CHARACTER_DATA: 
self.consume() if self.token.type != XML_ELEMENT_END: raise XmlTokenMismatch(XmlToken(XML_ELEMENT_END, name), self.token) if self.token.name_or_data != name: raise XmlTokenMismatch(XmlToken(XML_ELEMENT_END, name), self.token) self.consume() def character_data(self, strip = True): data = '' while self.token.type == XML_CHARACTER_DATA: data += self.token.name_or_data self.consume() if strip: data = data.strip() return data class GprofParser(Parser): """Parser for GNU gprof output. See also: - Chapter "Interpreting gprof's Output" from the GNU gprof manual http://sourceware.org/binutils/docs-2.18/gprof/Call-Graph.html#Call-Graph - File "cg_print.c" from the GNU gprof source code http://sourceware.org/cgi-bin/cvsweb.cgi/~checkout~/src/gprof/cg_print.c?rev=1.12&cvsroot=src """ def __init__(self, fp): Parser.__init__(self) self.fp = fp self.functions = {} self.cycles = {} def readline(self): line = self.fp.readline() if not line: sys.stderr.write('error: unexpected end of file\n') sys.exit(1) line = line.rstrip('\r\n') return line _int_re = re.compile(r'^\d+$') _float_re = re.compile(r'^\d+\.\d+$') def translate(self, mo): """Extract a structure from a match object, while translating the types in the process.""" attrs = {} groupdict = mo.groupdict() for name, value in compat_iteritems(groupdict): if value is None: value = None elif self._int_re.match(value): value = int(value) elif self._float_re.match(value): value = float(value) attrs[name] = (value) return Struct(attrs) _cg_header_re = re.compile( # original gprof header r'^\s+called/total\s+parents\s*$|' + r'^index\s+%time\s+self\s+descendents\s+called\+self\s+name\s+index\s*$|' + r'^\s+called/total\s+children\s*$|' + # GNU gprof header r'^index\s+%\s+time\s+self\s+children\s+called\s+name\s*$' ) _cg_ignore_re = re.compile( # spontaneous r'^\s+\s*$|' # internal calls (such as "mcount") r'^.*\((\d+)\)$' ) _cg_primary_re = re.compile( r'^\[(?P\d+)\]?' 
+ r'\s+(?P\d+\.\d+)' + r'\s+(?P\d+\.\d+)' + r'\s+(?P\d+\.\d+)' + r'\s+(?:(?P\d+)(?:\+(?P\d+))?)?' + r'\s+(?P\S.*?)' + r'(?:\s+\d+)>)?' + r'\s\[(\d+)\]$' ) _cg_parent_re = re.compile( r'^\s+(?P\d+\.\d+)?' + r'\s+(?P\d+\.\d+)?' + r'\s+(?P\d+)(?:/(?P\d+))?' + r'\s+(?P\S.*?)' + r'(?:\s+\d+)>)?' + r'\s\[(?P\d+)\]$' ) _cg_child_re = _cg_parent_re _cg_cycle_header_re = re.compile( r'^\[(?P\d+)\]?' + r'\s+(?P\d+\.\d+)' + r'\s+(?P\d+\.\d+)' + r'\s+(?P\d+\.\d+)' + r'\s+(?:(?P\d+)(?:\+(?P\d+))?)?' + r'\s+\d+)\sas\sa\swhole>' + r'\s\[(\d+)\]$' ) _cg_cycle_member_re = re.compile( r'^\s+(?P\d+\.\d+)?' + r'\s+(?P\d+\.\d+)?' + r'\s+(?P\d+)(?:\+(?P\d+))?' + r'\s+(?P\S.*?)' + r'(?:\s+\d+)>)?' + r'\s\[(?P\d+)\]$' ) _cg_sep_re = re.compile(r'^--+$') def parse_function_entry(self, lines): parents = [] children = [] while True: if not lines: sys.stderr.write('warning: unexpected end of entry\n') line = lines.pop(0) if line.startswith('['): break # read function parent line mo = self._cg_parent_re.match(line) if not mo: if self._cg_ignore_re.match(line): continue sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) else: parent = self.translate(mo) parents.append(parent) # read primary line mo = self._cg_primary_re.match(line) if not mo: sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) return else: function = self.translate(mo) while lines: line = lines.pop(0) # read function subroutine line mo = self._cg_child_re.match(line) if not mo: if self._cg_ignore_re.match(line): continue sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) else: child = self.translate(mo) children.append(child) function.parents = parents function.children = children self.functions[function.index] = function def parse_cycle_entry(self, lines): # read cycle header line line = lines[0] mo = self._cg_cycle_header_re.match(line) if not mo: sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) return cycle = self.translate(mo) # read cycle 
member lines cycle.functions = [] for line in lines[1:]: mo = self._cg_cycle_member_re.match(line) if not mo: sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) continue call = self.translate(mo) cycle.functions.append(call) self.cycles[cycle.cycle] = cycle def parse_cg_entry(self, lines): if lines[0].startswith("["): self.parse_cycle_entry(lines) else: self.parse_function_entry(lines) def parse_cg(self): """Parse the call graph.""" # skip call graph header while not self._cg_header_re.match(self.readline()): pass line = self.readline() while self._cg_header_re.match(line): line = self.readline() # process call graph entries entry_lines = [] while line != '\014': # form feed if line and not line.isspace(): if self._cg_sep_re.match(line): self.parse_cg_entry(entry_lines) entry_lines = [] else: entry_lines.append(line) line = self.readline() def parse(self): self.parse_cg() self.fp.close() profile = Profile() profile[TIME] = 0.0 cycles = {} for index in self.cycles: cycles[index] = Cycle() for entry in compat_itervalues(self.functions): # populate the function function = Function(entry.index, entry.name) function[TIME] = entry.self if entry.called is not None: function.called = entry.called if entry.called_self is not None: call = Call(entry.index) call[CALLS] = entry.called_self function.called += entry.called_self # populate the function calls for child in entry.children: call = Call(child.index) assert child.called is not None call[CALLS] = child.called if child.index not in self.functions: # NOTE: functions that were never called but were discovered by gprof's # static call graph analysis dont have a call graph entry so we need # to add them here missing = Function(child.index, child.name) function[TIME] = 0.0 function.called = 0 profile.add_function(missing) function.add_call(call) profile.add_function(function) if entry.cycle is not None: try: cycle = cycles[entry.cycle] except KeyError: sys.stderr.write('warning: entry missing\n' % 
entry.cycle) cycle = Cycle() cycles[entry.cycle] = cycle cycle.add_function(function) profile[TIME] = profile[TIME] + function[TIME] for cycle in compat_itervalues(cycles): profile.add_cycle(cycle) # Compute derived events profile.validate() profile.ratio(TIME_RATIO, TIME) profile.call_ratios(CALLS) profile.integrate(TOTAL_TIME, TIME) profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME) return profile # Clone&hack of GprofParser for VTune Amplifier XE 2013 gprof-cc output. # Tested only with AXE 2013 for Windows. # - Use total times as reported by AXE. # - In the absence of call counts, call ratios are faked from the relative # proportions of total time. This affects only the weighting of the calls. # - Different header, separator, and end marker. # - Extra whitespace after function names. # - You get a full entry for , which does not have parents. # - Cycles do have parents. These are saved but unused (as they are # for functions). # - Disambiguated "unrecognized call graph entry" error messages. # Notes: # - Total time of functions as reported by AXE passes the val3 test. # - CPU Time:Children in the input is sometimes a negative number. This # value goes to the variable descendants, which is unused. # - The format of gprof-cc reports is unaffected by the use of # -knob enable-call-counts=true (no call counts, ever), or # -show-as=samples (results are quoted in seconds regardless). class AXEParser(Parser): "Parser for VTune Amplifier XE 2013 gprof-cc report output." 
def __init__(self, fp): Parser.__init__(self) self.fp = fp self.functions = {} self.cycles = {} def readline(self): line = self.fp.readline() if not line: sys.stderr.write('error: unexpected end of file\n') sys.exit(1) line = line.rstrip('\r\n') return line _int_re = re.compile(r'^\d+$') _float_re = re.compile(r'^\d+\.\d+$') def translate(self, mo): """Extract a structure from a match object, while translating the types in the process.""" attrs = {} groupdict = mo.groupdict() for name, value in compat_iteritems(groupdict): if value is None: value = None elif self._int_re.match(value): value = int(value) elif self._float_re.match(value): value = float(value) attrs[name] = (value) return Struct(attrs) _cg_header_re = re.compile( '^Index |' '^-----+ ' ) _cg_footer_re = re.compile('^Index\s+Function\s*$') _cg_primary_re = re.compile( r'^\[(?P\d+)\]?' + r'\s+(?P\d+\.\d+)' + r'\s+(?P\d+\.\d+)' + r'\s+(?P\d+\.\d+)' + r'\s+(?P\S.*?)' + r'(?:\s+\d+)>)?' + r'\s+\[(\d+)\]$' ) _cg_parent_re = re.compile( r'^\s+(?P\d+\.\d+)?' + r'\s+(?P\d+\.\d+)?' + r'\s+(?P\S.*?)' + r'(?:\s+\d+)>)?' + r'\s+\[(?P\d+)\]$' ) _cg_child_re = _cg_parent_re _cg_cycle_header_re = re.compile( r'^\[(?P\d+)\]?' + r'\s+(?P\d+\.\d+)' + r'\s+(?P\d+\.\d+)' + r'\s+(?P\d+\.\d+)' + r'\s+\d+)\sas\sa\swhole>' + r'\s+\[(\d+)\]$' ) _cg_cycle_member_re = re.compile( r'^\s+(?P\d+\.\d+)?' + r'\s+(?P\d+\.\d+)?' + r'\s+(?P\S.*?)' + r'(?:\s+\d+)>)?' 
+ r'\s+\[(?P\d+)\]$' ) def parse_function_entry(self, lines): parents = [] children = [] while True: if not lines: sys.stderr.write('warning: unexpected end of entry\n') return line = lines.pop(0) if line.startswith('['): break # read function parent line mo = self._cg_parent_re.match(line) if not mo: sys.stderr.write('warning: unrecognized call graph entry (1): %r\n' % line) else: parent = self.translate(mo) if parent.name != '': parents.append(parent) # read primary line mo = self._cg_primary_re.match(line) if not mo: sys.stderr.write('warning: unrecognized call graph entry (2): %r\n' % line) return else: function = self.translate(mo) while lines: line = lines.pop(0) # read function subroutine line mo = self._cg_child_re.match(line) if not mo: sys.stderr.write('warning: unrecognized call graph entry (3): %r\n' % line) else: child = self.translate(mo) if child.name != '': children.append(child) if function.name != '': function.parents = parents function.children = children self.functions[function.index] = function def parse_cycle_entry(self, lines): # Process the parents that were not there in gprof format. 
parents = [] while True: if not lines: sys.stderr.write('warning: unexpected end of cycle entry\n') return line = lines.pop(0) if line.startswith('['): break mo = self._cg_parent_re.match(line) if not mo: sys.stderr.write('warning: unrecognized call graph entry (6): %r\n' % line) else: parent = self.translate(mo) if parent.name != '': parents.append(parent) # read cycle header line mo = self._cg_cycle_header_re.match(line) if not mo: sys.stderr.write('warning: unrecognized call graph entry (4): %r\n' % line) return cycle = self.translate(mo) # read cycle member lines cycle.functions = [] for line in lines[1:]: mo = self._cg_cycle_member_re.match(line) if not mo: sys.stderr.write('warning: unrecognized call graph entry (5): %r\n' % line) continue call = self.translate(mo) cycle.functions.append(call) cycle.parents = parents self.cycles[cycle.cycle] = cycle def parse_cg_entry(self, lines): if any("as a whole" in linelooper for linelooper in lines): self.parse_cycle_entry(lines) else: self.parse_function_entry(lines) def parse_cg(self): """Parse the call graph.""" # skip call graph header line = self.readline() while self._cg_header_re.match(line): line = self.readline() # process call graph entries entry_lines = [] # An EOF in readline terminates the program without returning. 
while not self._cg_footer_re.match(line): if line.isspace(): self.parse_cg_entry(entry_lines) entry_lines = [] else: entry_lines.append(line) line = self.readline() def parse(self): sys.stderr.write('warning: for axe format, edge weights are unreliable estimates derived from\nfunction total times.\n') self.parse_cg() self.fp.close() profile = Profile() profile[TIME] = 0.0 cycles = {} for index in self.cycles: cycles[index] = Cycle() for entry in compat_itervalues(self.functions): # populate the function function = Function(entry.index, entry.name) function[TIME] = entry.self function[TOTAL_TIME_RATIO] = entry.percentage_time / 100.0 # populate the function calls for child in entry.children: call = Call(child.index) # The following bogus value affects only the weighting of # the calls. call[TOTAL_TIME_RATIO] = function[TOTAL_TIME_RATIO] if child.index not in self.functions: # NOTE: functions that were never called but were discovered by gprof's # static call graph analysis dont have a call graph entry so we need # to add them here # FIXME: Is this applicable? missing = Function(child.index, child.name) function[TIME] = 0.0 profile.add_function(missing) function.add_call(call) profile.add_function(function) if entry.cycle is not None: try: cycle = cycles[entry.cycle] except KeyError: sys.stderr.write('warning: entry missing\n' % entry.cycle) cycle = Cycle() cycles[entry.cycle] = cycle cycle.add_function(function) profile[TIME] = profile[TIME] + function[TIME] for cycle in compat_itervalues(cycles): profile.add_cycle(cycle) # Compute derived events. profile.validate() profile.ratio(TIME_RATIO, TIME) # Lacking call counts, fake call ratios based on total times. profile.call_ratios(TOTAL_TIME_RATIO) # The TOTAL_TIME_RATIO of functions is already set. Propagate that # total time to the calls. (TOTAL_TIME is neither set nor used.) 
for function in compat_itervalues(profile.functions): for call in compat_itervalues(function.calls): if call.ratio is not None: callee = profile.functions[call.callee_id] call[TOTAL_TIME_RATIO] = call.ratio * callee[TOTAL_TIME_RATIO]; return profile class CallgrindParser(LineParser): """Parser for valgrind's callgrind tool. See also: - http://valgrind.org/docs/manual/cl-format.html """ _call_re = re.compile('^calls=\s*(\d+)\s+((\d+|\+\d+|-\d+|\*)\s+)+$') def __init__(self, infile): LineParser.__init__(self, infile) # Textual positions self.position_ids = {} self.positions = {} # Numeric positions self.num_positions = 1 self.cost_positions = ['line'] self.last_positions = [0] # Events self.num_events = 0 self.cost_events = [] self.profile = Profile() self.profile[SAMPLES] = 0 def parse(self): # read lookahead self.readline() self.parse_key('version') self.parse_key('creator') while self.parse_part(): pass if not self.eof(): sys.stderr.write('warning: line %u: unexpected line\n' % self.line_no) sys.stderr.write('%s\n' % self.lookahead()) # compute derived data self.profile.validate() self.profile.find_cycles() self.profile.ratio(TIME_RATIO, SAMPLES) self.profile.call_ratios(CALLS) self.profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) return self.profile def parse_part(self): if not self.parse_header_line(): return False while self.parse_header_line(): pass if not self.parse_body_line(): return False while self.parse_body_line(): pass return True def parse_header_line(self): return \ self.parse_empty() or \ self.parse_comment() or \ self.parse_part_detail() or \ self.parse_description() or \ self.parse_event_specification() or \ self.parse_cost_line_def() or \ self.parse_cost_summary() _detail_keys = set(('cmd', 'pid', 'thread', 'part')) def parse_part_detail(self): return self.parse_keys(self._detail_keys) def parse_description(self): return self.parse_key('desc') is not None def parse_event_specification(self): event = self.parse_key('event') if event is None: return 
False return True def parse_cost_line_def(self): pair = self.parse_keys(('events', 'positions')) if pair is None: return False key, value = pair items = value.split() if key == 'events': self.num_events = len(items) self.cost_events = items if key == 'positions': self.num_positions = len(items) self.cost_positions = items self.last_positions = [0]*self.num_positions return True def parse_cost_summary(self): pair = self.parse_keys(('summary', 'totals')) if pair is None: return False return True def parse_body_line(self): return \ self.parse_empty() or \ self.parse_comment() or \ self.parse_cost_line() or \ self.parse_position_spec() or \ self.parse_association_spec() __subpos_re = r'(0x[0-9a-fA-F]+|\d+|\+\d+|-\d+|\*)' _cost_re = re.compile(r'^' + __subpos_re + r'( +' + __subpos_re + r')*' + r'( +\d+)*' + '$') def parse_cost_line(self, calls=None): line = self.lookahead().rstrip() mo = self._cost_re.match(line) if not mo: return False function = self.get_function() if calls is None: # Unlike other aspects, call object (cob) is relative not to the # last call object, but to the caller's object (ob), so try to # update it when processing a functions cost line try: self.positions['cob'] = self.positions['ob'] except KeyError: pass values = line.split() assert len(values) <= self.num_positions + self.num_events positions = values[0 : self.num_positions] events = values[self.num_positions : ] events += ['0']*(self.num_events - len(events)) for i in range(self.num_positions): position = positions[i] if position == '*': position = self.last_positions[i] elif position[0] in '-+': position = self.last_positions[i] + int(position) elif position.startswith('0x'): position = int(position, 16) else: position = int(position) self.last_positions[i] = position events = [float(event) for event in events] if calls is None: function[SAMPLES] += events[0] self.profile[SAMPLES] += events[0] else: callee = self.get_callee() callee.called += calls try: call = function.calls[callee.id] 
except KeyError: call = Call(callee.id) call[CALLS] = calls call[SAMPLES] = events[0] function.add_call(call) else: call[CALLS] += calls call[SAMPLES] += events[0] self.consume() return True def parse_association_spec(self): line = self.lookahead() if not line.startswith('calls='): return False _, values = line.split('=', 1) values = values.strip().split() calls = int(values[0]) call_position = values[1:] self.consume() self.parse_cost_line(calls) return True _position_re = re.compile('^(?P[cj]?(?:ob|fl|fi|fe|fn))=\s*(?:\((?P\d+)\))?(?:\s*(?P.+))?') _position_table_map = { 'ob': 'ob', 'fl': 'fl', 'fi': 'fl', 'fe': 'fl', 'fn': 'fn', 'cob': 'ob', 'cfl': 'fl', 'cfi': 'fl', 'cfe': 'fl', 'cfn': 'fn', 'jfi': 'fl', } _position_map = { 'ob': 'ob', 'fl': 'fl', 'fi': 'fl', 'fe': 'fl', 'fn': 'fn', 'cob': 'cob', 'cfl': 'cfl', 'cfi': 'cfl', 'cfe': 'cfl', 'cfn': 'cfn', 'jfi': 'jfi', } def parse_position_spec(self): line = self.lookahead() if line.startswith('jump=') or line.startswith('jcnd='): self.consume() return True mo = self._position_re.match(line) if not mo: return False position, id, name = mo.groups() if id: table = self._position_table_map[position] if name: self.position_ids[(table, id)] = name else: name = self.position_ids.get((table, id), '') self.positions[self._position_map[position]] = name self.consume() return True def parse_empty(self): if self.eof(): return False line = self.lookahead() if line.strip(): return False self.consume() return True def parse_comment(self): line = self.lookahead() if not line.startswith('#'): return False self.consume() return True _key_re = re.compile(r'^(\w+):') def parse_key(self, key): pair = self.parse_keys((key,)) if not pair: return None key, value = pair return value line = self.lookahead() mo = self._key_re.match(line) if not mo: return None key, value = line.split(':', 1) if key not in keys: return None value = value.strip() self.consume() return key, value def parse_keys(self, keys): line = self.lookahead() mo = 
self._key_re.match(line) if not mo: return None key, value = line.split(':', 1) if key not in keys: return None value = value.strip() self.consume() return key, value def make_function(self, module, filename, name): # FIXME: module and filename are not being tracked reliably #id = '|'.join((module, filename, name)) id = name try: function = self.profile.functions[id] except KeyError: function = Function(id, name) if module: function.module = os.path.basename(module) function[SAMPLES] = 0 function.called = 0 self.profile.add_function(function) return function def get_function(self): module = self.positions.get('ob', '') filename = self.positions.get('fl', '') function = self.positions.get('fn', '') return self.make_function(module, filename, function) def get_callee(self): module = self.positions.get('cob', '') filename = self.positions.get('cfi', '') function = self.positions.get('cfn', '') return self.make_function(module, filename, function) class PerfParser(LineParser): """Parser for linux perf callgraph output. It expects output generated with perf record -g perf script | gprof2dot.py --format=perf """ def __init__(self, infile): LineParser.__init__(self, infile) self.profile = Profile() def readline(self): # Override LineParser.readline to ignore comment lines while True: LineParser.readline(self) if self.eof() or not self.lookahead().startswith('#'): break def parse(self): # read lookahead self.readline() profile = self.profile profile[SAMPLES] = 0 while not self.eof(): self.parse_event() # compute derived data profile.validate() profile.find_cycles() profile.ratio(TIME_RATIO, SAMPLES) profile.call_ratios(SAMPLES2) if totalMethod == "callratios": # Heuristic approach. TOTAL_SAMPLES is unused. profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) elif totalMethod == "callstacks": # Use the actual call chains for functions. profile[TOTAL_SAMPLES] = profile[SAMPLES] profile.ratio(TOTAL_TIME_RATIO, TOTAL_SAMPLES) # Then propagate that total time to the calls. 
for function in compat_itervalues(profile.functions): for call in compat_itervalues(function.calls): if call.ratio is not None: callee = profile.functions[call.callee_id] call[TOTAL_TIME_RATIO] = call.ratio * callee[TOTAL_TIME_RATIO]; else: assert False return profile def parse_event(self): if self.eof(): return line = self.consume() assert line callchain = self.parse_callchain() if not callchain: return callee = callchain[0] callee[SAMPLES] += 1 self.profile[SAMPLES] += 1 for caller in callchain[1:]: try: call = caller.calls[callee.id] except KeyError: call = Call(callee.id) call[SAMPLES2] = 1 caller.add_call(call) else: call[SAMPLES2] += 1 callee = caller # Increment TOTAL_SAMPLES only once on each function. stack = set(callchain) for function in stack: function[TOTAL_SAMPLES] += 1 def parse_callchain(self): callchain = [] while self.lookahead(): function = self.parse_call() if function is None: break callchain.append(function) if self.lookahead() == '': self.consume() return callchain call_re = re.compile(r'^\s+(?P
[0-9a-fA-F]+)\s+(?P.*)\s+\((?P[^)]*)\)$') def parse_call(self): line = self.consume() mo = self.call_re.match(line) assert mo if not mo: return None function_name = mo.group('symbol') if not function_name: function_name = mo.group('address') module = mo.group('module') function_id = function_name + ':' + module try: function = self.profile.functions[function_id] except KeyError: function = Function(function_id, function_name) function.module = os.path.basename(module) function[SAMPLES] = 0 function[TOTAL_SAMPLES] = 0 self.profile.add_function(function) return function class OprofileParser(LineParser): """Parser for oprofile callgraph output. See also: - http://oprofile.sourceforge.net/doc/opreport.html#opreport-callgraph """ _fields_re = { 'samples': r'(\d+)', '%': r'(\S+)', 'linenr info': r'(?P\(no location information\)|\S+:\d+)', 'image name': r'(?P\S+(?:\s\(tgid:[^)]*\))?)', 'app name': r'(?P\S+)', 'symbol name': r'(?P\(no symbols\)|.+?)', } def __init__(self, infile): LineParser.__init__(self, infile) self.entries = {} self.entry_re = None def add_entry(self, callers, function, callees): try: entry = self.entries[function.id] except KeyError: self.entries[function.id] = (callers, function, callees) else: callers_total, function_total, callees_total = entry self.update_subentries_dict(callers_total, callers) function_total.samples += function.samples self.update_subentries_dict(callees_total, callees) def update_subentries_dict(self, totals, partials): for partial in compat_itervalues(partials): try: total = totals[partial.id] except KeyError: totals[partial.id] = partial else: total.samples += partial.samples def parse(self): # read lookahead self.readline() self.parse_header() while self.lookahead(): self.parse_entry() profile = Profile() reverse_call_samples = {} # populate the profile profile[SAMPLES] = 0 for _callers, _function, _callees in compat_itervalues(self.entries): function = Function(_function.id, _function.name) function[SAMPLES] = 
_function.samples profile.add_function(function) profile[SAMPLES] += _function.samples if _function.application: function.process = os.path.basename(_function.application) if _function.image: function.module = os.path.basename(_function.image) total_callee_samples = 0 for _callee in compat_itervalues(_callees): total_callee_samples += _callee.samples for _callee in compat_itervalues(_callees): if not _callee.self: call = Call(_callee.id) call[SAMPLES2] = _callee.samples function.add_call(call) # compute derived data profile.validate() profile.find_cycles() profile.ratio(TIME_RATIO, SAMPLES) profile.call_ratios(SAMPLES2) profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) return profile def parse_header(self): while not self.match_header(): self.consume() line = self.lookahead() fields = re.split(r'\s\s+', line) entry_re = r'^\s*' + r'\s+'.join([self._fields_re[field] for field in fields]) + r'(?P\s+\[self\])?$' self.entry_re = re.compile(entry_re) self.skip_separator() def parse_entry(self): callers = self.parse_subentries() if self.match_primary(): function = self.parse_subentry() if function is not None: callees = self.parse_subentries() self.add_entry(callers, function, callees) self.skip_separator() def parse_subentries(self): subentries = {} while self.match_secondary(): subentry = self.parse_subentry() subentries[subentry.id] = subentry return subentries def parse_subentry(self): entry = Struct() line = self.consume() mo = self.entry_re.match(line) if not mo: raise ParseError('failed to parse', line) fields = mo.groupdict() entry.samples = int(mo.group(1)) if 'source' in fields and fields['source'] != '(no location information)': source = fields['source'] filename, lineno = source.split(':') entry.filename = filename entry.lineno = int(lineno) else: source = '' entry.filename = None entry.lineno = None entry.image = fields.get('image', '') entry.application = fields.get('application', '') if 'symbol' in fields and fields['symbol'] != '(no symbols)': entry.symbol 
= fields['symbol'] else: entry.symbol = '' if entry.symbol.startswith('"') and entry.symbol.endswith('"'): entry.symbol = entry.symbol[1:-1] entry.id = ':'.join((entry.application, entry.image, source, entry.symbol)) entry.self = fields.get('self', None) != None if entry.self: entry.id += ':self' if entry.symbol: entry.name = entry.symbol else: entry.name = entry.image return entry def skip_separator(self): while not self.match_separator(): self.consume() self.consume() def match_header(self): line = self.lookahead() return line.startswith('samples') def match_separator(self): line = self.lookahead() return line == '-'*len(line) def match_primary(self): line = self.lookahead() return not line[:1].isspace() def match_secondary(self): line = self.lookahead() return line[:1].isspace() class HProfParser(LineParser): """Parser for java hprof output See also: - http://java.sun.com/developer/technicalArticles/Programming/HPROF.html """ trace_re = re.compile(r'\t(.*)\((.*):(.*)\)') trace_id_re = re.compile(r'^TRACE (\d+):$') def __init__(self, infile): LineParser.__init__(self, infile) self.traces = {} self.samples = {} def parse(self): # read lookahead self.readline() while not self.lookahead().startswith('------'): self.consume() while not self.lookahead().startswith('TRACE '): self.consume() self.parse_traces() while not self.lookahead().startswith('CPU'): self.consume() self.parse_samples() # populate the profile profile = Profile() profile[SAMPLES] = 0 functions = {} # build up callgraph for id, trace in compat_iteritems(self.traces): if not id in self.samples: continue mtime = self.samples[id][0] last = None for func, file, line in trace: if not func in functions: function = Function(func, func) function[SAMPLES] = 0 profile.add_function(function) functions[func] = function function = functions[func] # allocate time to the deepest method in the trace if not last: function[SAMPLES] += mtime profile[SAMPLES] += mtime else: c = function.get_call(last) c[SAMPLES2] += 
mtime last = func # compute derived data profile.validate() profile.find_cycles() profile.ratio(TIME_RATIO, SAMPLES) profile.call_ratios(SAMPLES2) profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) return profile def parse_traces(self): while self.lookahead().startswith('TRACE '): self.parse_trace() def parse_trace(self): l = self.consume() mo = self.trace_id_re.match(l) tid = mo.group(1) last = None trace = [] while self.lookahead().startswith('\t'): l = self.consume() match = self.trace_re.search(l) if not match: #sys.stderr.write('Invalid line: %s\n' % l) break else: function_name, file, line = match.groups() trace += [(function_name, file, line)] self.traces[int(tid)] = trace def parse_samples(self): self.consume() self.consume() while not self.lookahead().startswith('CPU'): rank, percent_self, percent_accum, count, traceid, method = self.lookahead().split() self.samples[int(traceid)] = (int(count), method) self.consume() class SysprofParser(XmlParser): def __init__(self, stream): XmlParser.__init__(self, stream) def parse(self): objects = {} nodes = {} self.element_start('profile') while self.token.type == XML_ELEMENT_START: if self.token.name_or_data == 'objects': assert not objects objects = self.parse_items('objects') elif self.token.name_or_data == 'nodes': assert not nodes nodes = self.parse_items('nodes') else: self.parse_value(self.token.name_or_data) self.element_end('profile') return self.build_profile(objects, nodes) def parse_items(self, name): assert name[-1] == 's' items = {} self.element_start(name) while self.token.type == XML_ELEMENT_START: id, values = self.parse_item(name[:-1]) assert id not in items items[id] = values self.element_end(name) return items def parse_item(self, name): attrs = self.element_start(name) id = int(attrs['id']) values = self.parse_values() self.element_end(name) return id, values def parse_values(self): values = {} while self.token.type == XML_ELEMENT_START: name = self.token.name_or_data value = self.parse_value(name) 
assert name not in values values[name] = value return values def parse_value(self, tag): self.element_start(tag) value = self.character_data() self.element_end(tag) if value.isdigit(): return int(value) if value.startswith('"') and value.endswith('"'): return value[1:-1] return value def build_profile(self, objects, nodes): profile = Profile() profile[SAMPLES] = 0 for id, object in compat_iteritems(objects): # Ignore fake objects (process names, modules, "Everything", "kernel", etc.) if object['self'] == 0: continue function = Function(id, object['name']) function[SAMPLES] = object['self'] profile.add_function(function) profile[SAMPLES] += function[SAMPLES] for id, node in compat_iteritems(nodes): # Ignore fake calls if node['self'] == 0: continue # Find a non-ignored parent parent_id = node['parent'] while parent_id != 0: parent = nodes[parent_id] caller_id = parent['object'] if objects[caller_id]['self'] != 0: break parent_id = parent['parent'] if parent_id == 0: continue callee_id = node['object'] assert objects[caller_id]['self'] assert objects[callee_id]['self'] function = profile.functions[caller_id] samples = node['self'] try: call = function.calls[callee_id] except KeyError: call = Call(callee_id) call[SAMPLES2] = samples function.add_call(call) else: call[SAMPLES2] += samples # Compute derived events profile.validate() profile.find_cycles() profile.ratio(TIME_RATIO, SAMPLES) profile.call_ratios(SAMPLES2) profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) return profile class XPerfParser(Parser): """Parser for CSVs generted by XPerf, from Microsoft Windows Performance Tools. 
""" def __init__(self, stream): Parser.__init__(self) self.stream = stream self.profile = Profile() self.profile[SAMPLES] = 0 self.column = {} def parse(self): import csv reader = csv.reader( self.stream, delimiter = ',', quotechar = None, escapechar = None, doublequote = False, skipinitialspace = True, lineterminator = '\r\n', quoting = csv.QUOTE_NONE) header = True for row in reader: if header: self.parse_header(row) header = False else: self.parse_row(row) # compute derived data self.profile.validate() self.profile.find_cycles() self.profile.ratio(TIME_RATIO, SAMPLES) self.profile.call_ratios(SAMPLES2) self.profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) return self.profile def parse_header(self, row): for column in range(len(row)): name = row[column] assert name not in self.column self.column[name] = column def parse_row(self, row): fields = {} for name, column in compat_iteritems(self.column): value = row[column] for factory in int, float: try: value = factory(value) except ValueError: pass else: break fields[name] = value process = fields['Process Name'] symbol = fields['Module'] + '!' + fields['Function'] weight = fields['Weight'] count = fields['Count'] if process == 'Idle': return function = self.get_function(process, symbol) function[SAMPLES] += weight * count self.profile[SAMPLES] += weight * count stack = fields['Stack'] if stack != '?': stack = stack.split('/') assert stack[0] == '[Root]' if stack[-1] != symbol: # XXX: some cases the sampled function does not appear in the stack stack.append(symbol) caller = None for symbol in stack[1:]: callee = self.get_function(process, symbol) if caller is not None: try: call = caller.calls[callee.id] except KeyError: call = Call(callee.id) call[SAMPLES2] = count caller.add_call(call) else: call[SAMPLES2] += count caller = callee def get_function(self, process, symbol): function_id = process + '!' 
+ symbol try: function = self.profile.functions[function_id] except KeyError: module, name = symbol.split('!', 1) function = Function(function_id, name) function.process = process function.module = module function[SAMPLES] = 0 self.profile.add_function(function) return function class SleepyParser(Parser): """Parser for GNU gprof output. See also: - http://www.codersnotes.com/sleepy/ - http://sleepygraph.sourceforge.net/ """ stdinInput = False def __init__(self, filename): Parser.__init__(self) from zipfile import ZipFile self.database = ZipFile(filename) self.symbols = {} self.calls = {} self.profile = Profile() _symbol_re = re.compile( r'^(?P\w+)' + r'\s+"(?P[^"]*)"' + r'\s+"(?P[^"]*)"' + r'\s+"(?P[^"]*)"' + r'\s+(?P\d+)$' ) def openEntry(self, name): # Some versions of verysleepy use lowercase filenames for database_name in self.database.namelist(): if name.lower() == database_name.lower(): name = database_name break return self.database.open(name, 'rU') def parse_symbols(self): for line in self.openEntry('Symbols.txt'): line = line.decode('UTF-8') mo = self._symbol_re.match(line) if mo: symbol_id, module, procname, sourcefile, sourceline = mo.groups() function_id = ':'.join([module, procname]) try: function = self.profile.functions[function_id] except KeyError: function = Function(function_id, procname) function.module = module function[SAMPLES] = 0 self.profile.add_function(function) self.symbols[symbol_id] = function def parse_callstacks(self): for line in self.openEntry('Callstacks.txt'): line = line.decode('UTF-8') fields = line.split() samples = float(fields[0]) callstack = fields[1:] callstack = [self.symbols[symbol_id] for symbol_id in callstack] callee = callstack[0] callee[SAMPLES] += samples self.profile[SAMPLES] += samples for caller in callstack[1:]: try: call = caller.calls[callee.id] except KeyError: call = Call(callee.id) call[SAMPLES2] = samples caller.add_call(call) else: call[SAMPLES2] += samples callee = caller def parse(self): profile = 
self.profile profile[SAMPLES] = 0 self.parse_symbols() self.parse_callstacks() # Compute derived events profile.validate() profile.find_cycles() profile.ratio(TIME_RATIO, SAMPLES) profile.call_ratios(SAMPLES2) profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) return profile class AQtimeTable: def __init__(self, name, fields): self.name = name self.fields = fields self.field_column = {} for column in range(len(fields)): self.field_column[fields[column]] = column self.rows = [] def __len__(self): return len(self.rows) def __iter__(self): for values, children in self.rows: fields = {} for name, value in zip(self.fields, values): fields[name] = value children = dict([(child.name, child) for child in children]) yield fields, children raise StopIteration def add_row(self, values, children=()): self.rows.append((values, children)) class AQtimeParser(XmlParser): def __init__(self, stream): XmlParser.__init__(self, stream) self.tables = {} def parse(self): self.element_start('AQtime_Results') self.parse_headers() results = self.parse_results() self.element_end('AQtime_Results') return self.build_profile(results) def parse_headers(self): self.element_start('HEADERS') while self.token.type == XML_ELEMENT_START: self.parse_table_header() self.element_end('HEADERS') def parse_table_header(self): attrs = self.element_start('TABLE_HEADER') name = attrs['NAME'] id = int(attrs['ID']) field_types = [] field_names = [] while self.token.type == XML_ELEMENT_START: field_type, field_name = self.parse_table_field() field_types.append(field_type) field_names.append(field_name) self.element_end('TABLE_HEADER') self.tables[id] = name, field_types, field_names def parse_table_field(self): attrs = self.element_start('TABLE_FIELD') type = attrs['TYPE'] name = self.character_data() self.element_end('TABLE_FIELD') return type, name def parse_results(self): self.element_start('RESULTS') table = self.parse_data() self.element_end('RESULTS') return table def parse_data(self): rows = [] attrs = 
self.element_start('DATA') table_id = int(attrs['TABLE_ID']) table_name, field_types, field_names = self.tables[table_id] table = AQtimeTable(table_name, field_names) while self.token.type == XML_ELEMENT_START: row, children = self.parse_row(field_types) table.add_row(row, children) self.element_end('DATA') return table def parse_row(self, field_types): row = [None]*len(field_types) children = [] self.element_start('ROW') while self.token.type == XML_ELEMENT_START: if self.token.name_or_data == 'FIELD': field_id, field_value = self.parse_field(field_types) row[field_id] = field_value elif self.token.name_or_data == 'CHILDREN': children = self.parse_children() else: raise XmlTokenMismatch(" or ", self.token) self.element_end('ROW') return row, children def parse_field(self, field_types): attrs = self.element_start('FIELD') id = int(attrs['ID']) type = field_types[id] value = self.character_data() if type == 'Integer': value = int(value) elif type == 'Float': value = float(value) elif type == 'Address': value = int(value) elif type == 'String': pass else: assert False self.element_end('FIELD') return id, value def parse_children(self): children = [] self.element_start('CHILDREN') while self.token.type == XML_ELEMENT_START: table = self.parse_data() assert table.name not in children children.append(table) self.element_end('CHILDREN') return children def build_profile(self, results): assert results.name == 'Routines' profile = Profile() profile[TIME] = 0.0 for fields, tables in results: function = self.build_function(fields) children = tables['Children'] for fields, _ in children: call = self.build_call(fields) function.add_call(call) profile.add_function(function) profile[TIME] = profile[TIME] + function[TIME] profile[TOTAL_TIME] = profile[TIME] profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME) return profile def build_function(self, fields): function = Function(self.build_id(fields), self.build_name(fields)) function[TIME] = fields['Time'] function[TOTAL_TIME] = 
fields['Time with Children'] #function[TIME_RATIO] = fields['% Time']/100.0 #function[TOTAL_TIME_RATIO] = fields['% with Children']/100.0 return function def build_call(self, fields): call = Call(self.build_id(fields)) call[TIME] = fields['Time'] call[TOTAL_TIME] = fields['Time with Children'] #call[TIME_RATIO] = fields['% Time']/100.0 #call[TOTAL_TIME_RATIO] = fields['% with Children']/100.0 return call def build_id(self, fields): return ':'.join([fields['Module Name'], fields['Unit Name'], fields['Routine Name']]) def build_name(self, fields): # TODO: use more fields return fields['Routine Name'] class PstatsParser: """Parser python profiling statistics saved with te pstats module.""" stdinInput = False multipleInput = True def __init__(self, *filename): import pstats try: self.stats = pstats.Stats(*filename) except ValueError: if sys.version_info[0] >= 3: raise import hotshot.stats self.stats = hotshot.stats.load(filename[0]) self.profile = Profile() self.function_ids = {} def get_function_name(self, key): filename, line, name = key module = os.path.splitext(filename)[0] module = os.path.basename(module) return "%s:%d:%s" % (module, line, name) def get_function(self, key): try: id = self.function_ids[key] except KeyError: id = len(self.function_ids) name = self.get_function_name(key) function = Function(id, name) self.profile.functions[id] = function self.function_ids[key] = id else: function = self.profile.functions[id] return function def parse(self): self.profile[TIME] = 0.0 self.profile[TOTAL_TIME] = self.stats.total_tt for fn, (cc, nc, tt, ct, callers) in compat_iteritems(self.stats.stats): callee = self.get_function(fn) callee.called = nc callee[TOTAL_TIME] = ct callee[TIME] = tt self.profile[TIME] += tt self.profile[TOTAL_TIME] = max(self.profile[TOTAL_TIME], ct) for fn, value in compat_iteritems(callers): caller = self.get_function(fn) call = Call(callee.id) if isinstance(value, tuple): for i in xrange(0, len(value), 4): nc, cc, tt, ct = value[i:i+4] if 
CALLS in call: call[CALLS] += cc else: call[CALLS] = cc if TOTAL_TIME in call: call[TOTAL_TIME] += ct else: call[TOTAL_TIME] = ct else: call[CALLS] = value call[TOTAL_TIME] = ratio(value, nc)*ct caller.add_call(call) #self.stats.print_stats() #self.stats.print_callees() # Compute derived events self.profile.validate() self.profile.ratio(TIME_RATIO, TIME) self.profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME) return self.profile class Theme: def __init__(self, bgcolor = (0.0, 0.0, 1.0), mincolor = (0.0, 0.0, 0.0), maxcolor = (0.0, 0.0, 1.0), fontname = "Arial", fontcolor = "white", nodestyle = "filled", minfontsize = 10.0, maxfontsize = 10.0, minpenwidth = 0.5, maxpenwidth = 4.0, gamma = 2.2, skew = 1.0): self.bgcolor = bgcolor self.mincolor = mincolor self.maxcolor = maxcolor self.fontname = fontname self.fontcolor = fontcolor self.nodestyle = nodestyle self.minfontsize = minfontsize self.maxfontsize = maxfontsize self.minpenwidth = minpenwidth self.maxpenwidth = maxpenwidth self.gamma = gamma self.skew = skew def graph_bgcolor(self): return self.hsl_to_rgb(*self.bgcolor) def graph_fontname(self): return self.fontname def graph_fontcolor(self): return self.fontcolor def graph_fontsize(self): return self.minfontsize def node_bgcolor(self, weight): return self.color(weight) def node_fgcolor(self, weight): if self.nodestyle == "filled": return self.graph_bgcolor() else: return self.color(weight) def node_fontsize(self, weight): return self.fontsize(weight) def node_style(self): return self.nodestyle def edge_color(self, weight): return self.color(weight) def edge_fontsize(self, weight): return self.fontsize(weight) def edge_penwidth(self, weight): return max(weight*self.maxpenwidth, self.minpenwidth) def edge_arrowsize(self, weight): return 0.5 * math.sqrt(self.edge_penwidth(weight)) def fontsize(self, weight): return max(weight**2 * self.maxfontsize, self.minfontsize) def color(self, weight): weight = min(max(weight, 0.0), 1.0) hmin, smin, lmin = self.mincolor hmax, smax, 
lmax = self.maxcolor if self.skew < 0: raise ValueError("Skew must be greater than 0") elif self.skew == 1.0: h = hmin + weight*(hmax - hmin) s = smin + weight*(smax - smin) l = lmin + weight*(lmax - lmin) else: base = self.skew h = hmin + ((hmax-hmin)*(-1.0 + (base ** weight)) / (base - 1.0)) s = smin + ((smax-smin)*(-1.0 + (base ** weight)) / (base - 1.0)) l = lmin + ((lmax-lmin)*(-1.0 + (base ** weight)) / (base - 1.0)) return self.hsl_to_rgb(h, s, l) def hsl_to_rgb(self, h, s, l): """Convert a color from HSL color-model to RGB. See also: - http://www.w3.org/TR/css3-color/#hsl-color """ h = h % 1.0 s = min(max(s, 0.0), 1.0) l = min(max(l, 0.0), 1.0) if l <= 0.5: m2 = l*(s + 1.0) else: m2 = l + s - l*s m1 = l*2.0 - m2 r = self._hue_to_rgb(m1, m2, h + 1.0/3.0) g = self._hue_to_rgb(m1, m2, h) b = self._hue_to_rgb(m1, m2, h - 1.0/3.0) # Apply gamma correction r **= self.gamma g **= self.gamma b **= self.gamma return (r, g, b) def _hue_to_rgb(self, m1, m2, h): if h < 0.0: h += 1.0 elif h > 1.0: h -= 1.0 if h*6 < 1.0: return m1 + (m2 - m1)*h*6.0 elif h*2 < 1.0: return m2 elif h*3 < 2.0: return m1 + (m2 - m1)*(2.0/3.0 - h)*6.0 else: return m1 TEMPERATURE_COLORMAP = Theme( mincolor = (2.0/3.0, 0.80, 0.25), # dark blue maxcolor = (0.0, 1.0, 0.5), # satured red gamma = 1.0 ) PINK_COLORMAP = Theme( mincolor = (0.0, 1.0, 0.90), # pink maxcolor = (0.0, 1.0, 0.5), # satured red ) GRAY_COLORMAP = Theme( mincolor = (0.0, 0.0, 0.85), # light gray maxcolor = (0.0, 0.0, 0.0), # black ) BW_COLORMAP = Theme( minfontsize = 8.0, maxfontsize = 24.0, mincolor = (0.0, 0.0, 0.0), # black maxcolor = (0.0, 0.0, 0.0), # black minpenwidth = 0.1, maxpenwidth = 8.0, ) PRINT_COLORMAP = Theme( minfontsize = 18.0, maxfontsize = 30.0, fontcolor = "black", nodestyle = "solid", mincolor = (0.0, 0.0, 0.0), # black maxcolor = (0.0, 0.0, 0.0), # black minpenwidth = 0.1, maxpenwidth = 8.0, ) class DotWriter: """Writer for the DOT language. 
See also: - "The DOT Language" specification http://www.graphviz.org/doc/info/lang.html """ strip = False wrap = False def __init__(self, fp): self.fp = fp def wrap_function_name(self, name): """Split the function name on multiple lines.""" if len(name) > 32: ratio = 2.0/3.0 height = max(int(len(name)/(1.0 - ratio) + 0.5), 1) width = max(len(name)/height, 32) # TODO: break lines in symbols name = textwrap.fill(name, width, break_long_words=False) # Take away spaces name = name.replace(", ", ",") name = name.replace("> >", ">>") name = name.replace("> >", ">>") # catch consecutive return name show_function_events = [TOTAL_TIME_RATIO, TIME_RATIO] show_edge_events = [TOTAL_TIME_RATIO, CALLS] def graph(self, profile, theme): self.begin_graph() fontname = theme.graph_fontname() fontcolor = theme.graph_fontcolor() nodestyle = theme.node_style() self.attr('graph', fontname=fontname, ranksep=0.25, nodesep=0.125) self.attr('node', fontname=fontname, shape="box", style=nodestyle, fontcolor=fontcolor, width=0, height=0) self.attr('edge', fontname=fontname) for function in compat_itervalues(profile.functions): labels = [] if function.process is not None: labels.append(function.process) if function.module is not None: labels.append(function.module) if self.strip: function_name = function.stripped_name() else: function_name = function.name if self.wrap: function_name = self.wrap_function_name(function_name) labels.append(function_name) for event in self.show_function_events: if event in function.events: label = event.format(function[event]) labels.append(label) if function.called is not None: labels.append("%u%s" % (function.called, MULTIPLICATION_SIGN)) if function.weight is not None: weight = function.weight else: weight = 0.0 label = '\n'.join(labels) self.node(function.id, label = label, color = self.color(theme.node_bgcolor(weight)), fontcolor = self.color(theme.node_fgcolor(weight)), fontsize = "%.2f" % theme.node_fontsize(weight), ) for call in 
compat_itervalues(function.calls): callee = profile.functions[call.callee_id] labels = [] for event in self.show_edge_events: if event in call.events: label = event.format(call[event]) labels.append(label) if call.weight is not None: weight = call.weight elif callee.weight is not None: weight = callee.weight else: weight = 0.0 label = '\n'.join(labels) self.edge(function.id, call.callee_id, label = label, color = self.color(theme.edge_color(weight)), fontcolor = self.color(theme.edge_color(weight)), fontsize = "%.2f" % theme.edge_fontsize(weight), penwidth = "%.2f" % theme.edge_penwidth(weight), labeldistance = "%.2f" % theme.edge_penwidth(weight), arrowsize = "%.2f" % theme.edge_arrowsize(weight), ) self.end_graph() def begin_graph(self): self.write('digraph {\n') def end_graph(self): self.write('}\n') def attr(self, what, **attrs): self.write("\t") self.write(what) self.attr_list(attrs) self.write(";\n") def node(self, node, **attrs): self.write("\t") self.id(node) self.attr_list(attrs) self.write(";\n") def edge(self, src, dst, **attrs): self.write("\t") self.id(src) self.write(" -> ") self.id(dst) self.attr_list(attrs) self.write(";\n") def attr_list(self, attrs): if not attrs: return self.write(' [') first = True for name, value in compat_iteritems(attrs): if first: first = False else: self.write(", ") self.id(name) self.write('=') self.id(value) self.write(']') def id(self, id): if isinstance(id, (int, float)): s = str(id) elif isinstance(id, basestring): if id.isalnum() and not id.startswith('0x'): s = id else: s = self.escape(id) else: raise TypeError self.write(s) def color(self, rgb): r, g, b = rgb def float2int(f): if f <= 0.0: return 0 if f >= 1.0: return 255 return int(255.0*f + 0.5) return "#" + "".join(["%02x" % float2int(c) for c in (r, g, b)]) def escape(self, s): if not PYTHON_3: s = s.encode('utf-8') s = s.replace('\\', r'\\') s = s.replace('\n', r'\n') s = s.replace('\t', r'\t') s = s.replace('"', r'\"') return '"' + s + '"' def write(self, s): 
self.fp.write(s) class Main: """Main program.""" themes = { "color": TEMPERATURE_COLORMAP, "pink": PINK_COLORMAP, "gray": GRAY_COLORMAP, "bw": BW_COLORMAP, "print": PRINT_COLORMAP, } formats = { "aqtime": AQtimeParser, "axe": AXEParser, "callgrind": CallgrindParser, "hprof": HProfParser, "oprofile": OprofileParser, "perf": PerfParser, "prof": GprofParser, "pstats": PstatsParser, "sleepy": SleepyParser, "sysprof": SysprofParser, "xperf": XPerfParser, } def naturalJoin(self, values): if len(values) >= 2: return ', '.join(values[:-1]) + ' or ' + values[-1] else: return ''.join(values) def main(self): """Main program.""" global totalMethod formatNames = list(self.formats.keys()) formatNames.sort() optparser = optparse.OptionParser( usage="\n\t%prog [options] [file] ...") optparser.add_option( '-o', '--output', metavar='FILE', type="string", dest="output", help="output filename [stdout]") optparser.add_option( '-n', '--node-thres', metavar='PERCENTAGE', type="float", dest="node_thres", default=0.5, help="eliminate nodes below this threshold [default: %default]") optparser.add_option( '-e', '--edge-thres', metavar='PERCENTAGE', type="float", dest="edge_thres", default=0.1, help="eliminate edges below this threshold [default: %default]") optparser.add_option( '-f', '--format', type="choice", choices=formatNames, dest="format", default="prof", help="profile format: %s [default: %%default]" % self.naturalJoin(formatNames)) optparser.add_option( '--total', type="choice", choices=('callratios', 'callstacks'), dest="totalMethod", default=totalMethod, help="preferred method of calculating total time: callratios or callstacks (currently affects only perf format) [default: %default]") optparser.add_option( '-c', '--colormap', type="choice", choices=('color', 'pink', 'gray', 'bw', 'print'), dest="theme", default="color", help="color map: color, pink, gray, bw, or print [default: %default]") optparser.add_option( '-s', '--strip', action="store_true", dest="strip", default=False, 
help="strip function parameters, template parameters, and const modifiers from demangled C++ function names") optparser.add_option( '-w', '--wrap', action="store_true", dest="wrap", default=False, help="wrap function names") optparser.add_option( '--show-samples', action="store_true", dest="show_samples", default=False, help="show function samples") # add option to create subtree or show paths optparser.add_option( '-z', '--root', type="string", dest="root", default="", help="prune call graph to show only descendants of specified root function") optparser.add_option( '-l', '--leaf', type="string", dest="leaf", default="", help="prune call graph to show only ancestors of specified leaf function") # add a new option to control skew of the colorization curve optparser.add_option( '--skew', type="float", dest="theme_skew", default=1.0, help="skew the colorization curve. Values < 1.0 give more variety to lower percentages. Values > 1.0 give less variety to lower percentages") (self.options, self.args) = optparser.parse_args(sys.argv[1:]) if len(self.args) > 1 and self.options.format != 'pstats': optparser.error('incorrect number of arguments') try: self.theme = self.themes[self.options.theme] except KeyError: optparser.error('invalid colormap \'%s\'' % self.options.theme) # set skew on the theme now that it has been picked. 
if self.options.theme_skew: self.theme.skew = self.options.theme_skew totalMethod = self.options.totalMethod try: Format = self.formats[self.options.format] except KeyError: optparser.error('invalid format \'%s\'' % self.options.format) if Format.stdinInput: if not self.args: fp = sys.stdin else: fp = open(self.args[0], 'rt') parser = Format(fp) elif Format.multipleInput: if not self.args: optparser.error('at least a file must be specified for %s input' % self.options.format) parser = Format(*self.args) else: if len(self.args) != 1: optparser.error('exactly one file must be specified for %s input' % self.options.format) parser = Format(self.args[0]) self.profile = parser.parse() if self.options.output is None: self.output = sys.stdout else: if PYTHON_3: self.output = open(self.options.output, 'wt', encoding='UTF-8') else: self.output = open(self.options.output, 'wt') self.write_graph() def write_graph(self): dot = DotWriter(self.output) dot.strip = self.options.strip dot.wrap = self.options.wrap if self.options.show_samples: dot.show_function_events.append(SAMPLES) profile = self.profile profile.prune(self.options.node_thres/100.0, self.options.edge_thres/100.0) if self.options.root: rootId = profile.getFunctionId(self.options.root) if not rootId: sys.stderr.write('root node ' + self.options.root + ' not found (might already be pruned : try -e0 -n0 flags)\n') sys.exit(1) profile.prune_root(rootId) if self.options.leaf: leafId = profile.getFunctionId(self.options.leaf) if not leafId: sys.stderr.write('leaf node ' + self.options.leaf + ' not found (maybe already pruned : try -e0 -n0 flags)\n') sys.exit(1) profile.prune_leaf(leafId) dot.graph(profile, self.theme) if __name__ == '__main__': Main().main()