The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# vim: ts=8 sw=4 expandtab:
##########################################################
## This script is part of the Devel::NYTProf distribution
##
## Copyright, contact and other information can be found
## at the bottom of this file, or by going to:
## http://metacpan.org/release/Devel-NYTProf/
##
###########################################################
package Devel::NYTProf::Reader;

our $VERSION = '4.06';

use warnings;
use strict;
use Carp;
use Config;

use List::Util qw(sum max);
use Data::Dumper;

use Devel::NYTProf::Data;
use Devel::NYTProf::Util qw(
    fmt_float
    fmt_time
    html_safe_filename
    calculate_median_absolute_deviation
    trace_level
);

# These control the limits for what the script will consider ok to severe times
# specified in standard deviations from the mean time
use constant SEVERITY_SEVERE => 2.0;    # above this deviation, a bottleneck
use constant SEVERITY_BAD    => 1.0;
use constant SEVERITY_GOOD   => 0.5;    # within this deviation, okay


# Static class variables
our $FLOAT_FORMAT = $Config{nvfformat};
$FLOAT_FORMAT =~ s/"//g;

# Class methods
sub new {
    my $class = shift;
    my $file  = shift;
    my $opts  = shift || {};

    my $self = {
        file => $file || 'nytprof.out',
        output_dir => '.',
        suffix     => '.csv',
        header     => "# Profile data generated by Devel::NYTProf::Reader\n"
            . "# Version: v$Devel::NYTProf::Core::VERSION\n"
            . "# More information at http://metacpan.org/release/Devel-NYTProf/\n"
            . "# Format: time,calls,time/call,code\n",
        datastart => '',
        mk_report_source_line => undef,
        mk_report_xsub_line   => undef,
        mk_report_separator_line => undef,
        line      => [
            {},
            {value => 'time',      end => ',', default => '0'},
            {value => 'calls',     end => ',', default => '0'},
            {value => 'time/call', end => ',', default => '0'},
            {value => 'source',    end => '',  default => ''},
            {end   => "\n"}
        ],
        dataend  => '',
        footer   => '',
        merged_fids => '',
        taintmsg => "# WARNING!\n"
            . "# The source file used in generating this report has been modified\n"
            . "# since generating the profiler database.  It might be out of sync\n",
        sawampersand => "# NOTE!\n"
            . "# This file uses special regexp match variables that impact the performance\n"
            . "# of all regular expression in the program!\n"
            . "# See WARNING in http://perldoc.perl.org/perlre.html#Capture-buffers\n",
    };

    bless($self, $class);
    $self->{profile} = Devel::NYTProf::Data->new({
        %$opts,
        filename => $self->{file},
    });

    return $self;
}



##
sub set_param {
    my ($self, $param, $value) = @_;

    if (!exists $self->{$param}) {
        confess "Attempt to set $param to $value failed: $param is not a valid " . "parameter\n";
    }
    else {
        return $self->{$param} unless defined($value);
        $self->{$param} = $value;
    }
    undef;
}


sub get_param {
    my ($self, $param, $code_args) = @_;
    my $value = $self->{$param};
    if (ref $value eq 'CODE') {
        $code_args ||= [];
        $value = $value->(@$code_args);
    }
    return $value;
}

##
sub file_has_been_modified {
    my $self = shift;
    my $file = shift;
    return undef unless -f $file;
    my $mtime = (stat $file)[9];
    return ($mtime > $self->{profile}{attribute}{basetime});
}

##
sub _output_additional {
    my ($self, $fname, $content) = @_;
    open(OUT, '>', "$self->{output_dir}/$fname")
        or confess "Unable to open $self->{output_dir}/$fname for writing; $!\n";
    print OUT $content;
    close OUT;
}

##
sub output_dir {
    my ($self, $dir) = @_;
    return $self->{output_dir} unless defined($dir);
    if (!mkdir $dir) {
        confess "Unable to create directory $dir: $!\n" if !$! =~ /exists/;
    }
    $self->{output_dir} = $dir;
}

##
sub report {
    my $self = shift;
    my ($opts) = @_;

    my $level_additional_sub = $opts->{level_additional};
    my $profile              = $self->{profile};
    my $modes                = $profile->get_profile_levels;
    my @levels = grep { {reverse %$modes}->{$_} } qw(sub block line);
    for my $level (@levels) {
        print "Writing $level reports to $self->{output_dir} directory\n"
            unless $opts->{quiet};
        $self->_generate_report($profile, $level,
            show_progress => (not $opts->{quiet} and -t STDOUT)
        );
        $level_additional_sub->($profile, $level)
            if $level_additional_sub;
    }
}

sub current_level {
    my $self = shift;
    $self->{current_level} = shift if @_;
    return $self->{current_level} || 'line';
}

sub fname_for_fileinfo {
    my ($self, $fi, $level) = @_;
    confess "No fileinfo" unless $fi;
    $level ||= $self->current_level;

    my $fname = $fi->filename_without_inc;

    # We want to have descriptive and unambiguous filename
    # but we don't want to risk failure due to filenames being longer
    # than MAXPATH (including the length of whatever dir we're writing
    # the report files into). So we truncate to the last component if
    # the filenames seems 'dangerously long'. XXX be smarter about this.
    # This is safe from ambiguity because we add the fid to the filename below.
    my $max_len = $ENV{NYTPROF_FNAME_TRIM} || 50;
    $fname =~ s!/.*/!/.../! if length($fname) > $max_len; # remove dir path
    $fname = "TOOLONG"      if length($fname) > $max_len; # just in case

    $fname = html_safe_filename($fname);
    $fname .= "-".$fi->fid; # to ensure uniqueness and for info
    $fname .= "-$level" if $level;

    return $fname;
}


##
sub _generate_report {
    my $self = shift;
    my ($profile, $LEVEL, %opts) = @_;

    $self->current_level($LEVEL);

    my @all_fileinfos = $profile->all_fileinfos
        or carp "Profile report data contains no files";

    #$profile->dump_profile_data({ filehandle => \*STDERR, separator=>"\t", });

    my @fis = @all_fileinfos;
    if ($LEVEL ne 'line') {
        # we only generate line-level reports for evals
        # for efficiency and because some data model editing only
        # is only implemented for line-level data
        @fis = grep { not $_->is_eval } @fis;
    }

    my $progress;
    foreach my $fi (@fis) {

        if ($opts{show_progress}) {
            local $| = 1;
            ++$progress;
            printf "\r %3d%% ... ", $progress/@fis*100;
        }

        my $meta = $fi->meta;
        my $filestr = $fi->filename;

        # { linenumber => { subname => [ count, time ] } }
        my $subcalls_at_line = { %{ $fi->sub_call_lines } };
        my $subcalls_max_line = max( keys %$subcalls_at_line ) || 0;

        # { linenumber => [ $subinfo, ... ] }
        my $subdefs_at_line = { %{ $profile->subs_defined_in_file_by_line($filestr) } };
        my $subdefs_max_line = max( keys %$subdefs_at_line ) || 0;
        delete $subdefs_at_line->{0}; # xsubs handled separately

        # { linenumber => { fid => $fileinfo } }
        my $evals_at_line = { %{ $fi->evals_by_line } };
        my $evals_max_line = max( keys %$evals_at_line ) || 0;

        # note that a file may have no source lines executed, so no keys here
        # (but is included because some xsubs in the package were executed)
        my $lines_array = $fi->line_time_data([$LEVEL]) || [];
        my $src_max_line = scalar @$lines_array;

        for ($src_max_line, $subcalls_max_line, $subdefs_max_line, $evals_max_line) {
            next if $_ < 2**16;
            warn "Ignoring indication that $filestr has $_ lines! (Possibly corrupt data)\n";
            $_ = 0;
        }

        my $max_linenum = max(
            $src_max_line,
            $subcalls_max_line,
            $subdefs_max_line,
            $evals_max_line,
        );

        warn sprintf "%s max lines: %s (stmts %s, subcalls %s, subdefs %s, evals %s)\n",
                $filestr, $max_linenum, scalar @$lines_array,
                $subcalls_max_line, $subdefs_max_line, $evals_max_line
            if trace_level() >= 4 or $max_linenum > 2**15;

        my %stats_accum;           # holds all line times. used to find median
        my %stats_by_line;         # holds individual line stats
        my $runningTotalTime = 0;  # holds the running total
        # (should equal sum of $stats_accum)
        my $runningTotalCalls = 0; # holds the running total number of calls.

        for (my $linenum = 0; $linenum <= $max_linenum; ++$linenum) {

            if (my $subdefs = delete $subdefs_at_line->{$linenum}) {
                $stats_by_line{$linenum}->{'subdef_info'}  = $subdefs;
            }

            if (my $subcalls = delete $subcalls_at_line->{$linenum}) {
                my $line_stats = $stats_by_line{$linenum} ||= {};

                $line_stats->{'subcall_info'}  = $subcalls;
                $line_stats->{'subcall_count'} = sum(map { $_->[0] } values %$subcalls);
                $line_stats->{'subcall_time'}  = sum(map { $_->[1] } values %$subcalls);

                push @{$stats_accum{$_}}, $line_stats->{$_}
                    for (qw(subcall_count subcall_time));
            }

            if (my $evalcalls = delete $evals_at_line->{$linenum}) {
                my $line_stats = $stats_by_line{$linenum} ||= {};

                # %$evals => { fid => $fileinfo }
                $line_stats->{'evalcall_info'}  = $evalcalls;
                $line_stats->{'evalcall_count'} = values %$evalcalls;

                # get list of evals, including nested evals
                my @eval_fis = map { ($_, $_->has_evals(1)) } values %$evalcalls;
                $line_stats->{'evalcall_count_nested'} = @eval_fis;
                $line_stats->{'evalcall_stmts_time_nested'} = sum(
                    map { $_->sum_of_stmts_time } @eval_fis);
            }

            if (my $stmts = $lines_array->[$linenum]) {
                next if !@$stmts; # XXX happens for evals, investigate

                my ($stmt_time, $stmt_count) = @$stmts;
                my $line_stats = $stats_by_line{$linenum} ||= {};

                # The debugger cannot stop on BEGIN{...} lines.  A line in a begin
                # may set a scalar reference to something that needs to be eval'd later.
                # as a result, if the variable is expanded outside of the BEGIN, we'll
                # see the original BEGIN line, but it won't have any calls or times
                # associated. This will cause a divide by zero error.
                $stmt_count ||= 1;

                $line_stats->{'time'}  = $stmt_time;
                $line_stats->{'calls'} = $stmt_count;
                $line_stats->{'time/call'} = $stmt_time/$stmt_count;

                push @{$stats_accum{$_}}, $line_stats->{$_}
                    for (qw(time calls time/call));

                $runningTotalTime  += $stmt_time;
                $runningTotalCalls += $stmt_count;
            }

            warn "$linenum: @{[ %{ $stats_by_line{$linenum} } ]}\n"
                if trace_level() >= 3 && $stats_by_line{$linenum};
        }

        warn "unprocessed keys in subdefs_at_line: @{[ keys %$subdefs_at_line ]}\n"
            if %$subdefs_at_line;
        warn "unprocessed keys in subcalls_at_line: @{[ keys %$subcalls_at_line ]}\n"
            if %$subcalls_at_line;
        warn "unprocessed keys in evals_at_line: @{[ keys %$evals_at_line ]}\n"
            if %$evals_at_line;

        $meta->{'time'}      = $runningTotalTime;
        $meta->{'calls'}     = $runningTotalCalls;
        $meta->{'time/call'} =
            ($runningTotalCalls) ? $runningTotalTime / $runningTotalCalls: 0;

        # Use Median Absolute Deviation Formula to get file deviations for each of
        # calls, time and time/call values
        my %stats_for_file = (
            'calls'     => calculate_median_absolute_deviation($stats_accum{'calls'}||[]),
            'time'      => calculate_median_absolute_deviation($stats_accum{'time'}||[]),
            'time/call' => calculate_median_absolute_deviation($stats_accum{'time/call'}||[]),
            subcall_count => calculate_median_absolute_deviation($stats_accum{subcall_count}||[]),
            subcall_time  => calculate_median_absolute_deviation($stats_accum{subcall_time}||[]),
        );

        # the output file name that will be open later.  Not including directory at this time.
        # keep here so that the variable replacement subs can get at it.
        my $fname = $self->fname_for_fileinfo($fi) . $self->{suffix};

        # localize header and footer for variable replacement
        my $header    = $self->get_param('header',    [$profile, $fi, $fname, $LEVEL]);
        my $datastart = $self->get_param('datastart', [$profile, $fi]);
        my $dataend   = $self->get_param('dataend',   [$profile, $fi]);
        my $FILE      = $filestr;
#warn Dumper(\%stats_by_line);
        # open output file
        #warn "$self->{output_dir}/$fname";
        open(OUT, ">", "$self->{output_dir}/$fname")
            or confess "Unable to open $self->{output_dir}/$fname " . "for writing: $!\n";

        # begin output
        print OUT $header;

        # If we don't have savesrc for the file then we'll be reading the current
        # file contents which may have changed since the profile was run.
        # In this case we need to warn the user as the report would be garbled.
        print OUT $self->get_param('taintmsg', [$profile, $fi])
            if !$fi->has_savesrc and $self->file_has_been_modified($filestr);

        print OUT $self->get_param('sawampersand', [$profile, $fi])
            if $profile->{attribute}{sawampersand_fid}
            && $fi->fid == $profile->{attribute}{sawampersand_fid};

        print OUT $self->get_param('merged_fids', [$profile, $fi])
            if $fi->meta->{merged_fids};

        print OUT $datastart;

        my $LINE = 1;    # line number in source code
        my $src_lines = $fi->srclines_array;
        if (!$src_lines) { # no savesrc, and no file available

            my $msg;
            if ($fi->is_fake) {
                # eg the "/unknown-eval-invoker"
                $msg = "No source code available for synthetic (fake) file $filestr.",
            }
            elsif ($fi->is_eval) {
                $msg = "No source code available for string eval $filestr.\nYou probably need to use a more recent version of perl. See savesrc option in documentation.",
            }
            elsif ($filestr =~ m{^/loader/0x[0-9a-zA-Z]+/}) {
                # a synthetic file name that perl assigns when reading
                # code returned by a CODE ref in @INC
                $msg = "No source code available for 'file' loaded via CODE reference in \@INC.\nSee savesrc option in documentation.",
            }
            elsif (not $fi->is_file) {
                $msg = "No source code available for non-file '$filestr'.\nYou probably need to use a more recent version of perl. See savesrc option in documentation.",
            }
            else {
                $msg = "Unable to open '$filestr' for reading: $!";

                # clarify some current Moose limitations XXX
                if ($filestr =~ m!/(accessor .*) defined at /!) {
                    $msg = "Source for generated Moose $1 isn't available ($filestr: $!)";
                }
                elsif ($filestr =~ m!/(generated method \(unknown origin\))!) {
                    $msg = "Source for Moose $1 isn't available ($filestr: $!)";
                }

                # the report will not be complete, but this doesn't need to be fatal
                my $hint = '';
                $hint .= " Try running $0 in the same directory as you ran Devel::NYTProf, "
                      . "or ensure \@INC is correct."
                    if $filestr ne '-e'
                    and $filestr !~ m:^/:
                    and not our $_generate_report_inc_hint++;                # only once

                warn "$msg$hint\n"
                    unless our $_generate_report_filestr_warn->{$filestr}++; # only once per filestr

            }

            $src_lines = [ $msg ];
            $LINE = 0; # start numbering from 0 to flag fake contents
        }

        # ensure we don't have any undef source lines
        # (to avoid warnings from the code below)
        my $src_undefs;
        defined $_ or $_ = '' && ++$src_undefs for @$src_lines;
        # XXX shouldn't be need but don't have a test case so grumble
        # about it in the hope of getting a test case
        warn sprintf "Saw %d missing (undef) lines in the %d lines of source code for %s\n",
                $src_undefs, scalar @$src_lines, $filestr
            if $src_undefs;

        # Since we use @$src_lines to drive the report generation, pad the array to
        # ensure it has enough lines to include all the available profile info.
        # Then the report is still useful even if we have no source code.
        push @$src_lines, '' while @$src_lines < $max_linenum-1;

        if (my $z = $stats_by_line{0}) {
            # typically indicates cases where we could do better
            if (trace_level()) {
                warn "$filestr has unexpected info for line 0: @{[ %$z ]}\n";
                # sub defs: used to be xsubs but they're handled separately now
                # so there are no known causes of this any more
                if (my $i = $z->{subdef_info}) {
                    warn "0: @{[ map { $_->subname } @$i ]}\n"
                }
                # sub calls: they're typically END blocks that appear to be
                # invoked from the main .pl script perl ran.
                # Also some BEGINs and things like main::CORE:ftfile
                # (see CPANDB's cpangraph script for some examples)
                if (my $i = $z->{subcall_info}) {
                    warn sprintf "0: called %20s %s\n", $_, join " ", @{ $i->{$_} }
                        for sort keys %$i;
                }
            }

            $LINE = 0;
            unshift @$src_lines, "Profile data that couldn't be associated with a specific line:";
        }

        my $line_sub = $self->{mk_report_source_line}
            or die "mk_report_source_line not set";

        my $prev_line = '-';
        while ( @$src_lines ) {
            my $line = shift @$src_lines;
            chomp $line;

            # detect a series of blank lines, e.g. a chunk of pod savesrc didn't store
            my $skip_blanks = (
                $prev_line eq '' && $line eq '' &&            # blank behind and here
                @$src_lines && $src_lines->[0] =~ /^\s*$/ &&  # blank ahead
                !$stats_by_line{$LINE}                        # nothing to report
            );

            if ($line =~ m/^\# \s* line \s+ (\d+) \b/x) {
                # XXX we should be smarter about this - patches welcome!
                # We should at least ignore the common AutoSplit case
                # which we detect and workaround elsewhere.
                warn "Ignoring '$line' directive at line $LINE - profile data for $filestr will be out of sync with source\n"
                    unless our $line_directive_warn->{$filestr}++; # once per file
            }

            print OUT $line_sub->(
                ($skip_blanks) ? "- -" : $LINE, $line,
                $stats_by_line{$LINE} || {},
                \%stats_for_file,
                $profile,
                $fi,
            );

            if ($skip_blanks) {
                while (
                    @$src_lines && $src_lines->[0] =~ /^\s*$/ &&
                    !$stats_by_line{$LINE+1}
                ) {
                    shift @$src_lines;
                    $LINE++;
                }
            }
            $prev_line = $line;
        }
        continue {
            $LINE++;
        }

        my $separator_sub = $self->{mk_report_separator_line};

        # iterate over xsubs 
        $line_sub = $self->{mk_report_xsub_line}
            or die "mk_report_xsub_line not set";
        my $subs_defined_in_file = $profile->subs_defined_in_file($filestr);
        foreach my $subname (sort keys %$subs_defined_in_file) {
            my $subinfo = $subs_defined_in_file->{$subname};
            my $kind = $subinfo->kind;

            next if $kind eq 'perl';
            next if $subinfo->calls == 0;

            if ($separator_sub) {
                print OUT $separator_sub->($profile, $fi);
                undef $separator_sub; # do mk_report_separator_line just once
            }

            print OUT $line_sub->(
                $subname,
                "sub $subname; # $kind\n\t",
                { subdef_info => [ $subinfo ], },  #stats_for_line
                undef, # stats_for_file
                $profile, $fi
            );
        }

        print OUT $dataend;
        print OUT $self->get_param('footer', [$profile, $filestr]);
        close OUT;
    }
    print "\n" if $opts{show_progress};
}


sub url_for_file {
    my ($self, $file, $anchor, $level) = @_;
    confess "No file specified" unless $file;
    $level ||= '';

    my $url = $self->{_cache}{"url_for_file,$file,$level"} ||= do {
        my $fi = $self->{profile}->fileinfo_of($file);
        $level = 'line' if $fi->is_eval;
        $self->fname_for_fileinfo($fi, $level) . ".html";
    };

    $url .= "#$anchor" if defined $anchor;
    return $url;
}

sub href_for_file {
    my $url = shift->url_for_file(@_);
    return qq{href="$url"} if $url;
    return $url;
}


sub url_for_sub {
    my ($self, $sub, %opts) = @_;
    my $profile = $self->{profile};

    my ($file, $fid, $first, $last, $fi) = $profile->file_line_range_of_sub($sub);
    return "" unless $file;
    if (!$first) {
        # use sanitized subname as label for xsubs
        # XXX must match what nytprofhtml does for xsubs
        ($first = $sub) =~ s/\W/_/g;
    }
    return $self->url_for_file($fi, $first);
}

sub href_for_sub {
    my $url = shift->url_for_sub(@_);
    return qq{href="$url"} if $url;
    return $url;
}


1;