# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package B::Deobfuscate;
use strict;
use warnings;
use vars qw( @ISA $VERSION );
use B qw( main_cv main_root main_start );
use B::Deparse;

# Compile-time setup: record the version and parent class, then pull in the
# B helpers that may or may not exist on the running perl.
BEGIN {
    $VERSION = '0.20';
    @ISA     = ('B::Deparse');

    # Import each special-block accessor that B actually provides.
    foreach my $accessor (qw( begin_av init_av check_av end_av )) {

        ## no critic
        no strict 'refs';
        unless ( defined &{"B::$accessor"} ) {

            # B doesn't have this one, so only forward-declare it to keep
            # lint happy.
            eval "sub $accessor;";
            next;
        }

        B->import($accessor);
    }

    # B::perlstring was added in 5.8.0; without it, install a
    # quotemeta-based stand-in under the same name.
    if ( not defined &B::perlstring ) {
        *perlstring = sub { '"' . quotemeta( shift @_ ) . '"' };
    }
    else {
        B->import('perlstring');
    }

}
use B::Keywords qw( @Barewords @Symbols );

use Carp 'confess';
use IO::Handle ();
use YAML qw( LoadFile Dump );

# use Data::Postponed 'postpone_forever';
# Local stand-in used while the Data::Postponed import above is disabled:
# returns its first argument unchanged.
sub postpone_forever {
    my ($value) = @_;
    return $value;
}

# Build the keyword lookup table from B::Keywords: every entry of @Barewords
# plus every entry of @Symbols with its leading sigil character removed.
# The table (a hash reference whose values are all undef) is stored in the
# object's private state as {keywords} and is also the return value.
sub load_keywords {
    my ($self) = @_;
    my $state = $self->{ +__PACKAGE__ };

    my %reserved;
    @reserved{@Barewords} = ();

    # Snip the sigils.
    @reserved{ map { substr $_, 1 } @Symbols } = ();

    $state->{keywords} = \%reserved;
    return $state->{keywords};
}

# Load the dictionary of replacement symbol names into
# $p->{unknown_dict_data}.
#
# The raw word list (newline-separated text) comes from:
#   1. the file named by $p->{unknown_dict_file}, if that is defined;
#   2. otherwise the package scalar $B::Deobfuscate::Dict::<module> of the
#      first module that loads, trying $p->{unknown_dict_module},
#      'PGPHashKeywords' and 'Flowers' in that order.
#
# Words are kept only if they are usable identifiers (word characters only,
# not starting with a digit), are not keywords according to load_keywords,
# and have not been seen before. The result is sorted by length and then
# alphabetically, so the short words sit at the front and the long ones at
# the back.
#
# Returns nothing. Confesses if the file cannot be read, if no raw data was
# found, or if no word survives the filtering.
sub load_unknown_dict {
    my $self = shift @_;
    my $p    = $self->{ +__PACKAGE__ };

    my $dict_data;

    if ( defined( my $dict_file = $p->{unknown_dict_file} ) ) {
        open my $fh, '<', $dict_file
            or confess "Cannot open dictionary $dict_file: $!";

        # Slurp the entire dictionary at once. This must be a scalar-context
        # read: the text is handed to split below, and an array reference
        # would be stringified to "ARRAY(0x...)" and lose every word.
        local $/;    ## no critic
        $dict_data = <$fh>;
        close $fh
            or confess "Cannot close dictionary $dict_file: $!";
    }
    else {
    LOAD_DICTIONARY_MODULE:
        for my $module ( $p->{unknown_dict_module}, 'PGPHashKeywords',
            'Flowers' )
        {
            next if not defined $module;

            # NOTE(review): $module may come from the -D option and is
            # interpolated into a string eval.
            eval "require B::Deobfuscate::Dict::$module";    ## no critic
            next if $@;

            no strict 'refs';                                ## no critic
            $dict_data = ${"B::Deobfuscate::Dict::$module"};
            last LOAD_DICTIONARY_MODULE;
        }
    }

    unless ( defined $dict_data and length $dict_data ) {
        confess "The symbol dictionary was empty!";
    }

    my $k = $self->load_keywords;

    # Blank lines, digit-leading words and duplicates are dropped here:
    # an empty entry would later be mistaken for an exhausted dictionary,
    # a digit-leading word is not a valid variable name, and a duplicate
    # would give two different variables the same new name.
    my %seen;
    $p->{unknown_dict_data} = [
        sort { length $a <=> length $b or $a cmp $b }
            grep {
                    /\A[^\W\d]\w*\z/
                and not exists $k->{$_}
                and not $seen{$_}++
            }
            split /\r?\n/,
        $dict_data
    ];

    unless ( scalar @{ $p->{'unknown_dict_data'} } ) {
        confess "The symbol dictionary is empty!";
    }

    return;
}

# Take the next word from the front of the symbol dictionary
# ($p->{unknown_dict_data}); load_unknown_dict sorts that list by ascending
# length, so this is the shortest word still available.
#
# Returns the word and appends it to $p->{used_symbols}. Confesses when the
# dictionary yields nothing usable (no entry left, or a false entry).
sub next_short_dict_symbol {
    my $self = shift @_;
    my $p    = $self->{ +__PACKAGE__ };

    my $sym = shift @{ $p->{unknown_dict_data} };

    # Check before recording, so that running out of words does not leave
    # an undef entry behind in used_symbols.
    unless ($sym) {
        confess "The symbol dictionary has run out and is now empty";
    }

    push @{ $p->{used_symbols} }, $sym;

    return $sym;
}

# Take the next word from the back of the symbol dictionary
# ($p->{unknown_dict_data}); load_unknown_dict sorts that list by ascending
# length, so this is the longest word still available.
#
# Returns the word and appends it to $p->{used_symbols}. Confesses when the
# dictionary yields nothing usable (no entry left, or a false entry).
sub next_long_dict_symbol {
    my $self = shift @_;
    my $p    = $self->{ +__PACKAGE__ };

    my $sym = pop @{ $p->{unknown_dict_data} };

    # Check before recording, so that running out of words does not leave
    # an undef entry behind in used_symbols.
    unless ($sym) {
        confess "The symbol dictionary has run out and is now empty";
    }

    push @{ $p->{used_symbols} }, $sym;

    return $sym;
}

# Apply the user's YAML configuration file ($p->{user_config}), if any.
#
# Recognised top-level keys of the first YAML document:
#   globals_to_ignore - copied to $p->{globals_to_ignore}
#   lexicals          - rename table for lexicals  ($p->{pad_symbols})
#   globals           - rename table for globals   ($p->{gv_symbols})
#   dictionary        - dictionary file name       ($p->{unknown_dict_file})
#   global_regex      - pattern selecting globals to rename
#
# Returns nothing. Does nothing when no configuration file was requested;
# confesses when the named file does not exist. An invalid global_regex
# dies when it is compiled.
sub load_user_config {
    my $self        = shift @_;
    my $p           = $self->{ +__PACKAGE__ };
    my $config_file = $p->{user_config};

    return unless $config_file;

    unless ( -f $config_file ) {
        confess "Configuration file $config_file doesn't exist";
    }

    my $config = ( LoadFile($config_file) )[0] || {};
    $p->{globals_to_ignore} = $config->{globals_to_ignore};

    # A configuration without these sections must not replace the rename
    # tables with undef: rename_pad would then work on a throwaway hash and
    # forget every name it hands out.
    $p->{pad_symbols} = $config->{lexicals} || {};
    $p->{gv_symbols}  = $config->{globals}  || {};

    if ( $config->{dictionary} ) {
        $p->{unknown_dict_file} = $config->{dictionary};
    }
    if ( $config->{global_regex} ) {

        # Compiling validates the pattern up front.
        $p->{global_regex} = qr/$config->{global_regex}/;

        # gv_should_be_renamed matches against gv_match, so the setting has
        # to land there to take effect. The uncompiled value is stored,
        # like the default and the -m option, which is also what compile()
        # writes back out as global_regex.
        $p->{gv_match} = $config->{global_regex};
    }

    # Symbols that are listed with an undef value actually
    # just aren't renamed at all.
    for my $symt_nym (qw/pad gv/) {
        my $symt = $p->{ $symt_nym . "_symbols" };
        for my $symt_key ( keys %$symt ) {
            if ( not defined $symt->{$symt_key} ) {
                $symt->{$symt_key} = $symt_key;
            }
        }
    }

    return;
}

# Decide whether the global $name (seen with $sigil) is to be renamed.
#
# Returns 1 when the name is listed in $p->{gv_symbols} or matches the
# $p->{gv_match} pattern; returns nothing when it is a keyword, a numeric
# variable, or matches neither. Confesses on an undefined sigil or name.
sub gv_should_be_renamed {
    my ( $self, $sigil, $name ) = @_;
    my $state    = $self->{ +__PACKAGE__ };
    my $keywords = $state->{keywords};

    defined $sigil or confess("Undefined sigil");
    defined $name  or confess("Undefined name");

    # Ignore keywords.
    return if exists $keywords->{$name};

    # Bug 24334: $1 gets passed in w/o a sigil. Dunno why. That's wrong and
    # broke the previous version of the regexp which read m{^\$\d+\z}, so
    # the leading dollar is optional here.
    return if "$sigil$name" =~ m{^\$?\d+\z};

    return 1 if exists $state->{gv_symbols}{$name};
    return 1 if $name =~ $state->{gv_match};

    return;
}

# Return the replacement name for the lexical variable $name (sigil
# included, e.g. '$foo').
#
# The first request for a name takes the next short dictionary word, puts
# the original sigil in front of it and remembers the pair in
# $p->{pad_symbols}; later requests return the remembered value.
# Confesses when $name does not start with a non-word character or when the
# generated name turns out empty.
sub rename_pad {
    my ( $self, $name ) = @_;
    my $p = $self->{ +__PACKAGE__ };

    my ($sigil) = $name =~ m{^(\W+)}
        or confess "Invalid pad variable name $name";

    # Make sure the table lives in the object. If pad_symbols were undef
    # (load_user_config assigns it straight from the config's "lexicals"
    # key), a plain copy would autovivify a private hash that is thrown
    # away on return, and the same variable would get a new name each call.
    my $dict = $p->{pad_symbols} ||= {};
    return $dict->{$name} if $dict->{$name};

    #    $dict->{$name} = $name;
    $dict->{$name}
        = postpone_forever( $sigil . $self->next_short_dict_symbol );

    unless ( $dict->{$name} ) {
        confess "The suggested name for the lexical variable $name is empty";
    }
    return $dict->{$name};
}

# Map a subroutine name (as reported by caller) to the sigil implied by the
# op it ends in: '$', '@', '%', '&', or '' for glob-type ops.
# Returns nothing (undef in scalar context) when the name matches none of
# the known op suffixes. The tests run in order, first match wins.
sub lookup_sigil {
    my ($sub_name) = @_;

    return '$' if $sub_name =~ /(?:gv|pad|rv2)sv\z/;
    return '@'
        if $sub_name
        =~ /(?:gvav|padav|av2arylen|rv2av|aelemfast|aelem|aslice)\z/;
    return '%' if $sub_name =~ /(?:padhv|rv2hv|helem|hslice)\z/;
    return '&' if $sub_name =~ /rv2cv\z/;
    return ''  if $sub_name =~ /(?:gv|gelem|rv2gv)\z/;

    # Nothing valid;
    return;
}

# Return the replacement name for the global $name, or $name itself when
# gv_should_be_renamed says to leave it alone.
#
# The sigil is not passed in. It is recovered by walking up the call stack,
# starting at this sub, until lookup_sigil recognises the name of a calling
# subroutine. The walk has to stay inline here: wrapping it in a helper
# would add a stack frame.
#
# Replacements are long dictionary words, remembered in $p->{gv_symbols}
# under the key "$sigil$name". Confesses when the stack runs out before a
# sigil is found, or when the generated name is empty.
sub rename_gv {
    my ( $self, $name ) = @_;
    my $state = $self->{ +__PACKAGE__ };

    my $sigil;
    my $trace = '';
    my $depth = 0;
    until ( defined $sigil ) {
        my ( undef, undef, undef, $frame_sub ) = caller $depth;
        if ( not $frame_sub ) {
            confess
                "No sigil could be found. Please report the following text:\n$trace\n";
        }

        $sigil = lookup_sigil($frame_sub);
        $trace .= "$depth = $frame_sub\n";
        ++$depth;
    }

    $self->gv_should_be_renamed( $sigil, $name ) or return $name;

    my $renamed = $state->{gv_symbols};
    my $key     = "$sigil$name";

    if ( not exists $renamed->{$key} ) {
        $renamed->{$key} = postpone_forever( $self->next_long_dict_symbol );

        unless ( $renamed->{$key} ) {
            confess "$key could not be renamed.";
        }
    }

    return $renamed->{$key};
}

## OVERRIDE METHODS FROM B::Deparse

# Constructor. Builds the B::Deparse object via SUPER::new with the same
# arguments, attaches this module's private state under the package-name
# key, applies the command-line style options, then loads the user
# configuration and the symbol dictionary.
#
# Options understood here (anything else is ignored by this sub):
#   -dFILE      dictionary file
#   -DMODULE    dictionary module
#   -cFILE      user configuration file
#   -m/REGEX/   pattern selecting which globals get renamed
#   -y          emit YAML
# Option processing stops at the first false argument.
sub new {
    my ( $class, @options ) = @_;
    my $self = $class->SUPER::new(@options);

    my $p = $self->{ +__PACKAGE__ } = {
        unknown_dict_file   => undef,
        unknown_dict_module => undef,
        unknown_dict_data   => undef,
        user_config         => undef,
        gv_match            => '^[[:lower:][:digit:]_]+\z',
        pad_symbols         => {},
        gv_symbols          => {},
        output_yaml         => 0,
        output_fh           => \*STDOUT,
    };

    for my $option (@options) {
        last if not $option;

        ## no critic
        if ( $option =~ m{^-d([^,]+)} ) {
            $p->{unknown_dict_file} = $1;
        }
        elsif ( $option =~ m{^-D([^,]+)} ) {
            $p->{unknown_dict_module} = $1;
        }
        elsif ( $option =~ m{^-c([^,]+)} ) {
            $p->{user_config} = $1;
        }
        elsif ( $option =~ m{^-m/([^/]+)/} ) {
            $p->{gv_match} = $1;
        }
        elsif ( $option =~ m{^-y} ) {
            $p->{output_yaml} = 1;
        }
    }

    $self->load_user_config;
    $self->load_unknown_dict;

    return $self;
}

# Build the callback that performs the deobfuscation.
#
# Takes the option list (the same strings new() parses) and returns an
# anonymous sub. When that sub is called it constructs a fresh object of
# this package, assembles the deparsed program text in $source, and prints
# it to $p->{output_fh} -- as plain source, or, when output_yaml is set, as
# a YAML dump of the rename tables followed by the source. The inner sub
# itself returns nothing.
# NOTE(review): presumably the returned sub is what the O backend driver
# invokes (perl -MO=Deobfuscate,...) -- confirm against the caller.
sub compile {    ## no critic Complex
    my (@args) = @_;

    return sub {
        my $source = '';
        my $self   = __PACKAGE__->new(@args);

        # First deparse command-line args
        if ( defined $^I ) {    # deparse -i
            $source .= q(BEGIN { $^I = ) . perlstring($^I) . qq(; }\n);
        }
        if ($^W) {              # deparse -w
            $source .= qq(BEGIN { \$^W = $^W; }\n);
        }
        ## no critic PackageVar
        if ( $/ ne "\n" or defined $O::savebackslash ) {    # deparse -l -0
            # A false perlstring() result is emitted as the literal text
            # 'undef'.
            my $fs = perlstring($/) || 'undef';
            my $bs = perlstring($O::savebackslash) || 'undef';
            $source .= qq(BEGIN { \$/ = $fs; \$\\ = $bs; }\n);
        }

        # I need to do things differently depending on the perl
        # version.
        if ( $] >= 5.008 ) {

            # Hand every entry of the BEGIN, CHECK, INIT and END arrays to
            # todo(). Each accessor is only used when it is really defined
            # (the BEGIN block at the top of the file may merely have
            # forward-declared it) and returns a B::AV.
            if ( defined &begin_av
                and begin_av->isa('B::AV') )
            {
                $self->todo( $_, 0 ) for begin_av->ARRAY;
            }
            if ( defined &check_av
                and check_av->isa('B::AV') )
            {
                $self->todo( $_, 0 ) for check_av->ARRAY;
            }
            if ( defined &init_av
                and init_av->isa('B::AV') )
            {
                $self->todo( $_, 0 ) for init_av->ARRAY;
            }
            if ( defined &end_av
                and end_av->isa('B::AV') )
            {
                $self->todo( $_, 0 ) for end_av->ARRAY;
            }

            $self->stash_subs;
            $self->{curcv}    = main_cv;
            $self->{curcvlex} = undef;
        }
        else {

            # 5.6.x
            $self->stash_subs('main');
            $self->{curcv} = main_cv;
            $self->walk_sub( main_cv, main_start );
        }

        # Output order: prototypes, then the main program, then the queued
        # subs.
        $source .= join "\n", $self->print_protos;

        # Order the pending subs numerically by the first element of each
        # entry before they are emitted.
        @{ $self->{subs_todo} }
            = sort { $a->[0] <=> $b->[0] } @{ $self->{subs_todo} };
        $source .= join "\n", $self->indent( $self->deparse( main_root, 0 ) ),
            "\n"
            unless B::Deparse::null main_root;

        # Drain the queue one entry at a time; the loop re-checks
        # subs_todo after every next_todo call.
        my @text;
        while ( scalar @{ $self->{subs_todo} } ) {
            push @text, $self->next_todo;
        }
        $source .= join "\n", $self->indent( join "", @text ), "\n"
            if @text;

        # Print __DATA__ section, if necessary
        # The DATA handle is looked up in the package of the last cop seen,
        # falling back to the current stash.
        my $laststash
            = defined $self->{curcop}
            ? $self->{curcop}->stash->NAME
            : $self->{curstash};
        {
            ## no critic
            no strict 'refs';
            ## use critic
            if ( defined *{ $laststash . "::DATA" } ) {
                if ( eof *{ $laststash . "::DATA" } ) {

                    # I think this only happens when using B::Deobfuscate
                    # on itself.
                    # The handle is already at EOF: rewind it and read up to
                    # and including the "__DATA__\n" marker, so the readline
                    # below starts right after the marker.
                    {
                        local $/ = "__DATA__\n";
                        seek *{ $laststash . "::DATA" }, 0, 0;
                        readline *{ $laststash . "::DATA" };
                    }
                }

                $source .= "__DATA__\n";
                $source .= join '', readline *{ $laststash . "::DATA" };
            }
        }

        # Rename tables and settings that accompany the source in YAML
        # mode; the keys are the same ones load_user_config reads from a
        # configuration file.
        my $p    = $self->{ +__PACKAGE__ };
        my %dump = (
            lexicals     => $p->{pad_symbols},
            globals      => $p->{gv_symbols},
            dictionary   => $p->{unknown_dict_file},
            global_regex => $p->{gv_match}
        );

        if ( $p->{output_yaml} ) {
            $p->{output_fh}->print( Dump( \%dump, $source ) );
        }
        else {
            $p->{output_fh}->print($source);
        }

        return;
    };
}

# Override of the inherited padname: fetch the name the parent class
# reports (in scalar context) and pass it through rename_pad.
sub padname {
    my ( $self, @args ) = @_;

    my $original_name = $self->SUPER::padname(@args);
    return $self->rename_pad($original_name);
}

# Override of the inherited gv_name: fetch the name the parent class
# reports (in scalar context) and pass it through rename_gv.
# rename_gv inspects the call stack starting from itself, so it is called
# directly from here without any intermediate helper.
sub gv_name {
    my ( $self, @args ) = @_;

    my $original_name = $self->SUPER::gv_name(@args);
    return $self->rename_gv($original_name);
}

# BEGIN {
#     ## no critic
#     no strict 'refs';
#     for my $sub ( grep defined &$_, keys %B::Deobfuscate:: ) {
#         my $orig = \&$sub;
#         *$sub = sub {
#             print "$sub\n";
#             &$orig;
#         };
#     }
# }

1;

## Local Variables:
## perl-lint-bin: "/home/josh/bin/perl/5.9.4/bin/perl5.9.4"
## eval: (setenv "/home/josh/src/B-Deobfuscate/lib" "PERL5LIB")
## End: