package B::Deobfuscate;
use strict;
use warnings;
use vars qw( @ISA $VERSION );
use B qw( main_cv main_root main_start );
use B::Deparse;
BEGIN {
@ISA = 'B::Deparse';
$VERSION = '0.20';
for my $func (qw( begin_av init_av check_av end_av )) {
## no critic
no strict 'refs';
if ( defined &{"B::$func"} ) {
B->import($func);
}
else {
# If I couldn't create it, I'll just declare it to keep lint happy.
eval "sub $func;";
}
}
# B::perlstring was added in 5.8.0
if ( defined &B::perlstring ) {
B->import('perlstring');
}
else {
*perlstring = sub { '"' . quotemeta( shift @_ ) . '"' };
}
}
use B::Keywords qw( @Barewords @Symbols );
use Carp 'confess';
use IO::Handle ();
use YAML qw( LoadFile Dump );
# use Data::Postponed 'postpone_forever';
sub postpone_forever { return shift @_ }
sub load_keywords {
my $self = shift @_;
my $p = $self->{ +__PACKAGE__ };
return $p->{keywords} = {
map { $_, undef } @Barewords,
# Snip the sigils.
map { substr $_, 1 } @Symbols
};
}
sub load_unknown_dict {
my $self = shift @_;
my $p = $self->{ +__PACKAGE__ };
my $dict_data;
# slurp the entire dictionary at once
if ( defined( my $dict_file = $p->{unknown_dict_file} ) ) {
open my $fh, '<', $dict_file
or confess "Cannot open dictionary $dict_file: $!";
local $/; ## no critic
$dict_data = [<$fh>];
}
else {
LOAD_DICTIONARY_MODULE:
for my $module ( $p->{unknown_dict_module}, 'PGPHashKeywords',
'Flowers' )
{
next if not defined $module;
eval "require B::Deobfuscate::Dict::$module"; ## no critic
next if $@;
no strict 'refs'; ## no critic
$dict_data = ${"B::Deobfuscate::Dict::$module"};
last LOAD_DICTIONARY_MODULE;
}
}
unless ($dict_data) {
confess "The symbol dictionary was empty!";
}
my $k = $self->load_keywords;
$p->{unknown_dict_data} = [
sort { length $a <=> length $b or $a cmp $b }
grep { not( /\W/ or exists $k->{$_} ) }
split /\n/,
$dict_data
];
unless ( scalar @{ $p->{'unknown_dict_data'} } ) {
confess "The symbol dictionary is empty!";
}
return;
}
sub next_short_dict_symbol {
my $self = shift @_;
my $p = $self->{ +__PACKAGE__ };
my $sym = shift @{ $p->{unknown_dict_data} };
push @{ $p->{used_symbols} }, $sym;
unless ($sym) {
confess "The symbol dictionary has run out and is now empty";
}
return $sym;
}
sub next_long_dict_symbol {
my $self = shift @_;
my $p = $self->{ +__PACKAGE__ };
my $sym = pop @{ $p->{unknown_dict_data} };
push @{ $p->{used_symbols} }, $sym;
unless ($sym) {
confess "The symbol dictionary has run out and is now empty";
}
return $sym;
}
sub load_user_config {
my $self = shift @_;
my $p = $self->{ +__PACKAGE__ };
my $config_file = $p->{user_config};
return unless $config_file;
unless ( -f $config_file ) {
confess "Configuration file $config_file doesn't exist";
}
my $config = ( LoadFile($config_file) )[0];
$p->{globals_to_ignore} = $config->{globals_to_ignore};
$p->{pad_symbols} = $config->{lexicals};
$p->{gv_symbols} = $config->{globals};
if ( $config->{dictionary} ) {
$p->{unknown_dict_file} = $config->{dictionary};
}
if ( $config->{global_regex} ) {
$p->{global_regex} = qr/$config->{global_regex}/;
}
# Symbols that are listed with an undef value actually
# just aren't renamed at all.
for my $symt_nym (qw/pad gv/) {
my $symt = $p->{ $symt_nym . "_symbols" };
for my $symt_key ( keys %$symt ) {
if ( not defined $symt->{$symt_key} ) {
$symt->{$symt_key} = $symt_key;
}
}
}
return;
}
sub gv_should_be_renamed {
my ( $self, $sigil, $name ) = @_;
my $p = $self->{ +__PACKAGE__ };
my $k = $p->{keywords};
confess("Undefined sigil") unless defined $sigil;
confess("Undefined name") unless defined $name;
# Bug 24334: $1 gets passed in w/o a sigil. Dunno why. That's wrong and broke the previous version of
# the regexp which read m{^\$\d+\z}
# Ignore keywords.
return
if exists $k->{$name}
or "$sigil$name" =~ m{^\$?\d+\z};
if ( exists $p->{gv_symbols}{$name}
or $name =~ $p->{gv_match} )
{
return 1;
}
return;
}
sub rename_pad {
my ( $self, $name ) = @_;
my $p = $self->{ +__PACKAGE__ };
my ($sigil) = $name =~ m{^(\W+)}
or confess "Invalid pad variable name $name";
my $dict = $p->{pad_symbols};
return $dict->{$name} if $dict->{$name};
# $dict->{$name} = $name;
$dict->{$name} = postpone_forever $sigil . $self->next_short_dict_symbol;
unless ( $dict->{$name} ) {
confess "The suggested name for the lexical variable $name is empty";
}
return $dict->{$name};
}
sub lookup_sigil {
my $rv = shift @_;
return $rv =~ /(?:gv|pad|rv2)sv\z/ ? '$'
: $rv =~ /(?:gvav|padav|av2arylen|rv2av|aelemfast|aelem|aslice)\z/
? '@'
: $rv =~ /(?:padhv|rv2hv|helem|hslice)\z/ ? '%'
: $rv =~ /rv2cv\z/ ? '&'
: $rv =~ /(?:gv|gelem|rv2gv)\z/ ? ''
:
# Nothing valid;
();
}
sub rename_gv {
my ( $self, $name ) = @_;
my $p = $self->{ +__PACKAGE__ };
my $sigil_debug = '';
my $sigil;
FIND_SIGIL: {
for ( my $cx = 0; not defined $sigil; ++$cx ) {
my ( undef, undef, undef, $rv ) = caller $cx;
if ( not $rv ) {
confess
"No sigil could be found. Please report the following text:\n$sigil_debug\n";
}
$sigil = lookup_sigil($rv);
$sigil_debug .= "$cx = $rv\n";
}
}
unless ( defined $sigil ) {
confess
"No sigil could be found. Please report the following text:\n$sigil_debug\n";
}
return $name unless $self->gv_should_be_renamed( $sigil, $name );
my $dict = $p->{gv_symbols};
my $sname = "$sigil$name";
return $dict->{$sname} if exists $dict->{$sname};
$dict->{$sname} = postpone_forever $self->next_long_dict_symbol;
unless ( $dict->{$sname} ) {
confess "$sname could not be renamed.";
}
return $dict->{$sname};
}
## OVERRIDE METHODS FROM B::Deparse
sub new {
my $class = shift @_;
my $self = $class->SUPER::new(@_);
my $p = $self->{ +__PACKAGE__ } = {};
$p->{unknown_dict_file} = undef;
$p->{unknown_dict_module} = undef;
$p->{unknown_dict_data} = undef;
$p->{user_config} = undef;
$p->{gv_match} = qw/^[[:lower:][:digit:]_]+\z/;
$p->{pad_symbols} = {};
$p->{gv_symbols} = {};
$p->{output_yaml} = 0;
$p->{output_fh} = \*STDOUT;
while ( my $arg = shift @_ ) {
## no critic
if ( $arg =~ m{^-d([^,]+)} ) {
$p->{unknown_dict_file} = $1;
}
elsif ( $arg =~ m{^-D([^,]+)} ) {
$p->{unknown_dict_module} = $1;
}
elsif ( $arg =~ m{^-c([^,]+)} ) {
$p->{user_config} = $1;
}
elsif ( $arg =~ m{^-m/([^/]+)/} ) {
$p->{gv_match} = $1;
}
elsif ( $arg =~ m{^-y} ) {
$p->{output_yaml} = 1;
}
}
$self->load_user_config;
$self->load_unknown_dict;
return $self;
}
sub compile { ## no critic Complex
my (@args) = @_;
return sub {
my $source = '';
my $self = __PACKAGE__->new(@args);
# First deparse command-line args
if ( defined $^I ) { # deparse -i
$source .= q(BEGIN { $^I = ) . perlstring($^I) . qq(; }\n);
}
if ($^W) { # deparse -w
$source .= qq(BEGIN { \$^W = $^W; }\n);
}
## no critic PackageVar
if ( $/ ne "\n" or defined $O::savebackslash ) { # deparse -l -0
my $fs = perlstring($/) || 'undef';
my $bs = perlstring($O::savebackslash) || 'undef';
$source .= qq(BEGIN { \$/ = $fs; \$\\ = $bs; }\n);
}
# I need to do things differently depending on the perl
# version.
if ( $] >= 5.008 ) {
if ( defined &begin_av
and begin_av->isa('B::AV') )
{
$self->todo( $_, 0 ) for begin_av->ARRAY;
}
if ( defined &check_av
and check_av->isa('B::AV') )
{
$self->todo( $_, 0 ) for check_av->ARRAY;
}
if ( defined &init_av
and init_av->isa('B::AV') )
{
$self->todo( $_, 0 ) for init_av->ARRAY;
}
if ( defined &end_av
and end_av->isa('B::AV') )
{
$self->todo( $_, 0 ) for end_av->ARRAY;
}
$self->stash_subs;
$self->{curcv} = main_cv;
$self->{curcvlex} = undef;
}
else {
# 5.6.x
$self->stash_subs('main');
$self->{curcv} = main_cv;
$self->walk_sub( main_cv, main_start );
}
$source .= join "\n", $self->print_protos;
@{ $self->{subs_todo} }
= sort { $a->[0] <=> $b->[0] } @{ $self->{subs_todo} };
$source .= join "\n", $self->indent( $self->deparse( main_root, 0 ) ),
"\n"
unless B::Deparse::null main_root;
my @text;
while ( scalar @{ $self->{subs_todo} } ) {
push @text, $self->next_todo;
}
$source .= join "\n", $self->indent( join "", @text ), "\n"
if @text;
# Print __DATA__ section, if necessary
my $laststash
= defined $self->{curcop}
? $self->{curcop}->stash->NAME
: $self->{curstash};
{
## no critic
no strict 'refs';
## use critic
if ( defined *{ $laststash . "::DATA" } ) {
if ( eof *{ $laststash . "::DATA" } ) {
# I think this only happens when using B::Deobfuscate
# on itself.
{
local $/ = "__DATA__\n";
seek *{ $laststash . "::DATA" }, 0, 0;
readline *{ $laststash . "::DATA" };
}
}
$source .= "__DATA__\n";
$source .= join '', readline *{ $laststash . "::DATA" };
}
}
my $p = $self->{ +__PACKAGE__ };
my %dump = (
lexicals => $p->{pad_symbols},
globals => $p->{gv_symbols},
dictionary => $p->{unknown_dict_file},
global_regex => $p->{gv_match}
);
if ( $p->{output_yaml} ) {
$p->{output_fh}->print( Dump( \%dump, $source ) );
}
else {
$p->{output_fh}->print($source);
}
return;
};
}
sub padname {
my $self = shift @_;
my $padname = $self->SUPER::padname(@_);
return $self->rename_pad($padname);
}
sub gv_name {
my $self = shift @_;
my $gv_name = $self->SUPER::gv_name(@_);
return $self->rename_gv($gv_name);
}
# BEGIN {
# ## no critic
# no strict 'refs';
# for my $sub ( grep defined &$_, keys %B::Deobfuscate:: ) {
# my $orig = \&$sub;
# *$sub = sub {
# print "$sub\n";
# &$orig;
# };
# }
# }
1;
## Local Variables:
## perl-lint-bin: "/home/josh/bin/perl/5.9.4/bin/perl5.9.4"
## eval: (setenv "/home/josh/src/B-Deobfuscate/lib" "PERL5LIB")
## End: