The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/env perl
# PODNAME: locale_simple_scraper_old_old
# ABSTRACT: Old Command line tool for finding tokens in code


$|=1;

use strict;
use warnings;
use Getopt::Long;
use File::Find;
use Cwd;
use IO::All;
use Locale::Simple;
use Data::Dumper;

#
# !!! Work in progress !!!
#

                 # Supported filetypes:
my $js_ext = ""; # Javascript
my $pl_ext = ""; # Perl
my $py_ext = ""; # Python
my $tx_ext = ""; # Text::Xslate (Kolon or Metakolon)

my @ignores;
my @only;

my $output = 'po';
my $md5;

GetOptions(
  "js=s" => \$js_ext,
  "pl=s" => \$pl_ext,
  "py=s" => \$py_ext,
  "tx=s" => \$tx_ext,
  "ignore=s" => \@ignores,
  "only=s" => \@only,
  "output=s" => \$output,
  "md5" => \$md5,
);

# could add Getopt::Long here for override

my @js = split(",",$js_ext);
push @js, 'js';

my @pl = split(",",$pl_ext);
push @pl, 'pl', 'pm', 't';

my @tx = split(",",$tx_ext);
push @tx, 'tx';

my @py = split(",",$py_ext);
push @py, 'py';

# extension list
my %e = (
  ( map { $_ => 'js' } @js ),
  ( map { $_ => 'pl' } @pl ),
  ( map { $_ => 'tx' } @tx ),
  ( map { $_ => 'py' } @py ),
);

# functions with count of locale simple with function of parameter
#
# 1 = msgid
# 2 = msgid_plural
# 3 = msgctxt
# 4 = domain
#
my %f = (
  l => [1],
  ln => [1,2],
  ld => [4,1],
  lp => [3,1],
  lnp => [3,1,2],
  ldn => [4,1,2],
  ldp => [4,3,1],
  ldnp => [4,3,1,2],
);

my @found;

my $dir = getcwd;
my $re_dir = $dir;
$re_dir =~ s/\./\\./g;

finddepth(sub {
  my $filename = $File::Find::name;
  my $stored_filename = $filename;
  if ($md5) {
    eval {
      require Digest::MD5;
      Digest::MD5->import('md5_hex');
    };
    die "This feature requires Digest::MD5" if $@;
    $stored_filename = md5_hex($filename);
  }
  $filename =~ s/^$dir\///g;
  for (@ignores) {
    return if $filename =~ /$_/;
  }
  if (@only) {
    my $found = 0;
    for (@only) {
      $found = 1 if $filename =~ /$_/;
    }
    return unless $found;
  }
  my @fileparts = split('\.',$File::Find::name);
  my $ext = pop @fileparts;
  if (grep { $ext eq $_ } keys %e) {
    my $file = $File::Find::name;
    my $type = $e{$ext};
    print STDERR $type." => ".$file."\n";
    return if -l $file and not -e readlink($file);
    my @lines = io($file)->slurp;
    my $line = 0;
    for (@lines) {
      $line++;
      my @results = parse_line($_, $type);
      for (@results) {
        push @found, {
          %{$_},
          line => $line,
          file => $stored_filename,
          type => $type,
        }
      }
    }
  }
}, $dir);

if ($output eq 'po') {
  my %files;
  my %token;
  for (@found) {
    my $key .= defined $_->{domain} ? '"'.$_->{domain}.'"' : 'undef';
    $key .= defined $_->{msgctxt} ? '"'.$_->{msgctxt}.'"' : 'undef';
    $key .= defined $_->{msgid} ? '"'.$_->{msgid}.'"' : 'undef';
    $key .= defined $_->{msgid_plural} ? '"'.$_->{msgid_plural}.'"' : 'undef';
    $token{$key} = $_ unless defined $token{$key};
    $files{$key} = [] unless defined $files{$key};
    push @{$files{$key}}, $_->{file}.':'.$_->{line};
  }
  for my $k (sort { $a cmp $b } keys %files) {
    print "\n";
    print "#: ".join(' ',@{$files{$k}})."\n";
    print "#, locale-simple-format";
    print " ".$token{$k}{domain} if defined $token{$k}{domain};
    print "\n";
    for (qw( msgctxt msgid msgid_plural )) {
      print $_.' "'.Locale::Simple::gettext_escape($token{$k}{$_}).'"'."\n" if defined $token{$k}{$_};
    }
    my $plural_marker = $token{$k}{msgid_plural} ? "[0]" : "";
    print qq[msgstr$plural_marker ""\n];
  }
} elsif ($output eq 'perl') {
  print Dumper \@found;
} elsif ($output eq 'json') {
  eval {
    require JSON;
    JSON->import;
    print encode_json(\@found);
  } or do {
    die "You require the module JSON for this output";
  };
} elsif ($output eq 'yaml') {
  eval {
    require YAML;
    YAML->import;
    print Dump(\@found);
  } or do {
    die "You require the module YAML for this output";
  };
}

sub parse_line {
  my ( $line, $type, @results ) = @_;
  return if $line =~ /^\s*\#.*/;
  for (keys %f) {
    my @args = @{$f{$_}};
    for ($line =~ /[^\w]${_}\((.*)/) {
      my $argc = scalar @args;
      my ( $remainder, @params ) = parse_params($1, $type, $argc);
      if (scalar @params == $argc) {
        my %result;
        my $pos = 0;
        for (@args) {
          $result{msgid} = $params[$pos] if $_ eq 1;
          $result{msgid_plural} = $params[$pos] if $_ eq 2;
          $result{msgctxt} = $params[$pos] if $_ eq 3;
          $result{domain} = $params[$pos] if $_ eq 4;
          $pos++;
        }
        push @results, \%result, parse_line( $remainder, $type );
      }
    }
  }
  return @results;
}

sub parse_params {
  my ( $params, $type, $argc ) = @_;
  my @chars = split('',$params);
  my @args;
  my $arg = "";
  my $q_state = 0; # 0 = code, 1 = qoute, 2 = double qoute
  my $comma_state = 1;
  while (defined (my $c = shift @chars)) {
    next if $c =~ /\s/ and !$q_state;
    if ($q_state) {
      if ($c eq '\\') {
        my $esc = shift @chars;
        if ($esc eq "'" or $esc eq '"' or $esc eq '\\') {
          $arg .= $c;
        } else {
          warn "Unknown escape char '".$esc."'";
        }
      } elsif ( ( $c eq "'" and $q_state == 1 ) or ( $c eq '"' and $q_state == 2 ) ) {
        $q_state = 0;
        $comma_state = 0;
        push @args, $arg;
        $arg = "";
        last if scalar @args == $argc;
      } else {
        $arg .= $c;
      }
    } else {
      if ($c eq "'" or $c eq '"') {
        die "quote found where comma expected: ".$params unless $comma_state;
        $q_state = $c eq "'" ? 1 : 2;
      } elsif ($c eq ',') {
        die "comma found after comma in code: ".$params if $comma_state;
        $comma_state = 1;
      } elsif ($type eq 'js') {
        last;
      } else {
        last;
      }
    }
  }
  return join('', @chars), @args;
}

__END__

=pod

=head1 NAME

locale_simple_scraper_old_old - Old Command line tool for finding tokens in code

=head1 VERSION

version 0.019

=head1 SYNOPSIS

  # ignoring specific regexps for the filename
  # please be aware that you must escape the . and because we use bash here you
  # must also be aware to escape the \ for escaping the .
  locale_simple_scraper_old --ignores \\.build --ignores some/dir

  # only use specific regexps for the filename
  locale_simple_scraper_old --only core --only site/something

  # different output parameter, might require more modules
  locale_simple_scraper_old --output perl
  locale_simple_scraper_old --output yaml
  locale_simple_scraper_old --output json

  # scramble real filename with md5 (for security)
  locale_simple_scraper_old --md5

  # setting additional extensions for Javascript (default: js)
  locale_simple_scraper_old --js jjs,ajs

  # setting additional extensions for Perl (default: pl, pm, t)
  locale_simple_scraper_old --pl ppl,pppl

  # setting additional extensions for Text::Xslate (default: tx)
  locale_simple_scraper_old --tx ttx,xxx

=head1 DESCRIPTION

This tool parses all Perl, Python, Javascript and Text::Xslate templates in the
current directory and subdirectories to find calls to the L<Locale::Simple>
API.

It gives out on STDERR which files are parsed right now, while it dumps the
resulting data to the screen. By default it generates a B<.po> file, but you can
specify via I<--output> to dump B<json>, B<yaml> or a B<perl> data structure.

Be aware that we add the domain as flag in the B<.po> file. So double token
may appear which only differs through a B<#,> flag.

=head1 SUPPORT

Repository

  http://github.com/Getty/p5-locale-simple
  Pull request and additional contributors are welcome

Issue Tracker

  http://github.com/Getty/p5-locale-simple/issues

=head1 AUTHOR

Torsten Raudssus <torsten@raudss.us>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2012 by DuckDuckGo, Inc. L<http://duckduckgo.com/>, Torsten Raudssus <torsten@raudss.us>.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut