#!/usr/bin/env perl
# PODNAME: locale_simple_scraper_old_old
# ABSTRACT: Old Command line tool for finding tokens in code
$|=1;
use strict;
use warnings;
use Getopt::Long;
use File::Find;
use Cwd;
use IO::All;
use Locale::Simple;
use Data::Dumper;
#
# !!! Work in progress !!!
#
# Supported filetypes:
my $js_ext = ""; # Javascript
my $pl_ext = ""; # Perl
my $py_ext = ""; # Python
my $tx_ext = ""; # Text::Xslate (Kolon or Metakolon)
my @ignores;
my @only;
my $output = 'po';
my $md5;
GetOptions(
"js=s" => \$js_ext,
"pl=s" => \$pl_ext,
"py=s" => \$py_ext,
"tx=s" => \$tx_ext,
"ignore=s" => \@ignores,
"only=s" => \@only,
"output=s" => \$output,
"md5" => \$md5,
);
# could add Getopt::Long here for override
my @js = split(",",$js_ext);
push @js, 'js';
my @pl = split(",",$pl_ext);
push @pl, 'pl', 'pm', 't';
my @tx = split(",",$tx_ext);
push @tx, 'tx';
my @py = split(",",$py_ext);
push @py, 'py';
# extension list
my %e = (
( map { $_ => 'js' } @js ),
( map { $_ => 'pl' } @pl ),
( map { $_ => 'tx' } @tx ),
( map { $_ => 'py' } @py ),
);
# functions with count of locale simple with function of parameter
#
# 1 = msgid
# 2 = msgid_plural
# 3 = msgctxt
# 4 = domain
#
my %f = (
l => [1],
ln => [1,2],
ld => [4,1],
lp => [3,1],
lnp => [3,1,2],
ldn => [4,1,2],
ldp => [4,3,1],
ldnp => [4,3,1,2],
);
my @found;
my $dir = getcwd;
my $re_dir = $dir;
$re_dir =~ s/\./\\./g;
finddepth(sub {
my $filename = $File::Find::name;
my $stored_filename = $filename;
if ($md5) {
eval {
require Digest::MD5;
Digest::MD5->import('md5_hex');
};
die "This feature requires Digest::MD5" if $@;
$stored_filename = md5_hex($filename);
}
$filename =~ s/^$dir\///g;
for (@ignores) {
return if $filename =~ /$_/;
}
if (@only) {
my $found = 0;
for (@only) {
$found = 1 if $filename =~ /$_/;
}
return unless $found;
}
my @fileparts = split('\.',$File::Find::name);
my $ext = pop @fileparts;
if (grep { $ext eq $_ } keys %e) {
my $file = $File::Find::name;
my $type = $e{$ext};
print STDERR $type." => ".$file."\n";
return if -l $file and not -e readlink($file);
my @lines = io($file)->slurp;
my $line = 0;
for (@lines) {
$line++;
my @results = parse_line($_, $type);
for (@results) {
push @found, {
%{$_},
line => $line,
file => $stored_filename,
type => $type,
}
}
}
}
}, $dir);
if ($output eq 'po') {
my %files;
my %token;
for (@found) {
my $key .= defined $_->{domain} ? '"'.$_->{domain}.'"' : 'undef';
$key .= defined $_->{msgctxt} ? '"'.$_->{msgctxt}.'"' : 'undef';
$key .= defined $_->{msgid} ? '"'.$_->{msgid}.'"' : 'undef';
$key .= defined $_->{msgid_plural} ? '"'.$_->{msgid_plural}.'"' : 'undef';
$token{$key} = $_ unless defined $token{$key};
$files{$key} = [] unless defined $files{$key};
push @{$files{$key}}, $_->{file}.':'.$_->{line};
}
for my $k (sort { $a cmp $b } keys %files) {
print "\n";
print "#: ".join(' ',@{$files{$k}})."\n";
print "#, locale-simple-format";
print " ".$token{$k}{domain} if defined $token{$k}{domain};
print "\n";
for (qw( msgctxt msgid msgid_plural )) {
print $_.' "'.Locale::Simple::gettext_escape($token{$k}{$_}).'"'."\n" if defined $token{$k}{$_};
}
my $plural_marker = $token{$k}{msgid_plural} ? "[0]" : "";
print qq[msgstr$plural_marker ""\n];
}
} elsif ($output eq 'perl') {
print Dumper \@found;
} elsif ($output eq 'json') {
eval {
require JSON;
JSON->import;
print encode_json(\@found);
} or do {
die "You require the module JSON for this output";
};
} elsif ($output eq 'yaml') {
eval {
require YAML;
YAML->import;
print Dump(\@found);
} or do {
die "You require the module YAML for this output";
};
}
sub parse_line {
my ( $line, $type, @results ) = @_;
return if $line =~ /^\s*\#.*/;
for (keys %f) {
my @args = @{$f{$_}};
for ($line =~ /[^\w]${_}\((.*)/) {
my $argc = scalar @args;
my ( $remainder, @params ) = parse_params($1, $type, $argc);
if (scalar @params == $argc) {
my %result;
my $pos = 0;
for (@args) {
$result{msgid} = $params[$pos] if $_ eq 1;
$result{msgid_plural} = $params[$pos] if $_ eq 2;
$result{msgctxt} = $params[$pos] if $_ eq 3;
$result{domain} = $params[$pos] if $_ eq 4;
$pos++;
}
push @results, \%result, parse_line( $remainder, $type );
}
}
}
return @results;
}
sub parse_params {
my ( $params, $type, $argc ) = @_;
my @chars = split('',$params);
my @args;
my $arg = "";
my $q_state = 0; # 0 = code, 1 = qoute, 2 = double qoute
my $comma_state = 1;
while (defined (my $c = shift @chars)) {
next if $c =~ /\s/ and !$q_state;
if ($q_state) {
if ($c eq '\\') {
my $esc = shift @chars;
if ($esc eq "'" or $esc eq '"' or $esc eq '\\') {
$arg .= $c;
} else {
warn "Unknown escape char '".$esc."'";
}
} elsif ( ( $c eq "'" and $q_state == 1 ) or ( $c eq '"' and $q_state == 2 ) ) {
$q_state = 0;
$comma_state = 0;
push @args, $arg;
$arg = "";
last if scalar @args == $argc;
} else {
$arg .= $c;
}
} else {
if ($c eq "'" or $c eq '"') {
die "quote found where comma expected: ".$params unless $comma_state;
$q_state = $c eq "'" ? 1 : 2;
} elsif ($c eq ',') {
die "comma found after comma in code: ".$params if $comma_state;
$comma_state = 1;
} elsif ($type eq 'js') {
last;
} else {
last;
}
}
}
return join('', @chars), @args;
}
__END__
=pod
=head1 NAME
locale_simple_scraper_old_old - Old Command line tool for finding tokens in code
=head1 VERSION
version 0.019
=head1 SYNOPSIS
# ignoring specific regexps for the filename
# please be aware that you must escape the . and because we use bash here you
# must also be aware to escape the \ for escaping the .
locale_simple_scraper_old --ignores \\.build --ignores some/dir
# only use specific regexps for the filename
locale_simple_scraper_old --only core --only site/something
# different output parameter, might require more modules
locale_simple_scraper_old --output perl
locale_simple_scraper_old --output yaml
locale_simple_scraper_old --output json
# scramble real filename with md5 (for security)
locale_simple_scraper_old --md5
# setting additional extensions for Javascript (default: js)
locale_simple_scraper_old --js jjs,ajs
# setting additional extensions for Perl (default: pl, pm, t)
locale_simple_scraper_old --pl ppl,pppl
# setting additional extensions for Text::Xslate (default: tx)
locale_simple_scraper_old --tx ttx,xxx
=head1 DESCRIPTION
This tool parses all Perl, Python, Javascript and Text::Xslate templates in the
current directory and subdirectories to find calls to the L<Locale::Simple>
API.
It gives out on STDERR which files are parsed right now, while it dumps the
resulting data to the screen. By default it generates a B<.po> file, but you can
specify via I<--output> to dump B<json>, B<yaml> or a B<perl> data structure.
Be aware that we add the domain as flag in the B<.po> file. So double token
may appear which only differs through a B<#,> flag.
=head1 SUPPORT
Repository
http://github.com/Getty/p5-locale-simple
Pull request and additional contributors are welcome
Issue Tracker
http://github.com/Getty/p5-locale-simple/issues
=head1 AUTHOR
Torsten Raudssus <torsten@raudss.us>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2012 by DuckDuckGo, Inc. L<http://duckduckgo.com/>, Torsten Raudssus <torsten@raudss.us>.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut