package Locale::Maketext::Guts;
# Placeholder definition: install a sub named "zorp" in this package, unless
# one is already defined. It returns the number of arguments it was called with.
BEGIN { *zorp = sub { return scalar nelems @_ } unless defined &zorp; }
# The sub above exists only so that this file defines SOMETHING in the package
# named after it; everything below is compiled into Locale::Maketext instead.
package Locale::Maketext;
# Package globals:
#   $USE_LITERALS -- read by _compile to decide whether "safe" text may be
#                    inlined as a quoted literal in the generated source;
#                    it is only declared here, not assigned.
#   $GUTSPATH     -- set below to the path of this file.
our ($USE_LITERALS, $GUTSPATH);
BEGIN {
# Record where this file was loaded from.
$GUTSPATH = __FILE__;
# Provide a constant DEBUG of 0 unless a DEBUG sub has already been defined.
*DEBUG = sub () {0} unless defined &DEBUG;
}
use utf8;
sub _compile {
    # Compile one bracket-notation string into something cheap to evaluate.
    #
    # Arguments:
    #   @_[0] -- a class name or an object; only used as the invocant for
    #            _die_pointing when the string is malformed.
    #   @_[1] -- the string to compile.  It is matched in place (never
    #            copied) so that pos(@_[1]) is available to _die_pointing.
    #
    # Returns:
    #   - a reference to a scalar holding the text, when no bracket group
    #     produced a call (the string is all literal text);
    #   - otherwise a code reference built by eval'ing generated source.
    #     That source closes over @c, the "chunk" array below.
    #
    # Dies (through _die_pointing) on nested, unterminated or unbalanced
    # bracket groups and on unusable method names.
    my $target = ref(@_[0]) || @_[0];
    my(@code);                  # fragments of generated source text
    my@(@c) =@( @(''));         # "chunks" -- scratch.
    my $call_count = 0;         # number of bracket groups turned into code
    my $big_pile = '';          # all literal text seen, concatenated
    do {
        my $in_group = 0;       # start out outside a group
        my($m, @params);        # scratch: method name and its parameters
        while(@_[1] =~          # Iterate over chunks.
            m<\G(
                [^\~\[\]]+      # non-~[] stuff
                |
                ~.              # ~[, ~], ~~, ~other
                |
                \[              # [ presumably opening a group
                |
                \]              # ] presumably closing a group
                |
                ~               # terminal ~ ?
                |
                $
            )>xgs
        ) {
            print $^STDOUT, " \"$1\"\n" if DEBUG +> 2;
            if($1 eq '[' or $1 eq '') { # "[" or end
                # Whether this is "[" or end, force processing of any
                # preceding literal.
                if($in_group) {
                    if($1 eq '') {
                        $target->_die_pointing(@_[1], "Unterminated bracket group");
                    } else {
                        $target->_die_pointing(@_[1], "You can't nest bracket groups");
                    }
                } else {
                    if($1 eq '') {
                        print $^STDOUT, " [end-string]\n" if DEBUG +> 2;
                    } else {
                        $in_group = 1;
                    }
                    die "How come \@c is empty?? in <@_[1]>" unless (nelems @c); # sanity
                    if(length @c[-1]) {
                        # Now actually processing the preceding literal
                        $big_pile .= @c[-1];
                        if($USE_LITERALS and (
                            (ord('A') == 65)
                            ?? @c[-1] !~ m<[^\x20-\x7E]>s
                            # ASCII very safe chars
                            !! @c[-1] !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
                            # EBCDIC very safe chars
                        )) {
                            # normal case -- all very safe chars, so inline
                            # the text as a single-quoted literal
                            @c[-1] =~ s/'/\\'/g;
                            push @code, q{ '} . @c[-1] . "',\n";
                            @c[-1] = ''; # reuse this slot
                        } else {
                            # Leave the text in @c and have the generated
                            # code fetch it from there by index.
                            push @code, ' @c[' . ((nelems @c)-1) . "],\n";
                            push @c, ''; # new chunk
                        }
                    }
                    # else just ignore the empty string.
                }
            } elsif($1 eq ']') { # "]"
                # close group -- go back in-band
                if($in_group) {
                    $in_group = 0;
                    print $^STDOUT, " --Closing group [@c[-1]]\n" if DEBUG +> 2;
                    # And now process the group...
                    if(!length(@c[-1]) or @c[-1] =~ m/^\s+$/s) {
                        # An empty or all-whitespace group produces nothing
                        # (and is not counted as a call).
                        DEBUG +> 2 and print $^STDOUT, " -- (Ignoring)\n";
                        @c[-1] = ''; # reset our chunk
                        next;
                    }
                    #$c[-1] =~ s/^\s+//s;
                    #$c[-1] =~ s/\s+$//s;
                    @($m,@< @params) = split(",", @c[-1], -1); # was /\s*,\s*/
                    # A bit of a hack -- we've turned "~,"'s into DELs, so turn
                    # 'em into real commas here.
                    if (ord('A') == 65) { # ASCII, etc
                        foreach(@($m, < @params)) { s/\x7F/,/g }
                    } else { # EBCDIC (1047, 0037, POSIX-BC)
                        # Thanks to Peter Prymmer for the EBCDIC handling
                        foreach(@($m, < @params)) { s/\x07/,/g }
                    }
                    # Special-case handling of some method names:
                    if($m eq '_*' or $m =~ m<^_(-?\d+)$>s) {
                        # Treat [_1,...] as [,_1,...], etc.
                        unshift @params, $m;
                        $m = '';
                    } elsif($m eq '*') {
                        $m = 'quant'; # "*" for "times": "4 cars" is 4 times "cars"
                    } elsif($m eq '#') {
                        $m = 'numf'; # "#" for "number": [#,_1] for "the number _1"
                    }
                    # Most common case: a simple, legal-looking method name
                    if($m eq '') {
                        # 0-length method name means to just interpolate:
                        push @code, ' (';
                    } elsif($m =~ m<^\w+(?:\:\:\w+)*$>s
                        and $m !~ m<(?:^|\:)\d>s
                        # exclude starting a (sub)package or symbol with a digit
                    ) {
                        # Yes, it even supports the demented (and undocumented?)
                        # $obj->Foo::bar(...) syntax.
                        $target->_die_pointing(
                            @_[1], "Can't (yet?) use \"SUPER::\" in a bracket-group method",
                            2 + length(@c[-1])
                        )
                            if $m =~ m/^SUPER::/s;
                        # Because for SUPER:: to work, we'd have to compile this into
                        # the right package, and that seems just not worth the bother,
                        # unless someone convinces me otherwise.
                        push @code, ' @_[0]->' . $m . '(';
                    } else {
                        # TODO: implement something? or just too icky to consider?
                        $target->_die_pointing(
                            @_[1],
                            "Can't use \"$m\" as a method name in bracket group",
                            2 + length(@c[-1])
                        );
                    }
                    pop @c; # we don't need that chunk anymore
                    ++$call_count;
                    foreach my $p ( @params) {
                        if($p eq '_*') {
                            # Meaning: all parameters except @_[0]
                            # NOTE(review): this emits a '$#_' range slice into
                            # the generated source -- confirm the dialect this
                            # file is written in accepts that form.
                            @code[-1] .= ' @_[1 .. $#_], ';
                            # and yes, that does the right thing for all @_ < 3
                        } elsif($p =~ m<^_(-?\d+)$>s) {
                            # _3 meaning @_[3]
                            @code[-1] .= '@_[' . (0 + $1) . '], ';
                        } elsif($USE_LITERALS and (
                            (ord('A') == 65)
                            ?? $p !~ m<[^\x20-\x7E]>s
                            # ASCII very safe chars
                            !! $p !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
                            # EBCDIC very safe chars
                        )) {
                            # Normal case: a literal containing only safe characters
                            $p =~ s/'/\\'/g;
                            @code[-1] .= q{'} . $p . q{', };
                        } else {
                            # Stow it on the chunk-stack, and just refer to that.
                            # The reference is written as '@c[N]', the same
                            # element-access form the stowed-literal path above
                            # emits, so the generated sub is consistent.
                            push @c, $p;
                            push @code, ' @c[' . ((nelems @c)-1) . "], ";
                        }
                    }
                    @code[-1] .= "),\n";
                    push @c, '';
                } else {
                    $target->_die_pointing(@_[1], "Unbalanced ']'");
                }
            } elsif(substr($1,0,1) ne '~') {
                # it's stuff not containing "~" or "[" or "]"
                # i.e., a literal blob
                @c[-1] .= $1;
            } elsif($1 eq '~~') { # "~~"
                @c[-1] .= '~';
            } elsif($1 eq '~[') { # "~["
                @c[-1] .= '[';
            } elsif($1 eq '~]') { # "~]"
                @c[-1] .= ']';
            } elsif($1 eq '~,') { # "~,"
                if($in_group) {
                    # This is a hack, based on the assumption that no-one will actually
                    # want a DEL inside a bracket group. Let's hope that it's true.
                    if (ord('A') == 65) { # ASCII etc
                        @c[-1] .= "\x7F";
                    } else { # EBCDIC (cp 1047, 0037, POSIX-BC)
                        @c[-1] .= "\x07";
                    }
                } else {
                    @c[-1] .= '~,';
                }
            } elsif($1 eq '~') { # possible only at string-end, it seems.
                @c[-1] .= '~';
            } else {
                # It's a "~X" where X is not a special character.
                # Consider it a literal ~ and X.
                @c[-1] .= $1;
            }
        }
    };
    if($call_count) {
        undef $big_pile; # Well, nevermind that.
    } else {
        # It's all literals! Ahwell, that can happen.
        # So don't bother with the eval. Return a SCALAR reference.
        return \$big_pile;
    }
    die "Last chunk isn't null??" if (nelems @c) and length @c[-1]; # sanity
    print $^STDOUT, scalar(nelems @c), " chunks under closure\n" if DEBUG;
    if((nelems @code) == 0) { # not possible?
        print $^STDOUT, "Empty code\n" if DEBUG;
        return \'';
    } elsif((nelems @code) +> 1) { # most cases, presumably!
        # Several fragments: join them.  The closing paren is pushed here,
        # together with its opening "@(", so that the single-fragment case
        # below does not end up with an unmatched ")".
        unshift @code, "join '',@(\n";
        push @code, ")";
    }
    unshift @code, "sub \{\n";
    push @code, "\}\n";
    print $^STDOUT, < @code if DEBUG;
    my $sub = eval(join '', @code);
    die "$($^EVAL_ERROR->message) while evalling" . join('', @code) if $^EVAL_ERROR; # Should be impossible.
    return $sub;
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sub _die_pointing {
    # Raise a fatal error for _compile, with a marker line drawn under the
    # offending spot when the text is simple enough to draw one.
    #
    # Called as a method:
    #   invocant -- class name (or whatever _compile passed as its target)
    #   @_[0]    -- the text being compiled; read in place, because the
    #               match position is taken from pos(@_[0])
    #   @_[1]    -- the error message
    #   @_[2]    -- optional: how many characters to step back from the
    #               current match position before pointing
    my $target = shift;

    my $step_back = defined(@_[2]) ?? @_[2] !! 0;
    my $column    = pos(@_[0]) - $step_back - 1;
    my $tab_at    = index(@_[0], "\t");

    my $marker;
    if ($column +< 1) {
        # At (or before) the left margin: nothing to space over.
        $marker = "^=== near there\n";
    }
    elsif ($column +> 2 and ($tab_at == -1 or $tab_at +> pos(@_[0]))) {
        # No tab before the match position, and enough room for a shaft
        # leading up to the caret.
        $marker = ('=' x $column) . "^ near there\n";
    }
    else {
        # Otherwise take the text up to the column and drop every character
        # that is not a tab or a space, then put the caret after that.
        $marker = substr(@_[0], 0, $column);
        $marker =~ s/[^\t ]//g;
        $marker .= "^=== near there\n";
    }

    my $report     = "@_[1], in\:\n@_[0]";
    my $newline_at = index(@_[0], "\n");
    if ($newline_at == -1) {
        # Single line without a newline: supply one before the marker.
        $report .= "\n" . $marker;
    }
    elsif ($newline_at == (length(@_[0]) - 1)) {
        # Single line that already ends in a newline.
        $report .= $marker;
    }
    # Multi-line text: no marker is appended.

    Carp::croak( "$report via $target, as used" );
}
1;