#!/usr/bin/env perl
use strict;
use warnings;
use Encode;
use FindBin;
use LWP::Simple;
use Regexp::Assemble::Compressed;
use Spreadsheet::ParseExcel;
use File::Basename;
use File::Path;
use Getopt::Long;
our $DEBUG;
our $UPDATED;
our $VERSION;
our $STOREDIR;
our $TABLEDIR;
our $TESTDIR ;
GetOptions(
'date=s' => sub { updated_on(\$UPDATED, @_) },
verbose => \$DEBUG,
) or die <DATA>;
$DEBUG = defined $ENV{DEBUG} ? $ENV{DEBUG} : $DEBUG;
$UPDATED = defined $UPDATED ? $UPDATED : today();
$VERSION = "0." . do { (my $ymd = $UPDATED) =~ s/-//g; $ymd };
$STOREDIR = "$FindBin::Bin/share";
$TABLEDIR = "$FindBin::Bin/lib/Number/Phone/JP/Table";
$TESTDIR = "$FindBin::Bin/t";
main();
sub main {
my %task_map = (
Class1 => +{ function => 'class' },
Class2 => +{ skip => 1 },
Pager => +{
function => 'fixed_pref',
prefix => '020',
test_suffix => '12345',
filename => '000124105.xls',
},
Q2 => +{
function => 'fixed_pref',
prefix => '0990',
test_suffix => '123',
filename => '000124118.xls',
},
Upt => +{ function => 'upt' },
United => +{
function => 'fixed_pref',
prefix => '0570',
test_suffix => '123',
filename => '000124113.xls',
},
Ipphone => +{
function => 'fixed_pref',
prefix => '050',
filename => '000124106.xls',
},
Freedial => +{
function => 'fixed_pref',
prefix => [qw(0120 0800)],
test_suffix => +{ '0120' => '123', '0800' => '1234' },
filename => [qw(000124112.xls 000124114.xls)],
},
);
unless (-d $STOREDIR) {
mkpath($STOREDIR) or die $!;
}
opendir my $dh, $TABLEDIR or die "$TABLEDIR: $!";
for my $file (readdir $dh) {
next unless $file =~ /^(\w+)\.pm$/;
my $class = $1;
no strict 'refs';
if (my $param = $task_map{$class}) {
next if $param->{skip};
my $func = delete $param->{function};
_warn("calling $func()");
$func->($class, $param);
}
else {
my $func = lc($class);
unless (defined $::{$func}) {
_warn("$func() is not defined. skipping");
next;
}
_warn("calling $func()");
$func->($class);
}
}
}
sub fixed_pref {
my($class, $param) = @_;
my $lc_class = lc($class);
my $filename = "$TABLEDIR/$class.pm";
my @files = ();
if ($param->{filename}) {
@files = ref($param->{filename}) eq 'ARRAY' ?
@{$param->{filename}} : ($param->{filename});
}
else {
@files = ("$lc_class.xls");
}
my @prefixes = ref($param->{prefix}) eq 'ARRAY' ?
@{$param->{prefix}} : ($param->{prefix});
my %regexp_table =
map { $_ => Regexp::Assemble::Compressed->new } @prefixes;
my $prefix_re;
if (@prefixes == 1) {
$prefix_re = $prefixes[0];
}
else {
my $re = Regexp::Assemble::Compressed->new;
for my $prefix (@prefixes) {
$re->add($prefix);
}
($prefix_re = $re->re) =~ s/^\(\?(?:-xism|\^):/(?:/;
}
my @rows_list = ();
my @cols_list = ();
my @column_values_list = ();
for my $file (@files) {
_warn($file);
$file = "$STOREDIR/$file";
http_get_file($file) or die "HTTP failed";
my($rows, $cols, $column_values) = parse_excel($file);
push @rows_list, $rows;
push @cols_list, $cols;
push @column_values_list, $column_values;
}
open my $fh, '>', $filename or die "$filename: $!";
print $fh table_class_header($class);
my @ok = ();
my @ng = ();
my $re = Regexp::Assemble::Compressed->new;
for my $i (0 .. $#rows_list) {
my $rows = $rows_list[$i];
my $cols = $cols_list[$i];
my $column_values = $column_values_list[$i];
for my $row (sort { $a <=> $b } keys %$rows) {
for my $col (sort { $a <=> $b } keys %$cols) {
my $number = sprintf '%s%s', $rows->{$row}, $cols->{$col};
my $value = $column_values->{$row}{$col};
my $orig_number = $number;
$number =~ s/^($prefix_re)//;
my $prefix = $1;
my $test_suffix =
ref($param->{test_suffix}) eq 'HASH' ?
$param->{test_suffix}{$prefix} :
($param->{test_suffix} || '1234');
my $regexp_suffix = '\d{' . length($test_suffix) . '}';
_warn($orig_number . ("x" x (length $test_suffix)));
if (!defined $value || $value =~ /^(\s|-)*$/) {
push @ng, "$prefix ${number}${test_suffix}";
next;
}
else {
push @ok, "$prefix ${number}${test_suffix}";
}
my $re = $regexp_table{$prefix};
$re->add($number . $regexp_suffix);
}
}
}
for my $prefix (@prefixes) {
my $re = $regexp_table{$prefix};
(my $regexp = $re->re) =~ s/^\(\?(?:-xism|\^):/(?:/;
(my $table_prefix = $prefix) =~ s/^0//;
printf $fh " $table_prefix => '%s',\n", compress($regexp);
}
printf $fh table_class_footer();
close $fh;
make_test($lc_class, \@ok, \@ng);
}
sub mobile {
my @files = (qw/000200622.xls 000124110.xls 000124111.xls/);
my @prefixes = qw(070 080 090);
my %regexp_table =
map { $_ => Regexp::Assemble::Compressed->new } @prefixes;
my $re = Regexp::Assemble::Compressed->new;
for my $prefix (@prefixes) {
$re->add($prefix);
}
(my $prefix_re = $re->re) =~ s/^\(\?(?:-xism|\^):/(?:/;
my @rows_list = ();
my @cols_list = ();
my @column_values_list = ();
for my $file (@files) {
_warn($file);
$file = "$STOREDIR/$file";
http_get_file($file) or die "HTTP failed";
my($rows, $cols, $column_values) = parse_excel($file);
push @rows_list, $rows;
push @cols_list, $cols;
push @column_values_list, $column_values;
}
my $filename = "$TABLEDIR/Mobile.pm";
open my $fh, '>', $filename or die "$filename: $!";
print $fh table_class_header('Mobile');
my @ok = ();
my @ng = ();
for my $i (0 .. $#rows_list) {
my $rows = $rows_list[$i];
my $cols = $cols_list[$i];
my $column_values = $column_values_list[$i];
for my $row (sort { $a <=> $b } keys %$rows) {
for my $col (sort { $a <=> $b } keys %$cols) {
my $number = sprintf '%s%s', $rows->{$row}, $cols->{$col};
my $value = $column_values->{$row}{$col};
my $orig_number = $number;
$number =~ s/^($prefix_re)//;
my $prefix = $1;
my $test_suffix = '12345';
my $regexp_suffix = '\d{' . length($test_suffix) . '}';
_warn($orig_number . ("x" x (length $test_suffix)));
if (!defined $value || $value =~ /^(\s|-)*$/) {
push @ng, "$prefix ${number}${test_suffix}";
next;
}
else {
push @ok, "$prefix ${number}${test_suffix}";
}
my $re = $regexp_table{$prefix};
$re->add($number . $regexp_suffix);
}
}
}
for my $prefix (@prefixes) {
my $re = $regexp_table{$prefix};
(my $regexp = $re->re) =~ s/^\(\?(?:-xism|\^):/(?:/;
(my $table_prefix = $prefix) =~ s/^0//;
printf $fh " $table_prefix => '%s',\n", compress($regexp);
}
printf $fh table_class_footer();
close $fh;
make_test('mobile', \@ok, \@ng);
make_test('phs', \@ok, \@ng);
}
sub phs {
my $filename = "$TABLEDIR/Phs.pm";
open my $fh, '>', $filename or die "$filename: $!";
print $fh inherit_class('Phs', 'Mobile');
close $fh;
}
sub home {
my $class = shift;
my $lc_class = lc($class);
my $filename = "$TABLEDIR/$class.pm";
my %table = ();
my @ok = ();
my @ng = ();
my $modified;
no warnings 'uninitialized';
for my $num (1 .. 9) {
my $file = sprintf '00012407%d.xls', $num - 1;
_warn($file);
$file = "$STOREDIR/$file";
http_get_file($file) or die "HTTP failed";
$modified = 1;
my $parser = Spreadsheet::ParseExcel->new;
my $workbook = $parser->parse($file);
my $sheet = ($workbook->worksheets)[0];
my @row_range = $sheet->row_range;
for my $row ($row_range[0] .. $row_range[1]) {
my $cell = $sheet->get_cell($row, 3);
next unless defined $cell;
my $pref = $cell->value;
next unless defined $pref && $pref =~ s/^0//;
my $local_pref = $sheet->get_cell($row, 4)->value;
my $status = encode('utf-8', $sheet->get_cell($row, 6)->value);
unless ($status =~ /(?:使用中|使用予定)/) {
push @ng, sprintf '0%s %s1234', $pref, $local_pref;
next;
}
push @ok, sprintf '0%s %s1234', $pref, $local_pref;
unless (exists $table{$pref}) {
$table{$pref} = Regexp::Assemble::Compressed->new;
}
$table{$pref}->add("$local_pref\\d{4}");
_warn(sprintf "%s-%s: %s", $pref, $local_pref, $status);
}
}
return unless $modified;
open my $fh, '>', $filename or die "$filename: $!";
print $fh table_class_header($class);
for my $pref (sort { $a cmp $b } keys %table) {
(my $re = $table{$pref}->re) =~ s/^\(\?(?:-xism|\^):/(?:/;
printf $fh " %-4d => '%s',\n", $pref, compress($re);
}
print $fh table_class_footer();
close $fh;
make_test($lc_class, \@ok, \@ng);
}
sub class {
my $file = '000124104.xls';
_warn($file);
$file = "$STOREDIR/$file";
http_get_file($file) or die "HTTP failed";
my($rows, $cols, $column_values) = parse_excel($file);
my @ok = ();
my @ng = ();
my @rows1 = ();
my @rows2 = ();
my $start_class2 = 0;
for my $row (sort { $a <=> $b } keys %$rows) {
if ($start_class2) {
push @rows2, $row;
}
else {
if ($rows->{$row} !~ /^\d+$/) {
$start_class2 = 1;
next;
}
push @rows1, $row;
}
}
my $filename = "$TABLEDIR/Class1.pm";
open my $fh, '>', $filename or die "$filename: $!";
print $fh table_class_header('Class1');
for my $row (@rows1) {
for my $col (sort { $a <=> $b } keys %$cols) {
next unless length $rows->{$row};
# fixing illegal cell formats
$rows->{$row} =~ s/^0+/00/ unless $rows->{$row} =~ /^00/;
my $prefix = sprintf '%s%s', $rows->{$row}, $cols->{$col};
_warn("${prefix}xxxxxxxx");
my $value = $column_values->{$row}{$col};
if ($value =~ /^(\s|-)*$/) {
push @ng, "$prefix 12345678";
next;
}
else {
push @ok, "$prefix 12345678";
}
$prefix =~ s/^0//;
printf $fh
" %-7s => '%s', # %s\n", "'" . $prefix . "'", '\d+', $value;
}
}
print $fh table_class_footer();
close $fh;
make_test('class1', \@ok, \@ng);
@ok = ();
@ng = ();
$filename = "$TABLEDIR/Class2.pm";
open $fh, '>', $filename or die "$filename: $!";
print $fh table_class_header('Class2');
for my $row (@rows2) {
for my $col (sort { $a <=> $b } keys %$cols) {
next unless $rows->{$row};
# fixing illegal cell formats
$rows->{$row} =~ s/^0+/00/ unless $rows->{$row} =~ /^00/;
my $prefix = sprintf '%s%s', $rows->{$row}, $cols->{$col};
_warn("${prefix}xxxxxxxx");
my $value = $column_values->{$row}{$col};
if ($value =~ /^(\s|-)*$/) {
push @ng, "$prefix 12345678";
next;
}
else {
push @ok, "$prefix 12345678";
}
$prefix =~ s/^0//;
printf $fh
" %-8s => '%s', # %s\n", "'" . $prefix . "'", '\d+', $value;
}
}
print $fh table_class_footer();
close $fh;
make_test('class2', \@ok, \@ng);
}
sub upt {
my $filename = "$TABLEDIR/Upt.pm";
open my $fh, '>', $filename or die "$filename: $!";
print $fh inherit_class('Upt', 'Fmc');
close $fh;
make_test('upt', [], []);
}
sub parse_excel {
my $file = shift;
my $parser = Spreadsheet::ParseExcel->new;
my $workbook = $parser->parse($file);
my $sheet = ($workbook->worksheets)[0];
my($row_from, $row_to) = $sheet->row_range;
my($col_from, $col_to) = $sheet->col_range;
my %rows = ();
my %cols = ();
my %column_values = ();
my $start_reading = 0;
for my $row ($row_from .. $row_to) {
my $read_header = 0;
for my $col ($col_from .. $col_to) {
if ($col == 0) {
my $cell = $sheet->get_cell($row, $col);
my $value = $cell ? convert_value($cell->value) : '';
if ($start_reading) {
next unless length $value;
$rows{$row} = $value =~ /^0/ ? $value : '0' . $value;
}
else {
if ($value eq '番号') {
$read_header = 1;
$start_reading = 1;
next;
}
}
next;
}
last unless $start_reading;
my $cell = $sheet->get_cell($row, $col);
my $value = $cell ? convert_value($cell->value) : '';
$column_values{$row}{$col} = $value;
if ($read_header) {
if ($value =~ /^\d$/) {
$cols{$col} = $value;
}
next;
}
}
}
return (\%rows, \%cols, \%column_values);
}
sub http_get_file {
my $file = shift;
my $uri = basename($file);
#my($ext) = $uri =~ /\.([^\.]+)$/;
my $url = sprintf 'http://www.soumu.go.jp/main_content/%s', $uri;
_warn($url);
my $res = LWP::Simple::mirror($url, $file);
return 1 if $res == 200 || $res == 304;
_warn("fail to get new file: $file ($res)");
return;
}
sub compress { # makes regexp more compressed
my $regexp = shift;
$regexp =~ s{((?:\\d(?!\{)){2,})}{
my $len = length($1) / 2;
sprintf("\\d{%d}", $len);
}eg;
$regexp =~ s{((?:\\d)*)((?:\\d\{\d+\})+)((?:\\d(?!\{))*)}{
my($prefix, $match_times, $postfix) = ($1, $2, $3);
my $total = 0;
my @times = $match_times =~ m{\\d\{(\d+)\}}g;
$total += $_ for @times;
$total += length($prefix) / 2 if $prefix;
$total += length($postfix) / 2 if $postfix;
sprintf("\\d{%d}", $total);
}eg;
return $regexp;
}
sub convert_value {
my $cell_value = shift;
$cell_value =~ tr/\x{3000}\x{FF01}-\x{FF5E}/\x20\x21-\x7E/;
$cell_value =~ tr/\x{201D}\x{2019}\x{FFE5}\x{2018}\x{301C}/"'\\`~/;
$cell_value =~ tr/\x{2010}-\x{2015}\x{2212}/\-\-\-\-\-\-\-/;
return encode('utf-8', $cell_value);
}
sub table_class_header {
my $name = shift;
my $desc_pref = $name eq 'Home' ? 'Area-Pref' : 'Pref';
my $desc_regexp = $name eq 'Home' ? 'Local-Pref-Regex' : 'Assoc-Pref-Regex';
return sprintf <<'END', $name, $VERSION, $UPDATED, $desc_pref, $desc_regexp;
package Number::Phone::JP::Table::%s;
use strict;
use warnings;
our $VERSION = '%s';
# Table last modified: %s
our %%TEL_TABLE = (
# %s => q<%s>,
END
;
}
sub table_class_footer {
return <<'END';
);
1;
__END__
END
;
}
sub inherit_class {
my($name, $parent) = @_;
return sprintf <<'END', $name, $parent, $VERSION, $parent;
package Number::Phone::JP::Table::%s;
use strict;
use warnings;
require Number::Phone::JP::Table::%s;
our $VERSION = '%s';
no warnings 'once';
our %%TEL_TABLE = %%Number::Phone::JP::Table::%s::TEL_TABLE;
1;
__END__
END
;
}
sub make_test {
my($name, $ok, $ng) = @_;
my $testfile = "$TESTDIR/$name.t";
open my $t, '>', $testfile or die "$testfile: $!";
print $t "use strict;\n";
printf $t
"use Test::More tests => %d;\n\n", scalar(@$ok) + scalar(@$ng) + 1;
print $t "use_ok('Number::Phone::JP', '$name');\n\n";
print $t "my \$tel = Number::Phone::JP->new;\n";
for my $test (@$ok) {
printf $t test_ok($test);
}
for my $test (@$ng) {
printf $t test_ng($test);
}
close $t;
}
sub test_ok {
my $ok = shift;
return sprintf "ok(\$tel->set_number('%s')->is_valid_number, " .
"'checking for %s');\n", $ok, $ok;
}
sub test_ng {
my $ng = shift;
return sprintf "ok(!\$tel->set_number('%s')->is_valid_number, " .
"'checking for %s');\n", $ng, $ng;
}
sub today {
my $self = shift;
my @lt = localtime();
return sprintf '%d-%02d-%02d', $lt[5] + 1900, $lt[4] + 1, $lt[3];
}
sub _warn {
return unless $DEBUG;
warn(map { "$_\n" } @_);
}
sub updated_on {
my($ref, $name, $value) = @_;
unless ($value =~ /^\d{4}-\d\d-\d\d$/) {
die qq{$name option is assumed to have the format "YYYY-MM-DD"\n};
}
$$ref = $value;
}
__DATA__
Usage: regexp-table-maker.pl [OPTION]...
options:
-d, --date=YYYY-MM-DD specifies the date of updated the tables.
it'll be used for $VERSION of each classes.
-v, --verbose verbose mode.
causes to print debugging messages about its
progress.
you can also turn on the feature using DEBUG
environment variable.