The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package    # hide from PAUSE
    ModuleGenerator;

use v5.22;

use strict;
use warnings;
use feature qw( postderef signatures );
use namespace::autoclean;
use autodie;

use Data::Dumper::Concise qw( Dumper );
use JSON::MaybeXS qw( decode_json );
use List::AllUtils qw( max );
use ModuleGenerator::Locale;
use Locale::Language
    qw( language_code2code LOCALE_LANG_ALPHA_2 LOCALE_LANG_ALPHA_3 );
use Parse::PMFile;
use Path::Class qw( file );
use Path::Class::Rule;
use Scalar::Util qw( reftype );
use Text::Template;

use Moose;
use MooseX::Types::Moose qw( ArrayRef Bool Num Str );
use MooseX::Types::Path::Class qw( Dir File );

## no critic (TestingAndDebugging::ProhibitNoWarnings)
no warnings qw( experimental::postderef experimental::signatures );
## use critic

with 'MooseX::Getopt::Dashes';

our $VERSION = '0.10';

has _autogen_warning => (
    is      => 'ro',
    isa     => Str,
    lazy    => 1,
    builder => '_build_autogen_warning',
);

has _script => (
    is      => 'ro',
    isa     => File,
    lazy    => 1,
    builder => '_build_script',
);

has _source_data_root => (
    is      => 'ro',
    isa     => Dir,
    lazy    => 1,
    builder => '_build_source_data_root',
);

has _locale_codes => (
    is      => 'ro',
    isa     => ArrayRef [Str],
    lazy    => 1,
    builder => '_build_locale_codes',
);

has _locales => (
    is      => 'ro',
    isa     => ArrayRef ['ModuleGenerator::Locale'],
    lazy    => 1,
    builder => '_build_locales',
);

sub run ($self) {
    $self->_clean_old_data;
    $self->_locales;
    $self->_write_data_pm;
    $self->_write_catalog_pm;
    $self->_write_pod_files;

    return 0;
}

sub _clean_old_data ($self) {
    my $pir  = Path::Class::Rule->new;
    my $iter = $pir->file->name(qr/\.pod$/)->iter('lib');
    while ( my $path = $iter->() ) {
        ## no critic (InputOutput::RequireCheckedSyscalls)
        say 'Removing ', $path->basename;
        $path->remove;
    }
}

sub _build_locales ($self) {
    my @locales;
    for my $code ( $self->_locale_codes->@* ) {
        my $locale = ModuleGenerator::Locale->instance(
            code             => $code,
            source_data_root => $self->_source_data_root,
        );

        ## no critic (InputOutput::RequireCheckedSyscalls)
        say $locale->code;
        say $_ for $locale->source_files;
        print "\n";
        ## use critic

        push @locales, $locale;
    }

    return \@locales;
}

sub _write_data_pm ($self) {
    my $data_pm_file = file(qw( lib DateTime Locale Data.pm ));
    ## no critic (InputOutput::RequireCheckedSyscalls)
    say "Generating $data_pm_file";
    ## use critic
    my $data_pm = $data_pm_file->slurp( iomode => '<:encoding(UTF-8)' );

    $self->_insert_autogen_warning( \$data_pm );

    my %codes;
    my %names;
    my %native_names;
    my %raw_locales;
    for my $locale ( $self->_locales->@* ) {
        $codes{ $locale->code }               = 1;
        $names{ $locale->en_name }            = 1;
        $native_names{ $locale->native_name } = 1;
        $raw_locales{ $locale->code }         = $locale->data_hash;
    }

    $self->_insert_var_in_code(
        'CLDRVersion',
        $self->_locales->[0]->version, 1, \$data_pm
    );

    $self->_insert_var_in_code( 'Codes',       \%codes,        1, \$data_pm );
    $self->_insert_var_in_code( 'Names',       \%names,        1, \$data_pm );
    $self->_insert_var_in_code( 'NativeNames', \%native_names, 1, \$data_pm );

    $self->_insert_var_in_code(
        'ISO639Aliases',
        $self->_iso_639_aliases, 1, \$data_pm
    );

    # These are some of the world's top languages by speakers plus a few
    # locales where I think there are lots of Perl people.
    my %preload = map { $_ => delete $raw_locales{$_} }
        qw( ar en en-CA en-US es fr-FR hi ja-JP pt-BR zh-Hans-CN zh-Hant-TW );

    $self->_insert_var_in_code( 'LocaleData', \%preload, 0, \$data_pm );

    my $pos = 0;
    my %index;
    my $data_section = q{};
    for my $code ( sort keys %raw_locales ) {
        my $marker = "__[ $code ]__\n";
        $data_section .= $marker;
        $pos += length $marker;

        my $start_pos = $pos;

        my $dumped = $self->_dump_with_unicode( $raw_locales{$code} );
        $data_section .= $dumped;
        $pos += length $dumped;

        $index{$code} = [ $start_pos => $pos - $start_pos ];
    }

    $self->_insert_var_in_code( 'DataSectionIndex', \%index, 0, \$data_pm );

    $data_pm =~ s/(__DATA__\n).*(__END__\n)/$1$data_section\n$2/s
        or die 'data section subst failed';

    $data_pm_file->spew( iomode => '>:encoding(UTF-8)', $data_pm );

    return;
}

sub _iso_639_aliases ($self) {
    my %aliases;
    for my $locale ( $self->_locales->@* ) {
        my $three = language_code2code(
            $locale->language_code,
            LOCALE_LANG_ALPHA_2, LOCALE_LANG_ALPHA_3
        ) or next;

        my $full_three_code = join '-',
            grep {defined} (
            $three,
            $locale->script_code,
            $locale->territory_code,
            $locale->variant_code
            );

        $aliases{$full_three_code} = $locale->code;
    }
    return \%aliases;
}

sub _write_catalog_pm ($self) {
    my $catalog_pm_file = file(qw( lib DateTime Locale Catalog.pm ));
    ## no critic (InputOutput::RequireCheckedSyscalls)
    say "Generating $catalog_pm_file";
    ## use critic
    my $catalog_pm = $catalog_pm_file->slurp( iomode => '<:encoding(UTF-8)' );

    my $max_code = max map { length $_->code } $self->_locales->@*;
    $max_code += 3;
    my $max_en_name = max map { length $_->en_name } $self->_locales->@*;
    $max_en_name += 3;
    my $max_native_name
        = max map { length $_->native_name } $self->_locales->@*;
    $max_native_name += 3;

    my $locale_list = sprintf(
        " %-${max_code}s%-${max_en_name}s%-${max_native_name}s\n",
        'Locale code', 'Locale name (in English)', 'Native locale name'
    );
    $locale_list
        .= q{ } . '=' x ( $max_code + $max_en_name + $max_native_name );
    $locale_list .= "\n";

    for my $locale ( sort { $a->code cmp $b->code } $self->_locales->@* ) {
        $locale_list .= sprintf(
            " %-${max_code}s%-${max_en_name}s%-${max_native_name}s\n",
            $locale->code, $locale->en_name, $locale->native_name,
        );
    }
    $locale_list .= "\n";

    $locale_list =~ s/ +$//mg;

    $catalog_pm =~ s/(^=for :locales\n\n).+^(?==)/$1$locale_list/ms
        or die 'locale list subst failed';

    $catalog_pm_file->spew( iomode => '>:encoding(UTF-8)', $catalog_pm );
}

sub _insert_var_in_code ( $self, $name, $value, $public, $code ) {
    my $sigil
        = !ref $value              ? '$'
        : reftype $value eq 'HASH' ? '%'
        :                            '@';

    my $safe;
    if ( ref $value ) {
        $safe = $self->_dump_with_unicode($value);
        $safe =~ s/^[\{\[]/(/;
        $safe =~ s/[\}\]]\n$/)/;
    }
    else {
        $safe = $value = is_Num($value) ? $value : B::perlstring($value);
    }

    my $declarator = $public ? 'our' : 'my';
    ${$code} =~ s/
        (\#<<<\n
         \#\#\#\Q :start $name:\E\n)
        .*
        (\#\#\#\Q :end $name:\E\n
         \#>>>\n)
    /$1$declarator $sigil$name = $safe;\n$2/xs
        or die "inserting $name failed";

    return;
}

# Data::Dumper dumps all Unicode characters using Perl's \x{feedad0g}
# syntax. If the character is in the 0x80-0xFF range, then Perl will not treat
# this as a UTF-8 char when it sees it (either at compile or eval time). We
# force it to use UTF-8 by replacing \x{feedad0g} with \N{U+feedad0g}, which
# is always interpreted as UTF-8.
sub _dump_with_unicode ( $self, $val ) {
    my $dumped = Dumper($val);
    $dumped =~ s/\\x\{([^}]+)\}/$self->_unicode_char_for($1)/eg;
    return $dumped;
}

sub _unicode_char_for ( $, $hex ) {
    ## no critic (BuiltinFunctions::ProhibitStringyEval)
    my $num = eval '0x' . $hex;
    die $@ if $@;
    return '\N{U+' . sprintf( '%04x', $num ) . '}';
}

sub _insert_autogen_warning ( $self, $code ) {
    ${$code} =~ s/(?:^###+$).+(?:^###+$)\n+//ms;
    ${$code} =~ s/^/$self->_autogen_warning/e;
    return;
}

sub _build_autogen_warning ($self) {
    my $script = $self->_script->basename;

    return <<"EOF";
###########################################################################
#
# This file is partially auto-generated by the DateTime::Locale generator
# tools (v$VERSION). This code generator comes with the DateTime::Locale
# distribution in the tools/ directory, and is called $script.
#
# This file was generated from the CLDR JSON locale data. See the LICENSE.cldr
# file included in this distribution for license details.
#
# Do not edit this file directly unless you are sure the part you are editing
# is not created by the generator.
#
###########################################################################

EOF
}

sub _write_pod_files ($self) {
    my $template = Text::Template->new(
        TYPE   => 'FILE',
        SOURCE => file(qw( tools templates locale.pod ))->stringify,
    ) or die $Text::Template::ERROR;

    use lib 'lib';
    require DateTime;
    require DateTime::Locale;

    my @example_dts = (
        DateTime->new(
            year      => 2008,
            month     => 2,
            day       => 5,
            hour      => 18,
            minute    => 30,
            second    => 30,
            time_zone => 'UTC',
        ),
        DateTime->new(
            year      => 1995,
            month     => 12,
            day       => 22,
            hour      => 9,
            minute    => 5,
            second    => 2,
            time_zone => 'UTC',
        ),
        DateTime->new(
            year      => -10,
            month     => 9,
            day       => 15,
            hour      => 4,
            minute    => 44,
            second    => 23,
            time_zone => 'UTC',
        ),
    );

    for my $code ( DateTime::Locale->codes ) {
        my $underscore = $code =~ s/-/_/gr;

        my $pod_file
            = file( qw( lib DateTime Locale ), $underscore . '.pod' );
        ## no critic (InputOutput::RequireCheckedSyscalls)
        say "Generating $pod_file";
        ## use critic

        my $locale = DateTime::Locale->load($code)
            or die "Cannot load $code";

        my $filled = $template->fill_in(
            HASH => {
                autogen_warning => $self->_autogen_warning,
                name            => 'DateTime::Locale::' . $underscore,
                description => "Locale data examples for the $code locale.",
                example_dts => \@example_dts,
                locale      => \$locale,
            },
        ) or die $Text::Template::ERROR;

        $pod_file->spew( iomode => '>:encoding(UTF-8)', $filled );
    }

    return;
}

sub _build_script {
    return file($0);
}

sub _build_source_data_root ($self) {
    return $self->_script->parent->parent->subdir('source-data');
}

sub _build_locale_codes ($self) {
    my $avail
        = decode_json(
        $self->_source_data_root->file(qw( cldr-core availableLocales.json ))
            ->slurp( iomode => '<:raw' ) );
    my $default
        = decode_json(
        $self->_source_data_root->file(qw( cldr-core defaultContent.json ))
            ->slurp( iomode => '<:raw' ) );

    return [
        $avail->{availableLocales}{full}->@*,
        $default->{defaultContent}->@*
    ];
}

__PACKAGE__->meta->make_immutable;

1;