The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package ParseUtil::Domain;


## no critic
our $VERSION = '2.28';
$VERSION = eval $VERSION;
## use critic

use perl5i::2;

use Perl6::Export::Attrs;
use ParseUtil::Domain::ConfigData;
use Net::IDN::Encode ':all';
use Net::IDN::Punycode ':all';
use Net::IDN::Nameprep;

#use Smart::Comments;

func parse_domain($name) :Export(:parse) {
    $name =~ s/\s//gs;
    open my $utf8h, "<", \$name;
    my $utf8_name = do { local $/; <$utf8h>; };
    $utf8h->close;
    my @name_segments = $utf8_name->split(qr{\Q@\E});
    ### namesegments : Dump(\@name_segments)

    my @segments = $name_segments[-1]->split(qr/[\.\x{FF0E}\x{3002}\x{FF61}]/);
    ### executing with : $name
    my ( $zone, $zone_ace, $domain_segments ) =
      _find_zone( \@segments )->slice(qw/zone zone_ace domain/);

    ### found zone : $zone
    ### found zone_ace : $zone_ace

    my $puny_processed = _punycode_segments( $domain_segments, $zone );
    @{$puny_processed}{qw/zone zone_ace/} = ( $zone, $zone_ace );

    # process .name "email" domains
    if ( @name_segments > 1 ) {
        my $punycoded_name = _punycode_segments( [ $name_segments[0] ], $zone );
        my ( $name_domain, $name_ace ) =
          $punycoded_name->slice(qw/domain domain_ace/);

        $puny_processed->{domain} =
          [ $name_domain, $puny_processed->{domain} ]->join('@');
        if ($name_ace) {
            $puny_processed->{domain_ace} =
              [ $name_ace, $puny_processed->{domain_ace} ]->join('@');

        }
    }
    return $puny_processed;

}

func puny_convert($domain) :Export(:simple) {
    my @keys;
    given ($domain) {
        when (/\.?xn--/) {
            @keys = qw/domain zone/;
        }
        default {
            @keys = qw/domain_ace zone_ace/;
        }
    }
    my $parsed        = parse_domain($domain);
    my $parsed_domain = $parsed->slice(@keys)->join(".");

    return $parsed_domain;
}

func _find_zone($domain_segments) {

    my $tld_regex = ParseUtil::Domain::ConfigData->config('tld_regex');
    my $tld       = $domain_segments->pop;
    my $sld       = $domain_segments->pop;
    my $thld      = $domain_segments->pop;

    my ( $possible_tld, $possible_thld );
    my ( $sld_zone_ace, $tld_zone_ace ) =
      map { domain_to_ascii( nameprep $_) } $sld, $tld;
    my $thld_zone_ace;
    $thld_zone_ace = domain_to_ascii( nameprep $thld) if $thld;
    if ( $tld =~ /^de$/ ) {
        ### is a de domain
        $possible_tld = join "." => $tld, _puny_encode($sld);
    }
    else {
        $possible_tld = join "." => $tld_zone_ace, $sld_zone_ace;
        $possible_thld = join "." => $possible_tld,
          $thld_zone_ace
          if $thld_zone_ace;
    }
    my ( $zone, @zone_params );

    if ( $possible_thld and $possible_thld =~ /\A$tld_regex\z/ ) {
        my $zone_ace = join "." => $thld_zone_ace, $sld_zone_ace, $tld_zone_ace;
        $zone = join "." => $thld, $sld, $tld;
        push @zone_params, zone_ace => $zone_ace;
    }
    elsif ( $possible_tld =~ /\A$tld_regex\z/ ) {
        push @{$domain_segments}, $thld;
        my $zone_ace = join "." => $sld_zone_ace, $tld_zone_ace;
        $zone = join "." => $sld, $tld;
        push @zone_params, zone_ace => $zone_ace;
    }
    elsif ( $tld_zone_ace =~ /\A$tld_regex\z/ ) {
        push @{$domain_segments}, $thld if $thld;
        push @{$domain_segments}, $sld;
        push @zone_params, zone_ace => $tld_zone_ace;
        $zone = $tld;
    }
    croak "Could not find tld." unless $zone;
    my $unicode_zone = domain_to_unicode($zone);
    return {
        zone   => $unicode_zone,
        domain => $domain_segments,
        @zone_params
    };
}

func _punycode_segments( $domain_segments, $zone ) {
    if ( not $zone or $zone !~ /^(?:de|fr|pm|re|tf|wf|yt)$/ ) {
        my $puny_encoded = [];
        foreach my $segment ( @{$domain_segments} ) {
            croak
              "Error processing domain. Please report to package maintainer."
              if not $segment
              or $segment eq '';
            my $nameprepped = nameprep( lc $segment );
            my $ascii       = domain_to_ascii($nameprepped);
            push @{$puny_encoded}, $ascii;
        }
        my $puny_decoded = [ map { domain_to_unicode($_) } @{$puny_encoded} ];
        croak "Undefined mapping!"
          if $puny_decoded->any( sub { lc $_ ne nameprep( lc $_ ) } );
        return {
            domain     => $puny_decoded->join("."),
            domain_ace => $puny_encoded->join(".")
        };
    }

    # Avoid nameprep step for certain tlds
    my $puny_encoded = [ map { _puny_encode( lc $_ ) } @{$domain_segments} ];
    my $puny_decoded = [ map { _puny_decode($_) } @{$puny_encoded} ];
    return {
        domain     => $puny_decoded->join("."),
        domain_ace => $puny_encoded->join(".")
    };
}

func _puny_encode($unencoded) {

    ### encoding : $unencoded
    # quick check to make sure that domain should be decoded
    my $temp_unencoded = nameprep $unencoded;
    ### namepreped : $temp_unencoded
    my $test_encode = domain_to_ascii($temp_unencoded);
    return $unencoded if $test_encode eq $unencoded;
    return "xn--" . encode_punycode($unencoded);
}

func _puny_decode($encoded) {
    return $encoded
      unless $encoded =~ /xn--/;
    $encoded =~ s/^xn--//;
    ### decoding : $encoded
    my $test_decode = decode_punycode($encoded);
    ### test decode : $test_decode
    return $encoded if $encoded eq $test_decode;
    return decode_punycode($encoded);

}


__END__


=head1 NAME

=encoding utf8

ParseUtil::Domain - Domain parser and puny encoder/decoder.

=head1 SYNOPSIS

  use ParseUtil::Domain ':parse';

    my $processed = parse_domain("somedomain.com");
    #$processed:
    #{
        #domain => 'somedomain',
        #domain_ace => 'somedomain',
        #zone => 'com',
        #zone_ace => 'com'
    #}


=head1 DESCRIPTION


This purpose of this module is to parse a domain name into its respective name and tld. Note that
the I<tld> may actually refer to a second- or third-level domain, e.g. co.uk or
plc.co.im.  It also provides respective puny encoded and decoded versions of
the parsed domain.

This module uses TLD data from the L<Public Suffix List|http://publicsuffix.org/list/> which is included with this
distribution.


=head1 INTERFACE


=head2 parse_domain


=over 2

=item
parse_domain(string)


=over 3

=item
Examples:


   1. parse_domain('somedomain.com');

    Result:
    {
        domain     => 'somedomain',
        zone       => 'com',
        domain_ace => 'somedomain',
        zone_ace   => 'com'
    }

  2. parse_domain('test.xn--o3cw4h');

    Result:
    {
        domain     => 'test',
        zone       => 'ไทย',
        domain_ace => 'test',
        zone_ace   => 'xn--o3cw4h'
    }

  3. parse_domain('bloß.co.at');

    Result:
    {
        domain     => 'bloss',
        zone       => 'co.at',
        domain_ace => 'bloss',
        zone_ace   => 'co.at'
    }

  4. parse_domain('bloß.de');

    Result:
    {
        domain     => 'bloß',
        zone       => 'de',
        domain_ace => 'xn--blo-7ka',
        zone_ace   => 'de'
    }

=back



=back

=head2 puny_convert

Toggles a domain between puny encoded and decoded versions.


   use ParseUtil::Domain ':simple';

   my $result = puny_convert('bloß.de');
   # $result: xn--blo-7ka.de

   my $reverse = puny_convert('xn--blo-7ka.de');
   # $reverse: bloß.de






=head1 DEPENDENCIES

=over 3


=item
L<Net::IDN::Encode>


=item
L<Net::IDN::Punycode>


=item
L<Regexp::Assemble::Compressed>


=item
The L<Public Suffix List|http://publicsuffix.org/list/>.


=back


=head1 CHANGES


=over 3


=item *
Updated with latest version of the public suffix list.

=item *
Added a bunch of new TLDs (nTLDs).

=item *
Now uses L<perl5i>.


=back