# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package HTML::Linear::Element;
# ABSTRACT: represent elements to populate HTML::Linear
use strict;
use utf8;
use warnings qw(all);

use Digest::SHA;
use Encode;
use List::Util qw(sum);
use Moo;
use MooX::Types::MooseLike::Base qw(:all);

use HTML::Linear::Path;

## no critic (ProtectPrivateSubs)

our $VERSION = '0.019'; # VERSION


# Element attributes; BUILD fills this with a copy of the last path
# segment's attributes.
has attributes => (
    is      => 'rw',
    isa     => HashRef[Str],
    default => sub { {} },
);

# Element content (text).
has content => (
    is      => 'rw',
    isa     => Str,
    default => sub { '' },
);

# Depth level of the element; must be supplied to the constructor.
has depth => (
    is       => 'ro',
    isa      => Int,
    required => 1,
);

# Index to preserve the order of elements.
has index => (
    is      => 'rw',
    isa     => Int,
    default => sub { 0 },
);

# Maps a path segment address to the suffix appended to that segment
# by as_xpath().
has index_map => (
    is      => 'rw',
    isa     => HashRef[Str],
    default => sub { {} },
);

# Cached stringified representation; empty until as_string() computes it.
has key => (
    is      => 'rw',
    isa     => Str,
    default => sub { '' },
);

# Path segments (HTML::Linear::Path objects) leading to this element;
# must be supplied to the constructor.
has path => (
    is       => 'ro',
    isa      => ArrayRef[InstanceOf('HTML::Linear::Path')],
    required => 1,
);

# SHA-256 digest object, created lazily on first access.
has sha => (
    is      => 'ro',
    isa     => InstanceOf('Digest::SHA'),
    lazy    => 1,
    default => sub { Digest::SHA->new(256) },
);

# When true, as_hash() emits every attribute without consulting
# HTML::Linear::Path::_isgroup().
has strict => (
    is      => 'ro',
    isa     => Bool,
    default => sub { 0 },
);

# First index of "path" that as_xpath() includes in its output.
has trim_at => (
    is      => 'rw',
    isa     => Int,
    default => sub { 0 },
);

# Objects stringify through as_string().
use overload
    '""'     => \&as_string,
    fallback => 1;


# Post-construction hook: replace this element's attributes with a shallow
# copy of the attributes of the last segment in "path".
# Returns nothing.
sub BUILD {
    my ($self) = @_;

    my $last_segment = $self->path->[-1];

    # Copy into a fresh hash so the element does not share the hash
    # reference held by the path segment.
    my %copied = %{ $last_segment->attributes };
    $self->attributes(\%copied);

    return;
}


# Return the stringified signature (key) of this element.
#
# The key is the Base64-encoded SHA-256 digest of the element content
# (as UTF-8 octets), followed by its index and the comma-joined
# stringified path segments.  The result is stored in the "key" attribute;
# once a true key is stored, later calls return it without recomputing.
sub as_string {
    my ($self) = @_;
    return $self->key if $self->key;

    my $sha = $self->sha;

    # Digest::SHA works on octets.  Encode explicitly rather than flipping
    # the internal UTF8 flag with Encode::_utf8_off (an internal function),
    # so the digest does not depend on how Perl happens to store the string.
    $sha->add(Encode::encode_utf8($self->content));

    $sha->add($self->index);

    # "path" is an array reference: dereference it so the segments
    # themselves are digested.  Joining the reference would feed
    # "ARRAY(0x...)", i.e. a memory address, into the digest.
    # NOTE(review): relies on HTML::Linear::Path objects stringifying to a
    # meaningful signature (overloaded '""') -- confirm.
    $sha->add(join ',', @{$self->path});

    # b64digest also resets the digest object for reuse.
    return $self->key($sha->b64digest);
}


# Build the XPath representation of this element's path.
#
# Only the segments from index "trim_at" onwards are used.  Each segment
# contributes its own as_xpath() followed by the "index_map" entry for its
# address (or nothing when there is no entry).  When "trim_at" is non-zero,
# an extra separator is put in front of the segments.
#
# Returns the list of segments in list context and their concatenation in
# scalar context.
sub as_xpath {
    my ($self) = @_;

    my $segments = $self->path;
    my $suffixes = $self->index_map;
    my $first    = $self->trim_at;

    my @parts;
    for my $pos ($first .. $#{$segments}) {
        my $segment = $segments->[$pos];
        push @parts, $segment->as_xpath . ($suffixes->{$segment->address} // '');
    }

    if ($first) {
        unshift @parts, HTML::Linear::Path::_wrap(separator => '/');
    }

    return @parts if wantarray;
    return join '', @parts;
}


# Linearize the element into a hash reference.
#
# Keys are built from this element's XPath plus a separator:
#   - "<xpath>/@<name>" for each attribute (visited in sorted order); in
#     non-strict mode, attributes for which HTML::Linear::Path::_isgroup()
#     is true for the last path segment's tag are left out;
#   - "<xpath>/text()" for the content, unless the content is empty or
#     whitespace only.
sub as_hash {
    my ($self) = @_;

    my $prefix = $self->as_xpath . HTML::Linear::Path::_wrap(separator => '/');
    my $attrs  = $self->attributes;
    my %flat;

    for my $name (sort keys %{$attrs}) {
        # Outside strict mode, grouping attributes are not emitted.
        next
            if !$self->strict
            && HTML::Linear::Path::_isgroup($self->path->[-1]->tag, $name);

        my $value = $attrs->{$name};
        my $slot  = $prefix
            . HTML::Linear::Path::_wrap(sigil     => '@')
            . HTML::Linear::Path::_wrap(attribute => $name);
        $flat{$slot} = $value;
    }

    my $text = $self->content;
    if ($text !~ m{^\s*$}sx) {
        my $slot = $prefix . HTML::Linear::Path::_wrap(attribute => 'text()');
        $flat{$slot} = $text;
    }

    return \%flat;
}


# Return the XPath weight of this element: the sum of weight() over all
# segments in "path".
#
# The sum is seeded with 0 so that an empty path yields 0 rather than the
# undef that List::Util::sum returns for an empty list.
sub weight {
    my ($self) = @_;
    return sum(0, map { $_->weight } @{$self->path});
}

1;

__END__

=pod

=encoding UTF-8

=head1 NAME

HTML::Linear::Element - represent elements to populate HTML::Linear

=head1 VERSION

version 0.019

=head1 SYNOPSIS

    use HTML::Linear::Element;
    use HTML::Linear::Path;

    my $el = HTML::Linear::Element->new({
        depth   => 0,
        path    => [ HTML::Linear::Path->new({ address => q(...), tag => q(...) }) ],
    });

=head1 ATTRIBUTES

=head2 attributes

Element attributes.

=head2 content

Element content.

=head2 depth

Depth level of an element inside a L<HTML::TreeBuilder> structure.

=head2 index

Index used to preserve the order of elements.

=head2 index_map

Used for internal collision detection.

=head2 key

Stringified element representation.

=head2 path

Store representations of paths inside C<HTML::TreeBuilder> structure (L<HTML::Linear::Path>).

=head2 sha

Lazy L<Digest::SHA> (256-bit) representation.

=head2 strict

Strict mode disables grouping by tags/attributes listed in L<HTML::Linear::Path/%HTML::Linear::Path::groupby>.

=head2 trim_at

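Position in L</path> from which the XPath is considered unique. When non-zero, L</as_xpath> omits the path segments before this position and prepends an extra separator.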

=head1 METHODS

=head2 as_string

Stringified signature of an element.

=head2 as_xpath

Build a nice XPath representation of a path inside the L<HTML::TreeBuilder> structure.

Returns string in scalar context or XPath segments in list context.

=head2 as_hash

Linearize element as an associative array (Perl hash).

=head2 weight

Return the XPath weight: the sum of the weights of all L<HTML::Linear::Path> segments in L</path>.

=for Pod::Coverage BUILD

=head1 AUTHOR

Stanislaw Pusep <stas@sysd.org>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2014 by Stanislaw Pusep.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut