The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package HTTP::DOM;
use warnings;
use strict;
use base 'Object';
use Array;
use HTML::TreeBuilder::XPath;
use HTML::Selector::XPath qw(selector_to_xpath);

=head1 DESCRIPTION
A mini implement of Web::Query, but all method return Array object, no matter wantarray or no.
I only want something work with web_get, so modify-relate methods all deleted.
=cut

sub new {
    my ( $pkg, $html ) = @_;
    my $tree = HTML::TreeBuilder::XPath->new;
    $tree->ignore_unknown(0);
    $tree->store_comments(1);
    $tree->parse_content($html);
    my $self = $pkg->new_from_element( Array->new( $tree->guts ) );
    $self->{need_delete}++;
    return $self;
}

sub new_from_element {
    my $class = shift;
    my $trees = ref $_[0] eq 'Array' ? $_[0] : Array->new( $_[0] );
    return bless { trees => $trees, before => $_[1] }, ref($class) || $class;
}

sub end {
    my $self = shift;
    return $self->{before};
}

sub size {
    my $self = shift;
    return $self->{trees}->size;
}

sub parent {
    my $self = shift;
    my $new  = Array->new;
    $self->{trees}->each(
        sub {
            $new->push( shift->getParentNode() );
        }
    );
    return ( ref $self || $self )->new_from_element( $new, $self );
}

sub first {
    my $self = shift;
    return ( ref $self || $self )
      ->new_from_element( Array->new( $self->{trees}->get(0) || () ), $self );
}

sub last {
    my $self = shift;
    return ( ref $self || $self )
      ->new_from_element( Array->new( $self->{trees}->get(-1) || () ), $self );
}

=head2 find
    my $dom = HTTP::DOM->new($content);
    print $dom->find('#m')->text->get(0);
=cut
sub find {
    my ( $self, $selector ) = @_;
    my $xpath_rootless = selector_to_xpath($selector);

    my $new = Array->new;
    $self->{trees}->each(
        sub {
            my $tree = shift;
            $new->push($tree)
              if defined $tree->parent && $tree->matches($xpath_rootless);
            $new->push(
                $tree->findnodes(
                    selector_to_xpath(
                        $selector, root => defined $tree->parent ? './' : '/'
                    )
                )
            );
        }
    );

    return ( ref $self || $self )->new_from_element( $new, $self );
}

=head2 html
=cut
sub html {
    my $self = shift;
    my $html = Array->new;
    $self->{trees}->each(
        sub {
            $html->push( shift->as_HTML );
        }
    );
    return $html->join("\n");
}

sub xml {
    my $self = shift;
    my $html = Array->new;
    $self->{trees}->each(
        sub {
            $html->push( shift->as_XML_indented );
        }
    );
    return $html->join("\n");
}

=head2 text
    my $dom = HTTP::DOM->new($content);
    print $dom->text->get(0);
=cut
sub text {
    my $self = shift;
    my $text = Array->new;
    $self->{trees}->each(
        sub {
            $text->push( shift->as_text );
        }
    );
    return $text->join("\n");
}

=head2 attr
    my $url = 'http://www.baidu.com/';
    my $hc = HTTP::Client->new;
    my $content = $hc->web_get($url);
    my $dom = HTTP::DOM->new($content);
    print $dom->find('meta')->attr('content')->get(0);
=cut
sub attr {
    my $self      = shift;
    my @attr_keys = @_;
    my $retval    = Array->new;
    $self->{trees}->each(
        sub {
            $retval->push( shift->attr(@attr_keys) );
        }
    );
    return $retval->join("\n");
}

sub DESTROY {
    if ( $_[0]->{need_delete} ) {
        $_->delete for @{ $_[0]->{trees} };
    }
}

1;