The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package HTML::TreeBuilder::LibXML::Node;
use strict;
use warnings;
use Carp();

sub new {
    my ($class, $node) = @_;
    Carp::croak 'missing node' unless $node;
    bless {node => $node}, $class;
}

sub attr {
    my ($self, $key, $value) = @_;
    if (@_ == 3) {
        if (defined $value) {
            $self->{node}->setAttribute (lc $key, $value);
        } else {
            $self->{node}->removeAttribute(lc $key);
        }
    } elsif (@_ == 2 and lc $key eq 'text') {
        return $self->{node}->textContent;
    }
    $self->{node}->getAttribute(lc $key);
}

sub isTextNode {
    my $self = shift;
    $self->{node}->isa('XML::LibXML::Text');
}

# The analog of HTML::TreeBuilder::XPath::getValue for comment nodes
*getValue = \&as_text;

sub string_value {
    $_[0]->{node}->textContent;
}

sub as_text {
    $_[0]->{node}->textContent;
}

sub as_trimmed_text {
    my $text = shift->as_text(@_);
    $text =~ s/[\n\r\f\t ]+$//s;
    $text =~ s/^[\n\r\f\t ]+//s;
    $text =~ s/[\n\r\f\t ]+/ /g;
    return $text;
}
sub as_text_trimmed { shift->as_trimmed_text(@_) } # alias

sub objectify_text { }
sub deobjectify_text { }

sub as_XML {
    $_[0]->{node}->toString;
}

sub as_HTML {
    return $_[0]->{node}->toStringHTML if $_[0]->{node}->can('toStringHTML'); # best method, but only document nodes can toStringHTML()
    
    # second best is to call toStringC14N(1), which generates valid HTML (eg. no auto closed <div/>), 
    # but dies on some cases with "Failed to convert doc to string in doc->toStringC14N" error.
    # so we fallback to toString()
    {
        local $@; # protect existing $@
        my $output = eval { $_[0]->{node}->toStringC14N(1) };
        return $@ ? $_[0]->{node}->toString : $output;
    }
}

sub tag {
    $_[0]->{node}->localname
}

sub id {
    if (@_==2) {
        # setter
        if (defined $_[1]) {
            $_[0]->{node}->setAttribute('id', $_[1]);
        } else {
            $_[0]->{node}->removeAttribute('id');
        }
    } else {
        $_[0]->{node}->getAttribute('id');
    }
}

# hack for Web::Scraper
sub isa {
    my ($self, $klass) = @_;
    $klass eq 'HTML::Element' ? 1 : UNIVERSAL::isa($self, $klass);
}

sub exists {
    my( $self , $xpath ) = @_;

    $self->_eof_or_die unless $self->{node};
    my @nodes = $self->{node}->findnodes( $xpath );
    return scalar( @nodes ) ? 1 : 0;
}

sub find {
    my( $self , $elem ) = @_;

    $self->_eof_or_die unless $self->{node};

    my @nodes = $self->{node}->getElementsByTagName( $elem );
    @nodes = map { HTML::TreeBuilder::LibXML::Node->new( $_ ) } @nodes;

    wantarray ? @nodes : \@nodes;
}

sub findnodes {
    my ($self, $xpath) = @_;

    $self->_eof_or_die unless $self->{node};
    my @nodes = $self->{node}->findnodes( $xpath );
    @nodes = map { HTML::TreeBuilder::LibXML::Node->new($_) } @nodes;
    wantarray ? @nodes : \@nodes;
}

*findnodes_as_string  = \&findvalue;
*findnodes_as_strings = \&findvalues;

sub findnodes_filter {
    my( $self , $xpath , $callback ) = @_;

    Carp::croak "Second argument must be coderef"
          unless $callback and ref $callback eq 'CODE';

    my @nodes = $self->findnodes( $xpath );
    @nodes = grep { $callback->($_) } @nodes;

    wantarray ? @nodes : \@nodes;
}

sub findvalue {
    my ($self, $xpath) = @_;

    $self->_eof_or_die unless $self->{node};
    $self->{node}->findvalue( $xpath );
}

sub findvalues {
    my( $self , $xpath ) = @_;

    $self->_eof_or_die unless $self->{node};
    my $nodes = $self->{node}->find( $xpath );
    my @nodes = map { $_->textContent } $nodes->get_nodelist;
    wantarray ? @nodes : \@nodes;
}

sub clone {
    my ($self, ) = @_;

    my $orignode = $self->{node};
    my $origdoc = $orignode->ownerDocument;

    my $node = $orignode->cloneNode(1);
    my $doc = XML::LibXML::Document->new($origdoc->version, $origdoc->encoding);
    
    if ($node->isa('XML::LibXML::Dtd')) {
        $doc->createInternalSubset( $node->getName, $node->publicId, $node->systemId );
        $node = $doc->internalSubset;        
    } elsif ($node->isa('XML::LibXML::Element')) {
        $doc->setDocumentElement($node);    
    } else {
        $doc->adoptNode($node);
    }
    
    my $cloned = __PACKAGE__->new($node);
    return $cloned;
}

sub clone_list {
    my $class = shift;
    my @clones = map { $_->clone } @_;
    @clones;    
}

sub detach {
    my $self = shift;
    my $parent = $self->parent;
    #$self->{node}->unbindNode();   
    my $doc = XML::LibXML->createDocument;
    $doc->adoptNode($self->{node});
    $doc->setDocumentElement($self->{node});  
    $parent;
}

sub delete {
    my $self = shift;
    $self->{node}->unbindNode();
}

sub delete_content {
    my ($self) = @_;
    $self->{node}->removeChildNodes;
}

sub getFirstChild {
    my $self = shift;
    __PACKAGE__->new($self->{node}->getFirstChild);
}

sub childNodes {
    my $self = shift;

    $self->_eof_or_die unless $self->{node};
    my @nodes = $self->{node}->childNodes;
    @nodes = map { __PACKAGE__->new($_) } @nodes;
    wantarray ? @nodes : \@nodes;
}

sub content_list {
    my ($self) = @_;
    my @nodes = $self->childNodes;
    @nodes;
}

sub replace_with {
    my $self = shift;
    
    # TODO handle @_ == 0
    
    my $node   = $self->{node}; 
    my $doc    = $node->ownerDocument;
    my $parent = $node->parentNode;
    die "can't replace_with(), node has no parent!" unless $parent;
            
    my @nodes  = map { ref $_ ? $_->{node} : $doc->createTextNode($_) } @_;
    
    if ($parent->isa('XML::LibXML::Document')) {
        # can't call insertBefore() in a document node,
        # so this is the best hack so far :[
        # works only if $node is the last child
        die "[not supported] calling replace_with() in a node that is child of a document node, and its not the last child."
            unless $node->isSameNode($parent->lastChild);
        
        foreach (@nodes) {
                       
            if ($_->isa('XML::LibXML::Dtd')) {
                $parent->createInternalSubset($_->getName, $_->publicId, $_->systemId);
                next;
            }
            $parent->adoptNode($_);
            $node->addSibling($_);
        }    
         
    }
    else {
        $parent->insertBefore($_, $node)
            for @nodes;        
    }
    
    $self->detach;   
    $self;  
}

sub push_content {
    my $self = shift;
 
    my $node = $self->{node};
    my $doc = $node->isa('XML::LibXML::Document') ? $node : $node->ownerDocument;
    my @nodes = map { ref $_ ? $_->{node} : $doc->createTextNode($_) } @_;
    
    # thats because appendChild() is not supported on a Document node (as of XML::LibXML 2.0017)
    if ($node->isa('XML::LibXML::Document')) { 
        
        foreach (@nodes) {      
            #$node->adoptNode($_);
            $node->hasChildNodes ? $node->lastChild->addSibling($_)
                                 : $node->setDocumentElement($_);
        }        
    }
    else {
        $node->appendChild($_) for @nodes;
    }
    
    $self;
}

sub unshift_content {
    my $self = shift;
    
    return $self->push_content(@_) 
        unless $self->{node}->hasChildNodes;

    my $node = $self->{node};
    my $doc = $node->isa('XML::LibXML::Document') ? $node : $node->ownerDocument;
    my @nodes = map { ref $_ ? $_->{node} : $doc->createTextNode($_) } @_;
    
    # thats because insertBefore() is not supported on a Document node (as of XML::LibXML 2.0017)
    if ($node->isa('XML::LibXML::Document')) {
        
        foreach (@nodes) {
            $node->hasChildNodes ? $node->lastChild->addSibling($_)
                                 : $node->setDocumentElement($_);
        }
        
        # rotate
        while (not $node->firstChild->isSameNode($nodes[0])) {
            my $first_node = $node->firstChild;
            $first_node->unbindNode;
            $node->lastChild->addSibling($first_node);
            
        }        
    }
    else {
        my $first_child = $node->firstChild;
        $node->insertBefore($_, $first_child) for @nodes;
    }
    
    $self;    
}

sub left {
    my $self = shift;

    $self->_eof_or_die unless $self->{node};
    my $prev = $self->{node}->previousNonBlankSibling;
    return $prev ? __PACKAGE__->new( $prev ) : undef;
}

sub right {
    my $self = shift;

    $self->_eof_or_die unless $self->{node};
    my $next = $self->{node}->nextNonBlankSibling;
    return $next ? __PACKAGE__->new( $next ) : undef;
}

sub look_down {
    my $self = shift;
    my @args = @_;

    $self->_eof_or_die unless $self->{node};

    my @filter;
    my $xpath = "//*"; # default
    while (@args) {
        if (ref $args[0] eq 'CODE') {
            my $code = shift @args;
            push @filter, $code;
        } elsif (@args >= 2 && $args[0] eq '_tag') {
            my($tag, $want_tag) = splice(@args, 0, 2);
            $xpath = "//$want_tag";
        } elsif (@args >= 2) {
            my($attr, $stuff) = splice(@args, 0, 2);
            if (ref $stuff eq 'Regexp') {
                push @filter, sub { no warnings 'uninitialized'; $_[0]->attr($attr) =~ $stuff };
            } else {
                push @filter, sub { no warnings 'uninitialized'; $_[0]->attr($attr) eq $stuff };
            }
        } else {
            Carp::carp("Don't know what to do with @args");
            shift @args;
        }
    }

    $xpath =~ s/~text\b/text()/g;

    my @nodes = $self->findnodes($xpath);
    my @wants = grep {
        my $node = $_;
        my $ok = 1;
        for my $filter (@filter) {
            $filter->($_) or $ok = 0;
        }
        $ok ? $node : ();
    } @nodes;

    wantarray ? @wants : $wants[0];
}

sub all_attr {
    my $self = shift;
    return map { $_->name => $_->value } $self->{node}->attributes;
}

sub all_attr_names {
    my $self = shift;
    return map $_->name, $self->{node}->attributes;
}

sub all_external_attr       { shift->all_attr(@_) }
sub all_external_attr_names { shift->all_attr_names(@_) }

sub _eof_or_die {
    my $self = shift;
    if (defined($self->{_content})) {
        $self->eof;
    } else {
        Carp::croak "\$self is not loaded: $self"
    }
}


sub matches {
    my ($self, $xpath) = @_;    
    
    foreach ($self->{node}->ownerDocument->findnodes($xpath)) {
        return 1 if $_->isEqual($self->{node});
    }    
    
    return;
}

sub parent {
    my $self = shift;
    
    if (@_) {    
        
        # unset
        unless (defined $_[0]) {
            $self->{node}->unbindNode;   
            return;            
        }
        
        # set        
        Carp::croak "an element can't be made its own parent"
            if ref $_[0]->{node}->isSameNode($self->{node});    # sanity
            
        my $parent = $_[0]->{node};
        
        if ($_[0]->{node}->isa('XML::LibXML::Document')) {

            if ($parent->hasChildNodes) {
                $parent->lastChild->addSibling($self->{node});
            }
            else {
                $parent->adoptNode($self->{node});
                $parent->setDocumentElement($self->{node});                    
            }                
            
        }
        else {
            $parent->appendChild($self->{node});                
        }
                
    }
    else {
        # get
        my $parent = $self->{node}->parentNode;
        return defined $parent && !$parent->isa('XML::LibXML::DocumentFragment')? ref($self)->new($parent) : undef;
    }

}

sub postinsert {
    my $self   = shift;    
    my @nodes  = map { $_->{node} } @_;
    my $parent = $self->{node}->parentNode;
    
    $parent->insertAfter($_, $self->{node})
        foreach reverse @nodes;
    
    $self;    
}

sub preinsert {
    my $self   = shift;    
    my @nodes  = map { $_->{node} } @_;
    my $parent = $self->{node}->parentNode;
    
    $parent->insertBefore($_, $self->{node})
        foreach @nodes;
    
    $self;    
}



1;

__END__

=head1 NAME

HTML::TreeBuilder::LibXML::Node - HTML::Element compatible API for HTML::TreeBuilder::LibXML

=head1 SYNOPSIS

  my $value = $node->attr('name');
  my $string = $node->string_value;
  my $text   = $node->as_text;
  my $t_text = $node->as_trimmed_text;
  my $xml    = $node->as_XML;
  my $html   = $node->as_HTML;
  my $tag    = $node->tag;
  my $id     = $node->id;
  my $clone  = $node->clone;
  $node->delete;
  my $prev_sib = $node->left;
  my $next_sib = $node->right;
  $node->look_down(@args);
  my %attr     = $node->all_attr;
  my %attr     = $node->all_external_attr;
  my @names    = $node->all_attr_names;
  my @names    = $node->all_external_attr_names;
  my @elements = $node->find($elem_name);
  my @child_nodes = $node->content_list;
  my $parent = $node->parent;
  my $old_parent = $node->detach;
  $node->postinsert($element_or_text, ...);
  $node->preinsert($element_or_text, ...);
  $node->push_content($element_or_text, ...);
  $node->unshift_content($element_or_text, ...);
  $node->replace_with($element_or_text, ...);
  
  my @clones = HTML::TreeBuilder::LibXML::Node->clone_list(@nodes);

  # HTML::TreeBuilder::XPath
  my @nodes  = $node->find($xpath)
  my @nodes  = $node->findnodes($xpath);
  my $value  = $node->findvalue($xpath);
  my @values = $node->findvalues($xpath);
  $node->isTextNode;
  my $child = $node->getFirstChild;
  my $bool  = $node->exists($xpath);
  my $bool  = $node->matches($xpath);

  my $value  = $node->findnodes_as_string($xpath);
  my @values = $node->findnodes_as_strings($xpath);