#!/usr/bin/perl
use strict;
use warnings;
use Glib qw(TRUE FALSE);
use Gtk2 '-init';
use HTML::TreeBuilder;
my $NODE_POS = 0;
my $NODE_DATA = $NODE_POS++;
my $NODE_NAME = $NODE_POS++;
exit main() unless caller;
sub main {
local $| = 1;
my ($html) = @ARGV;
$html = \qq{
<html>
<body>
<p>Hello
<s>world</s>
</p>
<a hrf='http://www.gnome.org/'>link</a>
<b>bold</b>
<i>italic</i>
</body>
</html>
} unless $html;
my $document = parse_html($html);
my $model = my::HtmlTreeModel->new($document);
my $window = Gtk2::Window->new();
$window->set_size_request(200, 200);
my $view = create_tree_view();
$view->set_model($model);
$window->add(scrollify($view));
$window->signal_connect(destroy => sub {Gtk2->main_quit(); });
$window->show_all();
Gtk2->main();
return 0;
}
sub create_tree_view {
my $view = Gtk2::TreeView->new();
$view->set_fixed_height_mode(TRUE);
my $cell = Gtk2::CellRendererText->new();
my $column = Gtk2::TreeViewColumn->new();
$column->pack_end($cell, TRUE);
$column->set_title('Element');
$column->set_resizable(TRUE);
$column->set_sizing('fixed');
$column->set_fixed_width(150);
$column->set_attributes($cell, text => $NODE_NAME);
$view->append_column($column);
return $view;
}
sub scrollify {
my ($widget, $width, $height) = @_;
$width = -1 unless defined $width;
$height = -1 unless defined $height;
my $scroll = Gtk2::ScrolledWindow->new();
$scroll->set_policy('automatic', 'automatic');
$scroll->set_shadow_type('in');
$scroll->set_size_request($width, $height);
$scroll->add($widget);
return $scroll;
}
sub parse_html {
my ($html) = @_;
if (ref $html) {
return HTML::TreeBuilder->new_from_content($$html);
}
return HTML::TreeBuilder->new_from_file($html);
}
package my::HtmlTreeModel;
##
## Implementation of a TreeModel that wraps a HTML::TreeBuilder tree. This tree
## model shows only the element nodes and hides all content nodes (the text
## inside an element node).
##
## This TreeModel has 2 columns per row: the element's name and the actual node.
## At the moment only the name field is used.
##
use Glib qw(TRUE FALSE);
use Carp;
use Scalar::Util 'refaddr';
use Glib::Object::Subclass 'Glib::Object' =>
interfaces => [ 'Gtk2::TreeModel' ]
;
sub new {
my $class = shift;
my ($node) = @_ or croak "Usage: ${class}->new(node)";
my $self = $class->SUPER::new();
$self->{stamp} = sprintf '%d', rand (1<<31);
$self->{node} = $node;
$self->{types} = [ 'Glib::Scalar', 'Glib::String' ];
return $self;
}
sub GET_FLAGS { [ 'iters-persist' ] }
sub GET_N_COLUMNS { 2 }
sub GET_COLUMN_TYPE {
my ($self, $index) = @_;
return $self->{types}[$index];
}
sub GET_ITER {
my ($self, $path) = @_;
# We don't need the first level
my (undef, @pos) = split /:/, $path->to_string;
my $node = $self->{node};
foreach my $pos (@pos) {
# We keep only the element nodes, this tree doesn't show the content nodes
my @nodes = grep { is_element($_) } $node->content_list;
$node = $nodes[$pos];
}
return $self->new_iter($node);
}
sub GET_PATH {
my ($self, $iter) = @_;
my $path = Gtk2::TreePath->new();
my $node = $self->get_node($iter) or return undef;
my @indexes;
for (; $node; $node = $node->parent) {
my $index = 0;
# We must use a list context here otherwise we could get a content node and
# we will not be able to perform a call to <left>.
foreach my $left ($node->left) {
# Because we want only the elements to appear in the tree we have to
# exclude some nodes
next unless is_element($left);
++$index;
}
push @indexes, $index;
}
foreach my $index (reverse @indexes) {
$path->append_index($index);
}
return $path;
}
sub GET_VALUE {
my ($self, $iter, $column) = @_;
my $node = $self->get_node($iter) or return "broken iter?";
if ($column == 0) {
return $node;
}
elsif ($column == 1) {
return $node->tag;
}
return "Which column?";
}
sub ITER_NEXT {
my ($self, $iter) = @_;
my $node = $self->get_node($iter) or return undef;
# We have to get the list of nodes because calling node->right is scalar
# context can return a content node and then we lose the capability to go to
# the next node.
foreach my $next ($node->right) {
return $self->new_iter($next) if is_element($next);
}
return undef;
}
sub ITER_CHILDREN {
my ($self, $iter) = @_;
if ($iter) {
my $node = $self->get_node($iter) or return undef;
foreach my $child ($node->content_list) {
return $self->new_iter($child) if is_element($child);
}
return undef;
}
return $self->new_iter($self->{node});
}
sub ITER_HAS_CHILD {
my ($self, $iter) = @_;
my $node = $self->get_node($iter) or return FALSE;
foreach my $child ($node->content_list) {
return TRUE if is_element($child);
}
return FALSE;
}
sub ITER_N_CHILDREN {
my ($self, $iter) = @_;
my $node = $iter ? $self->get_node($iter) : $self->{node};
return undef unless $node;
my $count = 0;
foreach my $child ($node->content_list) {
# We only want element nodes
++$count if is_element($child);
}
return $count;
}
sub ITER_NTH_CHILD {
my ($self, $iter, $n) = @_;
# Special case: if iter == NULL, return number of top-level rows
my $node = $iter ? $self->get_node($iter) : $self->{node};
return undef unless $node;
# Get the nodes in list context because if we are given a content node we will
# no be able to todo $node->right.
my @nodes = $node->right;
for (my $i = 0; $i < $n;) {
$node = shift @nodes or return undef;
++$i if is_element($node);
}
return $self->new_iter($node);
}
sub ITER_PARENT {
my ($self, $iter) = @_;
my $node = $self->get_node($iter) or return undef;
return $self->new_iter($node->parent);
}
# Returns TRUE if the given node is and element. HTML::Tree has to types of
# nodes: Elements and cotent (text strings).
sub is_element {
my $ref = ref $_[0];
return ($ref eq 'HTML::Element' || $ref eq 'HTML::TreeBuilder');
}
# Builds the arrayref that most methods should return.
sub new_iter {
my ($self, $node) = @_;
return $node ? [ $self->{stamp}, 0, $node, undef ] : undef;
}
# Returns a node from a given iter. This method complements <new_iter>. If the
# iter has no node then undef is returned instead.
sub get_node {
my ($self, $iter) = @_;
return undef if $iter->[0] == 0
and $iter->[1] == 0
and ! defined $iter->[2]
and ! defined $iter->[3]
;
my $node = $iter->[2];
if (! $node) {
Carp::cluck "Iter has no node: ", iter_dumper($iter);
return undef;
}
return $node;
}
# Used for debugging purposes.
sub iter_dumper {
my ($iter) = @_;
return is_element($iter->[2]) ? $iter->[2] . " - " . $iter->[2]->tag : $iter->[2];
}