t/01.parse.t - metacpan.org

use Test::More tests => 12;
use Data::Dumper;

BEGIN {
  use_ok( 'XML::Descent' );
}

my $xml = <<EOX;
<config>
    <favourites>
        <folder name="Me">
            <url name="Hexten">http://hexten.net/</url>
        </folder>
        <folder name="Programming">
            <url name="Source code search">http://www.koders.com/</url>
            <folder name="Perl">
                <url name="CPAN Search">http://search.cpan.org/</url>
                <url name="Perl Documentation">http://perldoc.perl.org/</url>
            </folder>
            <folder name="Ruby">
                <url name="Ruby Home">http://www.ruby-lang.org/</url>
            </folder>
        </folder>
    </favourites>
    <?horse?>
    <meta>
        <title>Frog fleening</title>
        <body>The body text is just <a href="http://www.w3.org/MarkUp/">HTML</a>.</body>
        <url>http://cpan.hexten.net/</url>
        <ignored>This text is ignored</ignored>
        <handled>This has a handler which doesn't recursively parse the contents</handled>
        <tokenised>This is <i>tokenised</i>.</tokenised>
    </meta>
</config>
EOX

# Trailing newline causes problems
chomp $xml;

#### Test xml() returns original source

my $p1 = XML::Descent->new( { Input => \$xml } );
my $o1 = $p1->xml();

is( $o1, $xml, 'unparsed XML' );

#### Test text() returns all text

( my $text = $xml ) =~ s/<[^>]+>//g;
my $p2 = XML::Descent->new( { Input => \$xml } );
my $o2 = $p2->text();

is( $o2, $text, 'extracted text' );

#### Global extract tag contents

my @furls = $xml =~ />(http:.+?)</g;
my $p3 = XML::Descent->new( { Input => \$xml } );
my @gurls = ();
$p3->on(
  url => sub {
    push @gurls, $p3->text();
  }
);
$p3->walk();

is_deeply( \@gurls, \@furls, 'extract urls' );

#### Get all elements

my @felem = $xml =~ /<(\w+)/g;
my $p4 = XML::Descent->new( { Input => \$xml } );
my @gelem = ();
$p4->on(
  '*' => sub {
    my ( $elem, $attr ) = @_;
    push @gelem, $elem;
    $p4->walk();
  }
);
$p4->walk();

is_deeply( \@gelem, \@felem, 'all elements' );

#### Global extract attribute

my @fnames = $xml =~ /name=\"(.*?)\"/g;
my $p5 = XML::Descent->new( { Input => \$xml } );
my @gnames = ();
$p5->on(
  '*' => sub {
    my ( $elem, $attr ) = @_;
    push @gnames, $attr->{name} if exists $attr->{name};
    $p5->walk();
  }
);
$p5->walk();

is_deeply( \@gnames, \@fnames, 'extracted attributes' );

#### Extract inner XML
my @fmeta = $xml =~ m{<meta>(.*?)</meta>}sm;
my $gmeta = undef;
my $p6    = XML::Descent->new( { Input => \$xml } );
$p6->on(
  meta => sub {
    $gmeta = $p6->xml();
  }
);
$p6->walk();

is( $gmeta, $fmeta[0], 'extract inner XML' );

#### Extract inner text

( my $ftext = $fmeta[0] ) =~ s/<.+?>//g;
my $gtext = undef;
my $p7 = XML::Descent->new( { Input => \$xml } );
$p7->on(
  meta => sub {
    $gtext = $p7->text();
  }
);
$p7->walk();

is( $gtext, $ftext, 'extract inner text' );

#### Test get_tok

my $ftag = bless( [ 'E', 'url', '</url>' ], 'XML::TokeParser::Token' );
my $p8 = XML::Descent->new( { Input => \$xml } );
my $gtag = undef;
$p8->on(
  favourites => sub {
    TOK: while ( my $tok = $p8->get_token() ) {
      if ( $tok->[0] eq 'E' ) {
        $gtag = $tok;
        last TOK;
      }
    }
  }
);
my $gmeta2 = 0;
$p8->on(
  meta => sub {
    $gmeta2++;
  }
);
$p8->walk();

is_deeply( $gtag, $ftag, 'get_token' );
is( $gmeta2, 1, 'found meta' );

#### Test paths

my @path  = ();
my @fpath = ();
while ( $xml =~ m{<(/?[a-z]+)}g ) {
  my $tag = $1;
  if ( $tag =~ m{^/} ) {
    pop @path;
  }
  else {
    push @path, $tag;
    push @fpath, '/' . join( '/', @path );
  }
}
my @gpath = ();
my $p9 = XML::Descent->new( { Input => \$xml } );
$p9->on(
  '*' => sub {
    push @gpath, $p9->get_path();
    $p9->walk();
  }
);
$p9->walk();

is_deeply( \@gpath, \@fpath, 'get_path()' );

#### Test context

my $p10 = XML::Descent->new( { Input => \$xml } );

my $root = {};
$p10->context( $root );

$p10->on(
  '*' => sub {
    my ( $elem, $attr, $ctx ) = @_;

    my $obj = { %{$attr} };    # Keep attributes
    $p10->context( $obj );
    $p10->walk();

    # Save results in caller's context
    push @{ $ctx->{$elem} }, $obj;
  }
);

# Leaf elements -> save text
$p10->on(
  [ 'url', 'title', 'ignored', 'handled' ],
  sub {
    my ( $elem, $attr, $ctx ) = @_;
    push @{ $ctx->{$elem} }, { %{$attr}, inner_text => $p10->text() };
  }
);

$p10->on(
  [ 'body', 'tokenised' ] => sub {
    my ( $elem, $attr, $ctx ) = @_;
    push @{ $ctx->{$elem} }, { %{$attr}, inner_text => $p10->xml() };
  }
);

$p10->walk();

$fstruc = {
  'config' => [
    {
      'favourites' => [
        {
          'folder' => [
            {
              'url' => [
                {
                  'name'       => 'Hexten',
                  'inner_text' => 'http://hexten.net/'
                }
              ],
              'name' => 'Me'
            },
            {
              'url' => [
                {
                  'name'       => 'Source code search',
                  'inner_text' => 'http://www.koders.com/'
                }
              ],
              'name'   => 'Programming',
              'folder' => [
                {
                  'url' => [
                    {
                      'name'       => 'CPAN Search',
                      'inner_text' => 'http://search.cpan.org/'
                    },
                    {
                      'name'       => 'Perl Documentation',
                      'inner_text' => 'http://perldoc.perl.org/'
                    }
                  ],
                  'name' => 'Perl'
                },
                {
                  'url' => [
                    {
                      'name'       => 'Ruby Home',
                      'inner_text' => 'http://www.ruby-lang.org/'
                    }
                  ],
                  'name' => 'Ruby'
                }
              ]
            }
          ]
        }
      ],
      'meta' => [
        {
          'body' => [
            {
              'inner_text' =>
               'The body text is just <a href="http://www.w3.org/MarkUp/">HTML</a>.'
            }
          ],
          'handled' => [
            {
              'inner_text' =>
               'This has a handler which doesn\'t recursively parse the contents'
            }
          ],
          'url'   => [ { 'inner_text' => 'http://cpan.hexten.net/' } ],
          'title' => [ { 'inner_text' => 'Frog fleening' } ],
          'tokenised' =>
           [ { 'inner_text' => 'This is <i>tokenised</i>.' } ],
          'ignored' => [ { 'inner_text' => 'This text is ignored' } ]
        }
      ]
    }
  ]
};

is_deeply( $root, $fstruc, 'context' );
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)