The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
Changes 29
MANIFEST 13
MANIFEST.SKIP 065
META.json 056
META.yml 2525
MYMETA.yml 300
Makefile.PL 1824
lib/HTML/SimpleLinkExtor.pm 7069
t/parse.t 09
t/rt87288.t 016
t/test_manifest 01
11 files changed (This is a version diff) 146277
@@ -1,7 +1,14 @@
 Revision history for Perl extension HTML::SimpleLinkExtor.
 
-1.25 - Fri Aug  5 07:56:17 2011
-	* I need Test::More 0.96 for subtests and done_testing.
+1.27 - Tue Jan  7 08:01:22 2014
+	* RT #87288 - add the test for this bug to test_manifest
+
+1.26 - Fri Jan  3 14:00:31 2014
+	* Get rid of MYMETA
+
+1.25_01 - Fri Aug  9 14:34:27 2013
+	* Filter links by allowed tags. I should have been doing this
+	all along.  RT #87288
 
 1.24 - Thu Aug  4 06:37:30 2011
 	* Fix clear_links so it actually clears the links
@@ -4,8 +4,8 @@ lib/HTML/SimpleLinkExtor.pm
 LICENSE
 Makefile.PL
 MANIFEST
+MANIFEST.SKIP
 META.yml			Module meta-data (added by MakeMaker)
-MYMETA.yml
 README
 scripts/linktractor
 t/absolute_links.t
@@ -17,6 +17,8 @@ t/parse.t
 t/pod.t
 t/pod_coverage.t
 t/relative_links.t
+t/rt87288.t
 t/schemes.t
 t/tags.t
 t/test_manifest
+META.json                                Module JSON meta-data (added by MakeMaker)
@@ -0,0 +1,65 @@
+
+#!start included /usr/local/perls/perl-5.18.1/lib/5.18.1/ExtUtils/MANIFEST.SKIP
+# Avoid version control files.
+\bRCS\b
+\bCVS\b
+\bSCCS\b
+,v$
+\B\.svn\b
+\B\.git\b
+\B\.gitignore\b
+\b_darcs\b
+\B\.cvsignore$
+
+# Avoid VMS specific MakeMaker generated files
+\bDescrip.MMS$
+\bDESCRIP.MMS$
+\bdescrip.mms$
+
+# Avoid Makemaker generated and utility files.
+\bMANIFEST\.bak
+\bMakefile$
+\bblib/
+\bMakeMaker-\d
+\bpm_to_blib\.ts$
+\bpm_to_blib$
+\bblibdirs\.ts$         # 6.18 through 6.25 generated this
+
+# Avoid Module::Build generated and utility files.
+\bBuild$
+\b_build/
+\bBuild.bat$
+\bBuild.COM$
+\bBUILD.COM$
+\bbuild.com$
+
+# Avoid temp and backup files.
+~$
+\.old$
+\#$
+\b\.#
+\.bak$
+\.tmp$
+\.#
+\.rej$
+
+# Avoid OS-specific files/dirs
+# Mac OSX metadata
+\B\.DS_Store
+# Mac OSX SMB mount metadata files
+\B\._
+
+# Avoid Devel::Cover and Devel::CoverX::Covered files.
+\bcover_db\b
+\bcovered\b
+
+# Avoid MYMETA files
+^MYMETA\.
+#!end included /usr/local/perls/perl-5.18.1/lib/5.18.1/ExtUtils/MANIFEST.SKIP
+
+
+\.travis\.yml
+\.releaserc
+\.lwpcookies
+HTML-.*
+
@@ -0,0 +1,56 @@
+{
+   "abstract" : "Extract links from HTML",
+   "author" : [
+      "brian d foy <bdfoy@cpan.org>"
+   ],
+   "dynamic_config" : 1,
+   "generated_by" : "ExtUtils::MakeMaker version 6.86, CPAN::Meta::Converter version 2.120921",
+   "license" : [
+      "perl_5"
+   ],
+   "meta-spec" : {
+      "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
+      "version" : "2"
+   },
+   "name" : "HTML-SimpleLinkExtor",
+   "no_index" : {
+      "directory" : [
+         "t",
+         "inc"
+      ]
+   },
+   "prereqs" : {
+      "build" : {
+         "requires" : {
+            "ExtUtils::MakeMaker" : "0"
+         }
+      },
+      "configure" : {
+         "requires" : {
+            "ExtUtils::MakeMaker" : "6.64"
+         }
+      },
+      "runtime" : {
+         "requires" : {
+            "HTML::LinkExtor" : "1.28",
+            "LWP::UserAgent" : "0",
+            "URI" : "1.09",
+            "URI::file" : "0"
+         }
+      },
+      "test" : {
+         "requires" : {
+            "Test::More" : "0.96",
+            "Test::Output" : "0"
+         }
+      }
+   },
+   "release_status" : "stable",
+   "resources" : {
+      "repository" : {
+         "type" : "git",
+         "web" : "https://github.com/briandfoy/html-simplelinkextor"
+      }
+   },
+   "version" : "1.27"
+}
@@ -1,29 +1,29 @@
---- #YAML:1.0
-name:               HTML-SimpleLinkExtor
-version:            1.25
-abstract:           Extract links from HTML
+---
+abstract: 'Extract links from HTML'
 author:
-    - brian d foy <bdfoy@cpan.org>
-license:            perl
-distribution_type:  module
-configure_requires:
-    ExtUtils::MakeMaker:  0
+  - 'brian d foy <bdfoy@cpan.org>'
 build_requires:
-    ExtUtils::MakeMaker:  0
+  ExtUtils::MakeMaker: 0
+  Test::More: 0.96
+  Test::Output: 0
+configure_requires:
+  ExtUtils::MakeMaker: 6.64
+dynamic_config: 1
+generated_by: 'ExtUtils::MakeMaker version 6.86, CPAN::Meta::Converter version 2.120921'
+license: perl
+meta-spec:
+  url: http://module-build.sourceforge.net/META-spec-v1.4.html
+  version: 1.4
+name: HTML-SimpleLinkExtor
+no_index:
+  directory:
+    - t
+    - inc
 requires:
-    HTML::LinkExtor:  1.28
-    LWP::UserAgent:   0
-    Test::More:       0.96
-    Test::Output:     0
-    URI:              1.09
-    URI::file:        0
+  HTML::LinkExtor: 1.28
+  LWP::UserAgent: 0
+  URI: 1.09
+  URI::file: 0
 resources:
-    repository:  https://github.com/briandfoy/html-simplelinkextor
-no_index:
-    directory:
-        - t
-        - inc
-generated_by:       ExtUtils::MakeMaker version 6.57_05
-meta-spec:
-    url:      http://module-build.sourceforge.net/META-spec-v1.4.html
-    version:  1.4
+  repository: https://github.com/briandfoy/html-simplelinkextor
+version: 1.27
@@ -1,30 +0,0 @@
----
-abstract: 'Extract links from HTML'
-author:
-  - 'brian d foy <bdfoy@cpan.org>'
-build_requires:
-  ExtUtils::MakeMaker: 0
-configure_requires:
-  ExtUtils::MakeMaker: 0
-distribution_type: module
-dynamic_config: 0
-generated_by: 'ExtUtils::MakeMaker version 6.57_05'
-license: perl
-meta-spec:
-  url: http://module-build.sourceforge.net/META-spec-v1.4.html
-  version: 1.4
-name: HTML-SimpleLinkExtor
-no_index:
-  directory:
-    - t
-    - inc
-requires:
-  HTML::LinkExtor: 1.28
-  LWP::UserAgent: 0
-  Test::More: 0.96
-  Test::Output: 0
-  URI: 1.09
-  URI::file: 0
-resources:
-  repository: https://github.com/briandfoy/html-simplelinkextor
-version: 1.25
@@ -1,38 +1,44 @@
-use ExtUtils::MakeMaker;
+use ExtUtils::MakeMaker 6.64;
 
 require 5.008;
 
 eval "use Test::Manifest 1.21";   
    
-WriteMakefile
-	(
-    'NAME'         => 'HTML::SimpleLinkExtor',
+WriteMakefile(
+	'NAME'         => 'HTML::SimpleLinkExtor',
 	'ABSTRACT'     => 'Extract links from HTML',
-    'VERSION_FROM' => 'lib/HTML/SimpleLinkExtor.pm',
+	'VERSION_FROM' => 'lib/HTML/SimpleLinkExtor.pm',
 	'LICENSE'      => 'perl',
 	'AUTHOR'       => 'brian d foy <bdfoy@cpan.org>',
-	
-   'PREREQ_PM'     => { 
-    	'HTML::LinkExtor' => '1.28',
-		'LWP::UserAgent'  => '0',
+
+	'CONFIGURE_REQUIRES' => {
+		'ExtUtils::MakeMaker' => '6.64',
+		},
+
+	'TEST_REQUIRES' => {
+		'Test::More'      => '0.96',  # for subtest, done_testing
 		'Test::Output'    => '0',
+		},
+
+	'PREREQ_PM'     => { 
+		'HTML::LinkExtor' => '1.28',
+		'LWP::UserAgent'  => '0',
 		'URI'             => '1.09',
 		'URI::file'       => '0',
-		'Test::More'      => '0.96',  # for subtest, done_testing
 		},
 
 	'EXE_FILES'    => [ 'scripts/linktractor' ],
-	
-	'dist'         => {
-		'COMPRESS'        => 'gzip -9f',
-		'SUFFIX'          => 'gz',
-		},
 
-  	'META_MERGE'   => {
+	'META_MERGE' => {
+		'meta-spec' => { version => 2 },
 		resources => {
-			repository  => 'https://github.com/briandfoy/html-simplelinkextor'
+			repository => {
+				type => 'git',
+				url  => 'git@github.com:briandfoy/html-simplelinkextor.git',
+				web  => 'https://github.com/briandfoy/html-simplelinkextor',
+				},
 			},
 		},
 
-    clean  => { FILES    => q|HTML-SimpleLinkExtor-*| },
+	clean  => { FILES    => q|HTML-SimpleLinkExtor-*| },
 	);
@@ -13,7 +13,7 @@ use HTML::LinkExtor;
 use LWP::UserAgent;
 use URI;
 
-$VERSION = '1.25';
+$VERSION = '1.27';
 
 @ISA = qw(HTML::LinkExtor);
 
@@ -36,15 +36,13 @@ $VERSION = '1.25';
 
 sub DESTROY { 1 };
 
-sub AUTOLOAD
-	{
+sub AUTOLOAD {
 	my $self = shift;
 	my $method = $AUTOLOAD;
 
 	$method =~ s/.*:://;
 
-	unless( exists $AUTO_METHODS{$method} )
-		{
+	unless( exists $AUTO_METHODS{$method} ) {
 		carp __PACKAGE__ . ": method $method unknown";
 		return;
 		}
@@ -52,20 +50,17 @@ sub AUTOLOAD
 	$self->_extract( $method );
 	}
 
-sub can
-	{
+sub can {
 	my( $self, @methods ) = @_;
 
-	foreach my $method ( @methods )
-		{
+	foreach my $method ( @methods ) {
 		return 0 unless $self->_can( $method );
 		}
 
 	return 1;
 	}
 
-sub _can
-	{
+sub _can {
 	no strict 'refs';
 
 	return 1 if exists $AUTO_METHODS{ $_[1] };
@@ -74,23 +69,20 @@ sub _can
 	return 0;
 	}
 
-sub _init_links
-	{
+sub _init_links {
 	my $self  = shift;
 	my $links = shift;
-
 	do {
 		delete $self->{'_SimpleLinkExtor_links'};
 		return
-		} unless UNIVERSAL::isa( $links, 'ARRAY' );
+		} unless ref $links eq ref [];
 
 	$self->{'_SimpleLinkExtor_links'} = $links;
 
 	$self;
 	}
 
-sub _link_refs
-	{
+sub _link_refs {
 	my $self = shift;
 
 	my @link_refs;
@@ -100,23 +92,21 @@ sub _link_refs
 	# through this branch. In _init_links I have to use a delete
 	# which I really don't like. I don't have time to rewrite this
 	# right now though --brian, 20050816
-	if( ref $self->{'_SimpleLinkExtor_links'} )
-		{
+	if( ref $self->{'_SimpleLinkExtor_links'} ) {
 		@link_refs = @{$self->{'_SimpleLinkExtor_links'}};
 		}
-	else
-		{
-		@link_refs = $self->SUPER::links();
+	else {
+		@link_refs = map { 
+			HTML::SimpleLinkExtor::LinkRef->new( $_ )
+			} $self->SUPER::links();
 		$self->_init_links( \@link_refs );
 		}
 
 	# defined() so that an empty string means "do not resolve"
-	unless( defined $self->{'_SimpleLinkExtor_base'} )
-		{
+	unless( defined $self->{'_SimpleLinkExtor_base'} ) {
 		my $count = -1;
 		my $found =  0;
-		foreach my $link ( @link_refs )
-			{
+		foreach my $link ( @link_refs ) {
 			$count++;
 			next unless $link->[0] eq 'base' and $link->[1] eq 'href';
 			$found = 1;
@@ -133,32 +123,28 @@ sub _link_refs
 	return @link_refs;
 	}
 
-sub _extract
-	{
+sub _extract {
 	my $self      = shift;
-	my $method    = shift;
+	my $type      = shift;
 
-	my $position  = $AUTO_METHODS{$method} eq 'tag' ? 0 : 1;
+	my $method  = $AUTO_METHODS{$type} eq 'tag' ? 'tag' : 'attribute';
 
-	my @links = map  { $$_[2] }
-	            grep { $_->[$position] eq $method }
+	my @links = map  { $_->linkref }
+	            grep { $_->$method() eq $type }
 	            $self->_link_refs;
 
 	return @links;
 	}
 
-sub _add_base
-	{
+sub _add_base {
 	my $self      = shift;
 	my $array_ref = shift;
 
 	my $base      = $self->{'_SimpleLinkExtor_base'};
 	return unless $base;
 
-	foreach my $tuple ( @$array_ref )
-		{
-		foreach my $index ( 1 .. $#$tuple )
-			{
+	foreach my $tuple ( @$array_ref ) {
+		foreach my $index ( 1 .. $#$tuple ) {
 			next unless exists $AUTO_METHODS{ $tuple->[$index] };
 
 			my $url = URI->new( $tuple->[$index + 1] );
@@ -213,7 +199,7 @@ not want to deal with the intricacies of C<HTML::Parser> or the
 de-referencing needed to get links out of C<HTML::LinkExtor>.
 
 You can extract all the links or some of the links (based on the HTML
-tag name or attribute name). If a E<lt>BASE HREFE<gt> tag is found,
+tag name or attribute name). If a C<< <BASE HREF> >> tag is found,
 all of the relative URLs will be resolved according to that reference.
 
 This module is simply a subclass around C<HTML::LinkExtor>, so it can
@@ -233,20 +219,19 @@ reset the link list between files, use the clear_links method.
 Create the link extractor object.
 
 =item $extor = HTML::SimpleLinkExtor->new('')
+
 =item $extor = HTML::SimpleLinkExtor->new($base)
 
 Create the link extractor object and resolve the relative URLs
 accoridng to the supplied base URL. The supplied base URL overrides
 any other base URL found in the HTML.
 
-
 Create the link extractor object and do not resolve relative
 links.
 
 =cut
 
-sub new
-	{
+sub new {
 	my $class = shift;
 	my $base  = shift;
 
@@ -278,8 +263,7 @@ the entire class, including previously created objects.
 
 =cut
 
-sub add_tags
-	{
+sub add_tags {
 	my $self = shift;
 	my $tag  = lc shift;
 
@@ -303,8 +287,7 @@ A smarter C<can> that can tell which attributes are also methods.
 
 =cut
 
-sub add_attributes
-	{
+sub add_attributes {
 	my $self = shift;
 	my $attr = lc shift;
 
@@ -319,8 +302,7 @@ created objects.
 
 =cut
 
-sub remove_tags
-	{
+sub remove_tags {
 	my $self = shift;
 	my $tag  = lc shift;
 
@@ -335,8 +317,7 @@ class, including previously created objects.
 
 =cut
 
-sub remove_attributes
-	{
+sub remove_attributes {
 	my $self = shift;
 	my $attr = lc shift;
 
@@ -350,8 +331,7 @@ attention to.
 
 =cut
 
-sub attribute_list
-	{
+sub attribute_list {
 	my $self = shift;
 
 	grep { $AUTO_METHODS{ $_ } eq 'attribute' } keys %AUTO_METHODS;
@@ -360,13 +340,13 @@ sub attribute_list
 =item HTML::SimpleLinkExtor->tag_list
 
 Returns a list of the tags C<HTML::SimpleLinkExtor> pays attention to.
+These tags have convenience methods.
 
 =back
 
 =cut
 
-sub tag_list
-	{
+sub tag_list {
 	my $self = shift;
 
 	grep { $AUTO_METHODS{ $_ } eq 'tag' } keys %AUTO_METHODS;
@@ -389,8 +369,7 @@ Fetch URL and parse its content for links.
 
 =cut
 
-sub parse_url
-	{
+sub parse_url {
 	my $data = $_[0]->ua->get( $_[1] )->content;
 
 	return unless $data;
@@ -417,9 +396,16 @@ Return a list of the links.
 
 =cut
 
-sub links
-	{
-	map { $$_[2] } $_[0]->_link_refs;
+sub links {
+	map  { $_->linkref } 
+	grep { $_[0]->_is_an_allowed_tag( $_->tag ) }
+	$_[0]->_link_refs 
+	}
+
+sub _is_an_allowed_tag { 
+	exists $AUTO_METHODS{$_[1]} 
+		and 
+	$AUTO_METHODS{$_[1]} eq 'tag' 
 	}
 
 =item $extor->img
@@ -492,8 +478,7 @@ the count of the matching links.
 
 =cut
 
-sub schemes
-	{
+sub schemes {
 	my( $self, @schemes ) = @_;
 
 	my %schemes = map { lc, lc } @schemes;
@@ -503,8 +488,8 @@ sub schemes
 			my $scheme = eval { lc URI->new( $_ )->scheme };
 			exists $schemes{ $scheme };
 			}
-		map { $_->[2] }
-			$self->_link_refs;
+		map { $_->linkref }
+		$self->_link_refs;
 
 	wantarray ? @links : scalar @links;
 	}
@@ -519,8 +504,7 @@ the count of the matching links.
 
 =cut
 
-sub absolute_links
-	{
+sub absolute_links {
 	my $self = shift;
 
 	my @links =
@@ -528,7 +512,7 @@ sub absolute_links
 			my $scheme = eval { lc URI->new( $_ )->scheme };
 			length $scheme;
 			}
-		map { $$_[2] }
+		map { $_->linkref }
 		$self->_link_refs;
 
 	wantarray ? @links : scalar @links;
@@ -545,8 +529,7 @@ the count of the matching links.
 
 =cut
 
-sub relative_links
-	{
+sub relative_links {
 	my $self = shift;
 
 	my @links =
@@ -554,7 +537,7 @@ sub relative_links
 			my $scheme = eval { URI->new( $_ )->scheme };
 			! defined $scheme;
 			}
-		map { $$_[2] }
+		map { $_->linkref }
 			$self->_link_refs;
 
 	wantarray ? @links : scalar @links;
@@ -585,13 +568,29 @@ brian d foy, C<< <bdfoy@cpan.org> >>
 
 =head1 COPYRIGHT AND LICENSE
 
-Copyright (c) 2004-2011 brian d foy.  All rights reserved.
+Copyright (c) 2004-2014 brian d foy.  All rights reserved.
 
 This program is free software; you can redistribute it and/or modify
 it under the same terms as Perl itself.
 
 =cut
 
+BEGIN {
+package 
+	HTML::SimpleLinkExtor::LinkRef;
+use Carp qw(croak);
+
+sub new {
+	my( $class, $arrayref ) = @_;
+	croak "Not an array reference argument!" unless ref $arrayref eq ref [];
+	bless $arrayref, $class;
+	}
+
+sub tag       { $_[0]->[0] }
+sub attribute { $_[0]->[1] }
+sub linkref   { $_[0]->[2] }
+}
+
 1;
 
 __END__
@@ -42,6 +42,15 @@ subtest 'extor' => sub {
 		$extor->parse_file( $file );
 
 		my @links = $extor->links;
+		ok( 
+			exists $extor->{'_SimpleLinkExtor_links'}, 
+			'_SimpleLinkExtor_links exists' 
+			);
+
+		is( scalar @{ $extor->{'_SimpleLinkExtor_links'} }, 
+			$total_links, "Data structure has the links" 
+			);
+
 		is( scalar @links, $total_links, "Found the right number of links" );
 		};
 
@@ -0,0 +1,16 @@
+use Test::More 0.95;
+
+my $class = 'HTML::SimpleLinkExtor';
+use_ok( $class ); 
+
+my $html = '<html><body><form name="test"
+action="/test"><input type="submit" /></form></body></html>'; 
+
+my $extor = $class->new(); 
+isa_ok( $extor, $class );
+
+$extor->parse( $html ); 
+
+print STDERR join "\n", $extor->links;
+
+done_testing();
@@ -7,3 +7,4 @@ schemes.t
 relative_links.t
 absolute_links.t
 autoload.t
+rt87288.t