lib/WWW/Scraper/computerjobs.pm


package WWW::Scraper::computerjobs;

use strict;
use vars qw($VERSION @ISA);
@ISA = qw(WWW::Scraper);
$VERSION = sprintf("%d.%02d", q$Revision: 1.03 $ =~ /(\d+)\.(\d+)/);
use WWW::Scraper(qw(1.48 trimLFs testParameters));

# Request declaration for this engine: the query type, the base URL, the
# native query field and the field-translation table.
my $scraperRequest = {
    # Type of query generation is 'QUERY'
    'type' => 'QUERY',

    # This is the basic URL on which to build the query.
    'url' => 'http://www.search.computerjobs.com/job_results.asp?',

    # This is the Scraper attributes => native input fields mapping
    'nativeQuery'         => 's_kw',
    'nativeDefaults'      => {},
    'defaultRequestClass' => 'Job',
    'fieldTranslations'   => {
        '*' => { '*' => '*' },
    },

    # Some more options for the Scraper operation.
    'cookies' => 1,
};

# Frame declaration for the result page. The nested structure is assembled
# from named pieces inside a do-block so no extra file-level lexicals leak out.
my $scraperFrame = do {
    # First table of a hit: one row of three cells; the middle cell is
    # stored as 'title' after passing through trimLFs.
    my $titleTable =
        [ 'TABLE', '#0',
          [
            [ 'TR',
              [
                [ 'TD' ],
                [ 'TD', 'title', \&trimLFs ],
                [ 'TD' ],
              ]
            ],
          ]
        ];

    # Second table of a hit: a single cell stored as 'description' after
    # passing through parseDescriptionAndAllThat.
    my $descriptionTable =
        [ 'TABLE', '#0',
          [
            [ 'TR',
              [
                [ 'TD', 'description', \&parseDescriptionAndAllThat ],
              ]
            ],
          ]
        ];

    # The section starting at the 'Page N of' marker: repeated hits,
    # followed by the BOGUS entry.
    my $resultsBody =
        [ 'BODY', 'Page \d+ of', undef,
          [
            [ 'HIT*', [ $titleTable, $descriptionTable ] ],
            [ 'BOGUS', -2 ],
          ]
        ];

    [ 'HTML',
      [
        [ 'COUNT', '([,0-9]+)\s+Search results ' ],
        [ 'NEXT', 1, '/ci/page_next_page.gif' ],
        # I think there might be something in this 'BODY' segment, but I haven't seen any, yet.
        [ 'BODY', '<!--- featured jobs --->', '<!-- end featured jobs -->' ],
        $resultsBody,
      ]
    ];
};

# Returns a fresh hash ref of test settings for this engine: a SKIP message,
# the native query string to test with, and the expected hit counts.
# When invoked on an instance (any reference), the instance's 'isTesting'
# entry is set to 1 as a side effect; a plain class-name invocant is left alone.
sub testParameters {
    my $self = $_[0];

    $self->{'isTesting'} = 1 if ref $self;

    my %parameters = (
        'SKIP'              => 'computerjobs - something wrong here, I don\'t know what it is.',
        'testNativeQuery'   => 'Perl',
        'expectedOnePage'   => 9,
        'expectedMultiPage' => 11,
        'expectedBogusPage' => 0,
    );

    return \%parameters;
}

# Accessors for the structural declarations of this Scraper engine.

# Returns the file-level $scraperRequest hash ref.
sub scraperRequest {
    return $scraperRequest;
}

# Passes the file-level $scraperFrame to the parent class's scraperFrame()
# and returns whatever that call returns.
sub scraperFrame {
    my ($self) = @_;
    return $self->SUPER::scraperFrame($scraperFrame);
}

# This engine declares no detail frame; always yields undef.
sub scraperDetail {
    return undef;
}


# Field handler for the 'description' cell. The cell text also carries the
# company, location and (sometimes) salary, so more elaborate parsing may be
# added here someday; for now it only delegates to the invocant's
# trimLFLFs method with the same hit and data arguments.
sub parseDescriptionAndAllThat {
    my $self = $_[0];
    my $hit  = $_[1];
    my $dat  = $_[2];

    return $self->trimLFLFs($hit, $dat);
}

=pod

=head1 NAME

WWW::Scraper::computerjobs - Scrapes www.computerjobs.com


=head1 SYNOPSIS

    require WWW::Scraper;
    $search = new WWW::Scraper('computerjobs');

=head1 DESCRIPTION

This class is a computerjobs specialization of WWW::Search.
It handles making and interpreting computerjobs searches
F<http://www.computerjobs.com>.


=head1 OPTIONS

=over 8

=item siteid => a regional code

    '139' -> All regions
    '100' -> Atlanta
    '109' -> Boston
    '102' -> Carolina
    '103' -> Chicago
    '105' -> D.C. Metro
    '114' -> Denver
    '111' -> Detroit
    '104' -> Florida
    '118' -> Los Angeles
    '106' -> New York
    '108' -> Ohio
    '110' -> Philadelphia
    '107' -> Phoenix
    '116' -> Portland
    '115' -> Seattle
    '117' -> Silicon Valley
    '113' -> St. Louis
    '101' -> Texas
    '112' -> Twin Cities


=item s_jcid => a skills code

    ''    -> All categories
    '101' -> AS/400
    '116' -> Data Warehousing
    '115' -> Database Systems
    '106' -> E-Commerce / Internet
    '103' -> ERP
    '117' -> Executive Level
    '108' -> Hardware
    '112' -> Help Desk
    '100' -> Legacy Systems
    '118' -> Miscellaneous
    '107' -> Networking
    '105' -> New Media
    '109' -> Project Management
    '110' -> Quality Assurance
    '114' -> Technical Recruiting
    '113' -> Technical Sales
    '111' -> Technical Writing
    '102' -> Unix
    '104' -> Windows Development

=back                


=head1 AUTHOR

C<WWW::Scraper::computerjobs> is written and maintained
by Glenn Wood, http://search.cpan.org/search?mode=author&query=GLENNWOOD.

The best place to obtain C<WWW::Scraper::computerjobs>
is from Glenn's releases on CPAN. Because www.computerjobs.com
sometimes changes its format in between his releases, 
sometimes more up-to-date versions can be found at
F<http://alumni.caltech.edu/~glenwood/SOFTWARE/index.html>.

=head1 COPYRIGHT

Copyright (c) 2001 Glenn Wood
All rights reserved.

This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=cut

1;
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	Go to GitHub issues (only if GitHub is the preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)