#!/usr/bin/perl -w use Socket; use strict; my $word = $ARGV[0] or die "Usage: mw word\n"; my $host = "proxy.kla-tencor.com"; my $port = 80; my $socketaddr; my $content = "jump=$word"; # This seems to work with white space in word my $wholePage = ""; my $form = ""; my $buf = ""; my @listvalue = (); my @option = (); my $selections = 0; my $count = 0; while ($content) { openSock(); post ($content); $wholePage = ""; while ( ) { $wholePage .= $_; } close SOCK; $wholePage =~ /(
)/gs; $form = $1 or die "Can not find the word\n"; # this is heavy duty kludge, geared toward www.m-w.com, needs maintenance # find out if the form has a selection of options $selections = 0; if ($form =~ s/^To view.*?GO TO.$//m) { $selections = 1; @option = ($form =~ /^(.*)$/mg); @listvalue = ($form =~ /name=list value="(.*)">/g); } # convert html into something more readable $form =~ s/
/\n/g; # change html linebreak to newline $form =~ s/]*>//g; # delete all the other html tags $form =~ s/>/>/g; # make visible the greater-than sign $form =~ s/</; # don't know how to use "read" chomp $buf; if ($buf eq '.') { $content = ""; } elsif ($buf !~ /\d/ or $buf >= @option) { print "What did you just do?\n"; $content = ""; } else{ $content = "hdwd=$word&book=Dictionary&jump="; $content .= urlencode ($option[$buf]); $content .= "&list="; $content .= urlencode ($listvalue[0]); } } # end of if selections } # end of while content ########### # subroutine: open a socket at SOCK ########### sub openSock { $socketaddr= sockaddr_in $port, inet_aton $host or die "Bad hostname\n"; socket SOCK, PF_INET, SOCK_STREAM, getprotobyname('tcp') or die "Bad socket\n"; connect SOCK, $socketaddr or die "Bad connection\n"; select((select(SOCK), $| = 1)[0]); } ########### # subroutine: urlencode a string ########### sub urlencode { my $ask = shift @_; my @a2 = unpack "C*", $ask; my $s2 = ""; while (@a2) { $s2 .= sprintf "%%%X", shift @a2; } return $s2; } ########### # subroutine: send post request to target web site ########### sub post { my $content = shift @_; print SOCK "POST http://www.m-w.com/cgi-bin/dictionary HTTP/1.0\n"; print SOCK "Content-type: application/x-www-form-urlencoded\n"; my $contentLength = length $content; print SOCK "Content-length: $contentLength\n"; print SOCK "\n"; print SOCK "$content"; } =head1 NAME Save this file to "mw", which stands for merriam-webster, then you can run it as "mw word" or "perl mw word" =head1 DESCRIPTION a simple web robot to look up a word from Merriam-Webster site using POST method, and print the text response to STDOUT. =head1 README A special-purpose simple script that looks up a word from Merriam-Webster site. This script only uses Socket and no other external modules or packages, and it demonstrates the use of POST method to submit a FORM. However, the specific use of this script is limited to talking to www.m-w.com, and the fact that many parameters are hard-coded makes it dependent on the stability of that web site. Nonetheless, since everything is explicitly written, it is very easy to manually change those hard-coded strings =head1 PREREQUISITES requires strict module and Socket module =head1 SCRIPT CATEGORIES Web =cut