The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
######################################################################
#
# Copyright 1999 Web Juice, LLC. All Rights Reserved.
#
# String/Checker.pm
#
# An extensible string validation module (allowing commonly used
# checks on a string to be called more concisely and consistently).
#
# $Header: /usr/local/repository/advoco/String-Checker/Checker.pm,v 1.2 1999/08/26 04:38:41 dlowe Exp $
# $Log: Checker.pm,v $
# Revision 1.2  1999/08/26 04:38:41  dlowe
# Bugfix.
#
# Revision 1.1.1.1  1999/07/09 01:25:16  dlowe
# String::Checker
#
#
######################################################################
package String::Checker;

use strict;
use vars qw($VERSION %check_subs);
use Date::Manip;

$VERSION = '0.03';


##
## This is the default set of allowed expectations
##
%check_subs = ('allow_empty'    => \&check_allow_empty,
               'disallow_empty' => \&check_disallow_empty,
               'min'            => \&check_min,
               'max'            => \&check_max,
               'want_int'       => \&check_want_int,
               'want_float'     => \&check_want_float,
               'allow_chars'    => \&check_allow_chars,
               'disallow_chars' => \&check_disallow_chars,
               'upcase'         => \&check_upcase,
               'downcase'       => \&check_downcase,
               'stripxws'       => \&check_stripxws,
               'enum'           => \&check_enum,
               'match'          => \&check_match,
               'want_email'     => \&check_email,
               'want_date'      => \&check_date,
               'want_phone'     => \&check_phone, );



######################################################################
## NAME:          checkstring
##
## DESCRIPTION:   Verifies that a string meets some set of expectations.
##
## USAGE:         String::Checker::checkstring($string, [ $expectation1,
##                                                        $expectation2 ];
##
## RETURN VALUES: Returns a reference to an array, the values of which
##                are the names of failed expectations.
##
## BUGS:          Hopefully none.
######################################################################
sub checkstring (\$$)
{
    my($string_ref) = shift;
    my($checks)     = shift;
    my(@output);

    if ((ref($string_ref) ne 'SCALAR') || (ref($checks) ne 'ARRAY'))
    {
        return undef;
    }

    foreach my $c (@{$checks})
    {
        my($arg);

        if (ref($c))
        {
            ($c, $arg) = @{$c}[0, 1];
        }

        if (defined($check_subs{$c}))
        {
            my($ret);

            if (defined($arg))
            {
                $ret = $check_subs{$c}->($string_ref, $arg);
            } else
            {
                $ret = $check_subs{$c}->($string_ref);
            }

            if ((defined($ret)) && ($ret == 1))
            {
                push(@output, $c);
            }
        }
    }

    return(\@output);
}
### end checkstring ##################################################



######################################################################
## NAME:          register_check
##
## DESCRIPTION:   Register a new string checking routine.
##
## USAGE:         String::Checker::register_check($name, \&sub);
##
## RETURN VALUES: None.
##
## BUGS:          Hopefully none.
######################################################################
sub register_check ($$)
{
    my($check)   = shift;
    my($coderef) = shift;

    if (ref($coderef) eq 'CODE')
    {
        $check_subs{$check} = $coderef;
    }
}
### end register_check ###############################################



######################################################################
## NAME:          check_*
##
## DESCRIPTION:   A default set of string validators
##
## USAGE:         Don't.  Let checkstring do it for you.
##
## RETURN VALUES: undef if there's no problem, 1 otherwise.
##
## BUGS:          Hopefully none.
######################################################################
# convert undef to an empty string, if necessary
sub check_allow_empty ($)
{
    my($string_ref) = shift;

    if ((! defined($$string_ref)) || ($$string_ref eq ''))
    {
        $$string_ref = '';
    }
    return undef;
}

# blow up if the string is empty or undef
sub check_disallow_empty ($)
{
    my($string_ref) = shift;

    if ((! defined($$string_ref)) || ($$string_ref eq ''))
    {
        return 1;
    }
    return undef;
}

# ensure the string is no shorter than some minimum number of characters
sub check_min ($$)
{
    my($string_ref) = shift;
    my($min)        = shift || 0;

    if ((defined($$string_ref)) && (length($$string_ref) < $min))
    {
        return 1;
    }
    return undef;
}

# ensure the string is no longer than some maximum number of characters
sub check_max ($$)
{
    my($string_ref) = shift;
    my($max)        = shift || 0;

    if ((defined($$string_ref)) && (length($$string_ref) > $max))
    {
        return 1;
    }
    return undef;
}

# check if the string looks like a whole number
sub check_want_int ($)
{
    my($string_ref) = shift;

    if ((defined($$string_ref)) && ($$string_ref !~ /^\d*$/))
    {
        return 1;
    }
    return undef;
}

# check if the string looks like a real number
sub check_want_float ($)
{
    my($string_ref) = shift;

    if ((defined($$string_ref)) && ($$string_ref !~ /^\d*\.?\d*?$/))
    {
        return 1;
    }
    return undef;
}

# allow a particular character class
sub check_allow_chars ($$)
{
    my($string_ref) = shift;
    my($chars)      = shift || '';

    if ((defined($$string_ref)) && ($$string_ref !~ /^[$chars]*$/))
    {
        return 1;
    }
    return undef;
}

# disallow a particular character class
sub check_disallow_chars ($$)
{
    my($string_ref) = shift;
    my($chars)      = shift || '';

    if ((defined($$string_ref)) && ($$string_ref =~ /[$chars]/))
    {
        return 1;
    }
    return undef;
}

# smash the case of the string to uppercase
sub check_upcase ($)
{
    my($string_ref) = shift;

    if (! defined($$string_ref))
    {
        return undef;
    }
    $$string_ref = uc($$string_ref);
    return undef;
}

# smash the case of the string to lowercase
sub check_downcase ($)
{
    my($string_ref) = shift;

    if (! defined($$string_ref))
    {
        return undef;
    }
    $$string_ref = uc($$string_ref);
    return undef;
}

# strip leading and trailing whitespace
sub check_stripxws ($)
{
    my($string_ref) = shift;

    if (! defined($$string_ref))
    {
        return undef;
    }
    $$string_ref =~ s/^\s+//;
    $$string_ref =~ s/\s+$//;
    return undef;
}

# verify that the string matches a particular regexp
sub check_match ($$)
{
    my($string_ref) = shift;
    my($regexp)     = shift || '';

    if ((defined($$string_ref)) && ($$string_ref !~ /$regexp/))
    {
        return 1;
    }
    return undef;
}

# verify that the string is a member of a given list
sub check_enum ($$)
{
    my($string_ref) = shift;
    my($enum_list)  = shift;

    if (! defined($$string_ref))
    {
        return 1;
    }

    if (ref($enum_list) ne 'ARRAY')
    {
        return 1;
    }

    foreach my $item (@{$enum_list})
    {
        if ($$string_ref eq $item)
        {
            return undef;
        }
    }
    return 1;
}

# verify that we have what appears to be a valid e-mail address
sub check_email ($$)
{
    my($string_ref) = shift;

    if ((defined($$string_ref)) && ($$string_ref !~ /^\S+\@[\w-]+\.[\w\.-]+$/))
    {
        return 1;
    }
    return undef;
}

# verify that we have a valid date
sub check_date ($$)
{
    my($string_ref) = shift;
    my($format)     = shift;
    my($date);

    if ((defined($$string_ref)) && (defined($format)) && ($format ne ''))
    {
        $date = UnixDate($$string_ref, $format);
        if (! defined($date))
        {
            return 1;
        }
        $$string_ref = $date;
        return undef;
    }
    if (defined($$string_ref))
    {
        $date = ParseDate($$string_ref);
        if (! defined($date))
        {
            return 1;
        }
    }
    return undef;
}

# verify that we have what appears to be a valid phone number
sub check_phone ($$)
{
    my($string_ref) = shift;

    if ((defined($$string_ref)) && ($$string_ref !~ /^[0-9+.()-]*$/))
    {
        return 1;
    }
    return undef;
}

### end check_* ######################################################

1;
__END__

=head1 NAME

String::Checker - An extensible string validation module (allowing
commonly used checks on strings to be called more concisely and
consistently).

=head1 SYNOPSIS

 use String::Checker;

 String::Checker::register_check($checkname, \&sub);
 $return = String::Checker::checkstring($string, [ expectation, ... ]);

=head1 DESCRIPTION

This is a very simple library for checking a string against a given set of
expectations.  It contains a number of pre-defined expectations which can be
used, and can also be extended to perform any arbitrary match or modification
on a string.

Why is this useful?  If you're only checking one string, it probably isn't.
However, if you're checking a bunch of strings (say, for example, CGI input
parameters) against a set of expectations, this comes in pretty handy.  As
a matter of fact, the CGI::ArgChecker module is a simple, CGI.pm aware wrapper
for this library.

=head2 Checking a string

The checkstring function takes a string scalar and a reference to a list of
'expectations' as arguments, and outputs a reference to a list, containing
the names of the expectations which failed.

Each expectation, in turn, can either be a string scalar (the name of the
expectation) or a two-element array reference (the first element being the
name of the expectation, and second element being the argument to that
expectation.)  For example:

   $string = "foo";
   String::Checker::checkstring($string, [ 'allow_empty',
                                           [ 'max' => 20 ] ] );

Note that the expectations are run in order.  In the above case, for example,
the 'allow_empty' expectation would be checked first, followed by the 'max'
expectation with an argument of 20.

=head2 Defined checks

The module predefines a number of checks.  They are:

=over 3

=item B<allow_empty>

Never fails - will convert an undef scalar to an empty string, though.

=item B<disallow_empty>

Fails if the input string is either undef or empty.

=item B<min>

Fails if the length of the input string is less than the numeric value of
it's single argument.

=item B<max>

Fails if the length of the input string is more than the numeric value of
it's single argument.

=item B<want_int>

Fails if the input string does not solely consist of numeric characters.

=item B<want_float>

Fails if the argument does not solely consist of numeric characters, plus
an optional single '.'.

=item B<allow_chars>

Fails if the input string contains characters other than those in its
argument.

=item B<disallow_chars>

Fails if the input string contains any of the characters in its argument.

=item B<upcase>

Never fails - converts the string to upper case.

=item B<downcase>

Never fails - converts the string to lower case.

=item B<stripxws>

Never fails - strips leading and trailing whitespace from the string.

=item B<enum>

Fails if the input string does not precisely match at least one of the
elements of the array reference it takes as an argument.

=item B<match>

Fails if the input string does not match the regular expression it takes
as an argument.

=item B<want_email>

Fails if the input string does not match the regular expression: ^\S+\@@[\w-]+\.[\w\.-]+$

=item B<want_phone>

Fails if the input string does not match the regular expression ^[0-9+.()-]*$

=item B<want_date>

Interprets the input string as a date, if possible.  This will fail if it can't
figure out a date from the input.  In addition, it is possible to use this to
standardize date input.  Pass a formatting string (see the strftime(3) man page)
as an argument to this check, and the string will be formatted appropriately
if possible.  This is based on the Date::Manip(1) module, so that documentation
might prove valuable if you're using this check.

=back

=head2 Extension checks

Use register_check to register a new expectation checking routine.  This
function should be passed a new expectation name and a code reference.

This code reference will be called every time the expectation name is seen,
with either one or two arguments.  The first argument will always be
a reference to the input string (the function is free to modify the value
of the string).  The second argument, if any, is the second element of a
two-part expectation, whatever that might be.

The function should return undef unless there's a problem, in which case
it should return 1.  It's also best (if possible) to return undef if the
string is undef, so that the user can decide whether to allow_empty or
disallow_empty independent of your check.

For example, registering a check to verify that the input word is "poot"
would look like:

   String::Checker::register_check("ispoot", sub {
       my($s) = shift;
       if ((defined($$s)) && ($$s ne 'poot')) {
           return 1;
       }
       return undef;
   };

=head1 BUGS

Hopefully none.

=head1 AUTHOR

J. David Lowe, dlowe@webjuice.com

=head1 SEE ALSO

perl(1), CGI::ArgChecker(1)

=cut