# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
use Test::More tests => 15;
use Regexp::Log::BlueCoat;

# Replace the default 'smartfilter' UFS category table.
# The pairs are kept in an ordered list (not a hash) so that they reach
# ufs_category() in exactly the order written here.
my @smartfilter_categories = (
    an => 'Anonymizer/Translator',
    ac => 'Art/Culture',
    ch => 'Chat',
    cs => 'Criminal_Skills',
    oc => 'Cults/Occult',
    mm => 'Dating',
    dr => 'Drugs',
    et => 'Entertainment',
    ex => 'Obscene/Extreme',
    gb => 'Gambling',
    gm => 'Games',
    nw => 'General_News',
    hs => 'Hate_Speech',
    hm => 'Humor',
    in => 'Investing',
    js => 'Job_Search',
    ls => 'Lifestyle',
    mt => 'Mature',
    mp => 'MP3_Sites',
    nd => 'Nudity',
    os => 'Online_Sales',
    pp => 'Personal',
    po => 'Politics/Religion',
    ps => 'Portal_Sites',
    sh => 'Self_Help/Health',
    sx => 'Sex',
    sp => 'Sports',
    tr => 'Travel',
    na => 'Usenet_News',
    wm => 'Webmail',
);

Regexp::Log::BlueCoat->ufs_category( smartfilter => @smartfilter_categories );

# Constructor arguments, kept as an ordered list so they are handed to
# new() in the same sequence as written.
my @log_options = (
    format  => '%g %e %a %w/%s %b %m %i %u %H/%d %c %f %A',
    ufs     => 'smartfilter',
    login   => 'ldap',
    capture => [ qw(:all) ],
);
my $log = Regexp::Log::BlueCoat->new(@log_options);

# The sample log is read through the magic <> handle further down,
# so it is queued as the only entry of @ARGV.
@ARGV = qw(t/bc1.log);

# Field names and the regexp obtained from the configured object.
my @fields = $log->capture;
my $regexp = $log->regexp;

# Expected results: one hash per line of t/bc1.log, in file order.
# The comparison loop at the end of this file checks entry N of @data
# against the fields captured from line N of the log.
# Every value is a string, including numeric-looking ones.
# NOTE(review): 'sc-filter-category' values such as 'Online_Sales' match the
# names registered through ufs_category() above; presumably the module
# translates the short codes found in the log -- confirm against t/bc1.log.

# Scratch hash, refilled for every log line through the @data{@fields} slice.
my %data;
my @data = (
    {
        'c-ip'        => '10.0.203.16',
        'user-agent'  => 'Mozilla/4.76 [en] (X11; U; Linux 2.4.2-22mdk i686)',
        'time-taken'  => '1950',
        'cs-uri'      => 'http://www.microsoft.com:80/',
        's-hierarchy' => 'DIRECT',
        'cs-username' => 'CN=Agent Smith,OU=fr,O=company',
        'cs-supplier-name'   => 'www.microsoft.com',
        's-action'           => 'TCP_NC_MISS',
        'sc-filter-category' => 'uncategorized',
        'cs-content-type'    => 'text/html',
        'cs-method'          => 'GET',
        'cs-bytes'           => '31977',
        'sc-status'          => '200',
        'timestamp'          => '1038956400.024'
    },
    {
        'c-ip'        => '10.0.203.16',
        'user-agent'  => 'Mozilla/4.76 [en] (X11; U; Linux 2.4.2-22mdk i686)',
        'time-taken'  => '182',
        'cs-uri'      => 'http://www.fnac.com:80/',
        's-hierarchy' => 'DIRECT',
        'cs-username' => 'CN=Agent Smith,OU=fr,O=company',
        'cs-supplier-name'   => 'www.fnac.com',
        's-action'           => 'TCP_NC_MISS',
        'sc-filter-category' => 'Online_Sales',
        'cs-content-type'    => 'text/html',
        'cs-method'          => 'GET',
        'cs-bytes'           => '50537',
        'sc-status'          => '200',
        'timestamp'          => '1038956401.460'
    },
    # Denied request (status 407): username, supplier name and content type
    # are all expected as the placeholder '-'.
    {
        'c-ip'        => '10.0.203.16',
        'user-agent'  => 'Mozilla/4.76 [en] (X11; U; Linux 2.4.2-22mdk i686)',
        'time-taken'  => '18',
        'cs-uri'      => 'http://voyages-sncf.com:80/',
        's-hierarchy' => 'DIRECT',
        'cs-username' => '-',
        'cs-supplier-name'   => '-',
        's-action'           => 'TCP_DENIED',
        'sc-filter-category' => 'uncategorized',
        'cs-content-type'    => '-',
        'cs-method'          => 'GET',
        'cs-bytes'           => '3309',
        'sc-status'          => '407',
        'timestamp'          => '1038956401.633'
    },
    # URI with a query string, and a username holding two OU components.
    {
        'c-ip'       => '10.0.203.16',
        'user-agent' => 'MSMSGS',
        'time-taken' => '298',
        'cs-uri'     =>
'http://207.46.110.3/gateway/gateway.dll?Action=poll&SessionID=268004698.8918',
        's-hierarchy'        => 'DIRECT',
        'cs-username'        => 'CN=Tux PENGUIN,OU=ress,OU=fr,O=company',
        'cs-supplier-name'   => '207.46.110.3',
        's-action'           => 'TCP_NC_MISS',
        'sc-filter-category' => 'Webmail',
        'cs-content-type'    => 'application/x-msn-messenger',
        'cs-method'          => 'POST',
        'cs-bytes'           => '228',
        'sc-status'          => '200',
        'timestamp'          => '1038956401.013'
    },
    {
        'c-ip'               => '10.0.203.16',
        'user-agent'         => 'Mozilla/4.01 [en] (Win95; I)',
        'time-taken'         => '2877',
        'cs-uri'             => 'http://shttp.msg.yahoo.com/notify/',
        's-hierarchy'        => 'DIRECT',
        'cs-username'        => 'CN=Mr MESSENGER,OU=fr,O=company',
        'cs-supplier-name'   => 'shttp.msg.yahoo.com',
        's-action'           => 'TCP_NC_MISS',
        'sc-filter-category' => 'Portal_Sites',
        'cs-content-type'    => 'text/plain',
        'cs-method'          => 'POST',
        'cs-bytes'           => '2311',
        'sc-status'          => '200',
        'timestamp'          => '1038956412.372'
    },
    {
        'c-ip'               => '10.0.203.16',
        'user-agent'         => 'SetiQueue',
        'time-taken'         => '2613',
        'cs-uri'             => 'http://shserver2.ssl.berkeley.edu/',
        's-hierarchy'        => 'DIRECT',
        'cs-username'        => 'CN=Frodo BAGGINS,OU=lord-otr,OU=fr,O=company',
        'cs-supplier-name'   => 'shserver2.ssl.berkeley.edu',
        's-action'           => 'TCP_NC_MISS',
        'sc-filter-category' => 'uncategorized',
        'cs-content-type'    => 'text/plain',
        'cs-method'          => 'POST',
        'cs-bytes'           => '568',
        'sc-status'          => '200',
        'timestamp'          => '1039302000.538'
    },
    {
        'c-ip'        => '10.0.203.16',
        'user-agent'  => 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 4.0)',
        'time-taken'  => '91',
        'cs-uri'      => 'http://www.idealforex.sg-ib.com/Pac_Angl/pool.asp',
        's-hierarchy' => 'DIRECT',
        'cs-username' => 'CN=John DOE,OU=decc,OU=fr,O=company',
        'cs-supplier-name'   => 'www.idealforex.sg-ib.com',
        's-action'           => 'TCP_NC_MISS',
        'sc-filter-category' => 'uncategorized',
        'cs-content-type'    => 'text/html',
        'cs-method'          => 'GET',
        'cs-bytes'           => '8753',
        'sc-status'          => '200',
        'timestamp'          => '1039302001.724'
    },
    # Status 304 entry: the content type is expected as '-'.
    {
        'c-ip'       => '10.0.203.16',
        'user-agent' => 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 4.0)',
        'time-taken' => '125',
        'cs-uri'     =>
          'http://www.idealforex.sg-ib.com/Pac_Angl/Include/sg_domain.js',
        's-hierarchy'        => 'DIRECT',
        'cs-username'        => 'CN=Pamela ANDERSON,OU=decc,OU=fr,O=company',
        'cs-supplier-name'   => 'www.idealforex.sg-ib.com',
        's-action'           => 'TCP_NC_MISS',
        'sc-filter-category' => 'uncategorized',
        'cs-content-type'    => '-',
        'cs-method'          => 'GET',
        'cs-bytes'           => '170',
        'sc-status'          => '304',
        'timestamp'          => '1039302002.949'
    },
    {
        'c-ip'               => '10.0.203.16',
        'user-agent'         => 'SetiQueue',
        'time-taken'         => '2186',
        'cs-uri'             => 'http://shserver2.ssl.berkeley.edu/',
        's-hierarchy'        => 'DIRECT',
        'cs-username'        => 'CN=Frodo BAGGINS,OU=marc-otc,OU=fr,O=company',
        'cs-supplier-name'   => 'shserver2.ssl.berkeley.edu',
        's-action'           => 'TCP_NC_MISS',
        'sc-filter-category' => 'uncategorized',
        'cs-content-type'    => 'text/plain',
        'cs-method'          => 'POST',
        'cs-bytes'           => '356591',
        'sc-status'          => '200',
        'timestamp'          => '1039302002.754'
    },
    # CONNECT request to an https URI with action TCP_TUNNELED;
    # the user agent is expected as '-'.
    {
        'c-ip'               => '10.0.203.16',
        'user-agent'         => '-',
        'time-taken'         => '204',
        'cs-uri'             => 'https://193.116.122.3:443/',
        's-hierarchy'        => 'DIRECT',
        'cs-username'        => 'CN=Larry WALL,OU=decc,OU=fr,O=company',
        'cs-supplier-name'   => '193.116.122.3',
        's-action'           => 'TCP_TUNNELED',
        'sc-filter-category' => 'uncategorized',
        'cs-content-type'    => '-',
        'cs-method'          => 'CONNECT',
        'cs-bytes'           => '432',
        'sc-status'          => '200',
        'timestamp'          => '1039302011.734'
    },
    {
        'c-ip'               => '10.0.203.16',
        'user-agent'         => 'Mozilla/4.01 [en] (Win95; I)',
        'time-taken'         => '336',
        'cs-uri'             => 'http://shttp.msg.yahoo.com/notify/',
        's-hierarchy'        => 'DIRECT',
        'cs-username'        => 'CN=Peter PAN,OU=fr,O=company',
        'cs-supplier-name'   => 'shttp.msg.yahoo.com',
        's-action'           => 'TCP_NC_MISS',
        'sc-filter-category' => 'Portal_Sites',
        'cs-content-type'    => 'text/plain',
        'cs-method'          => 'POST',
        'cs-bytes'           => '316',
        'sc-status'          => '200',
        'timestamp'          => '1039302013.190'
    },
    # The only entry whose category is 'content_filter_not_applied'.
    {
        'c-ip'               => '192.168.71.137',
        'user-agent'         => '-',
        'time-taken'         => '2',
        'cs-uri'             => 'https://econf.qsdf.com/',
        's-hierarchy'        => 'DIRECT',
        'cs-username'        => '-',
        'cs-supplier-name'   => '-',
        's-action'           => 'TCP_NC_MISS',
        'sc-filter-category' => 'content_filter_not_applied',
        'cs-content-type'    => '-',
        'cs-method'          => 'GET',
        'cs-bytes'           => '116',
        'sc-status'          => '200',
        'timestamp'          => '1039302017.034'
    },
    # Denied request (status 407) with a long query-string URI.
    {
        'c-ip'       => '10.0.203.16',
        'user-agent' => 'Totem',
        'time-taken' => '2',
        'cs-uri'     =>
'http://adverts.mp3dancer.com/phps_application/advertisingupdate_7.php3?domain=FavoriteLinksMP3Dancer&localdate=1039301994',
        's-hierarchy'        => 'DIRECT',
        'cs-username'        => '-',
        'cs-supplier-name'   => '-',
        's-action'           => 'TCP_DENIED',
        'sc-filter-category' => 'Mature',
        'cs-content-type'    => '-',
        'cs-method'          => 'GET',
        'cs-bytes'           => '3309',
        'sc-status'          => '407',
        'timestamp'          => '1039302020.911'
    },
    {
        'c-ip'               => '10.0.203.16',
        'user-agent'         => 'Mozilla/4.01 [en] (Win95; I)',
        'time-taken'         => '334',
        'cs-uri'             => 'http://shttp.msg.yahoo.com/notify/',
        's-hierarchy'        => 'DIRECT',
        'cs-username'        => 'CN=Nicolas BOURBAKI,OU=fr,O=company',
        'cs-supplier-name'   => 'shttp.msg.yahoo.com',
        's-action'           => 'TCP_NC_MISS',
        'sc-filter-category' => 'Portal_Sites',
        'cs-content-type'    => 'text/plain',
        'cs-method'          => 'POST',
        'cs-bytes'           => '316',
        'sc-status'          => '200',
        'timestamp'          => '1039302022.732'
    },
    # Error entry (TCP_ERR_MISS, status 503). The keys are listed in a
    # different order from the other records; hash key order does not
    # affect is_deeply().
    # NOTE(review): the expected 'cs-uri' contains a space after 'http://';
    # presumably it mirrors the raw log line -- confirm against t/bc1.log.
    {
        'timestamp'   => '1039424947.214',
        'time-taken'  => '19',
        'c-ip'        => '192.186.203.16',
        's-action'    => 'TCP_ERR_MISS',
        'sc-status'   => '503',
        'cs-bytes'    => '3100',
        'cs-method'   => 'GET',
        'cs-uri'      => 'http:// bug0bus0.free.fr/es_fichiers/tn_ak_01.jpg',
        'cs-username' => 'CN=Tom BOMBADIL,OU=fr,O=tolkien',
        's-hierarchy' => 'DIRECT',
        'cs-supplier-name'   => '-',
        'cs-content-type'    => '-',
        'sc-filter-category' => 'Portal_Sites',
        'user-agent'         =>
          'Mozilla/5.0 (Windows; U; WinNT4.0; en-US; rv:1.2.1) Gecko/20021130',
    }
);

# Match every line read through <> (t/bc1.log, queued in @ARGV above)
# against $regexp and compare the captured fields with the corresponding
# entry of @data.
#
# The expected record is fetched, and the counter advanced, in a statement
# of its own. The original put "$i++" and "$i + 1" in the same argument
# list, so the increment ran first and each test was labelled one line too
# high (the first log line was reported as "line 2").
my $i = 0;
while (<>) {
    my $expected = $data[ $i++ ];    # $i is now the 1-based line number

    # A failed match assigns an empty list, which leaves every field undef,
    # so values from the previous line cannot leak into this comparison.
    @data{@fields} = /$regexp/;

    is_deeply( \%data, $expected, "bc1.log line $i" );
}