The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Parse::IRC;
{
  $Parse::IRC::VERSION = '1.20';
}

#ABSTRACT: A parser for the IRC protocol.

# We export some stuff
require Exporter;
@ISA = qw[Exporter];
@EXPORT = qw[parse_irc];

use strict;
use warnings;
use File::Basename qw[fileparse];

my $g = {
  space			=> qr/\x20+/o,
  trailing_space	=> qr/\x20*/o,
};

my $irc_regex = qr/^
  (?:
    \x3a                #  : comes before hand
    (\S+)               #  [prefix]
    $g->{'space'}       #  Followed by a space
  )?                    # but is optional.
  (
    \d{3}|[a-zA-Z]+     #  [command]
  )                     # required.
  (?:
    $g->{'space'}       # Strip leading space off [middle]s
    (                   # [middle]s
      (?:
        [^\x00\x0a\x0d\x20\x3a]
        [^\x00\x0a\x0d\x20]*
      )                 # Match on 1 of these,
      (?:
        $g->{'space'}
        [^\x00\x0a\x0d\x20\x3a]
        [^\x00\x0a\x0d\x20]*
      )*           # then match on 0-13 of these,
    )
  )?                    # otherwise dont match at all.
  (?:
    $g->{'space'}\x3a   # Strip off leading spacecolon for [trailing]
    ([^\x00\x0a\x0d]*)	# [trailing]
  )?                    # [trailing] is not necessary.
  $g->{'trailing_space'}
$/x;

# the magic cookie jar
my %dcc_types = (
    qr/^(?:CHAT|SEND)$/ => sub {
        my ($nick, $type, $args) = @_;
        my ($file, $addr, $port, $size);
        return if !(($file, $addr, $port, $size) = $args =~ /^(".+"|[^ ]+) +(\d+) +(\d+)(?: +(\d+))?/);

        if ($file =~ s/^"//) {
            $file =~ s/"$//;
            $file =~ s/\\"/"/g;
        }
        $file = fileparse($file);

        return (
            $port,
            {
                nick => $nick,
                type => $type,
                file => $file,
                size => $size,
                addr => $addr,
                port => $port,
            },
            $file,
            $size,
            $addr,
        );
    },
    qr/^(?:ACCEPT|RESUME)$/ => sub {
        my ($nick, $type, $args) = @_;
        my ($file, $port, $position);
        return if !(($file, $port, $position) = $args =~ /^(".+"|[^ ]+) +(\d+) +(\d+)/);

        $file =~ s/^"|"$//g;
        $file = fileparse($file);

        return (
            $port,
            {
                nick => $nick,
                type => $type,
                file => $file,
                size => $position,
                port => $port,
            },
            $file,
            $position,
        );
    },
);

sub parse_irc {
  my $string = shift || return;
  return __PACKAGE__->new(@_)->parse($string);
}

sub new {
  my $package = shift;
  my %opts = @_;
  $opts{lc $_} = delete $opts{$_} for keys %opts;
  return bless \%opts, $package;
}

sub parse {
  my $self = shift;
  my $raw_line = shift || return;
  $raw_line =~ s/(\x0D\x0A?|\x0A\x0D?)$//;
  if ( my($prefix, $command, $middles, $trailing) = $raw_line =~ m/$irc_regex/ ) {
      my $event = { raw_line => $raw_line };
      $event->{'prefix'} = $prefix if $prefix;
      $event->{'command'} = uc $command;
      $event->{'params'} = [] if ( defined ( $middles ) || defined ( $trailing ) );
      push @{$event->{'params'}}, (split /$g->{'space'}/, $middles) if defined ( $middles );
      push @{$event->{'params'}}, $trailing if defined( $trailing );
      if ( $self->{ctcp} and $event->{'command'} =~ /^(PRIVMSG|NOTICE)$/ and $event->{params}->[1] =~ tr/\001// ) {
        return $self->_get_ctcp( $event );
      }
      if ( $self->{public} and $event->{'command'} eq 'PRIVMSG' and $event->{'params'}->[0] =~ /^(\x23|\x26)/ ) {
	$event->{'command'} = 'PUBLIC';
      }
      return $event;
  }
  else {
      warn "Received line $raw_line that is not IRC protocol\n" if $self->{debug};
  }
  return;
}

sub _get_ctcp {
    my ($self, $line) = @_;

    # Is this a CTCP request or reply?
    my $ctcp_type = $line->{command} eq 'PRIVMSG' ? 'CTCP' : 'CTCPREPLY';

    # CAPAP IDENTIFY-MSG is only applied to ACTIONs
    my ($msg, $identified) = ($line->{params}->[1], undef);
    ($msg, $identified) = _split_idmsg($msg) if $self->{identifymsg} && $msg =~ /^.ACTION/;

    my $events;
    my ($ctcp, $text) = _ctcp_dequote($msg);

    if (!defined $ctcp) {
        warn "Received malformed CTCP message: $msg\n" if $self->{debug};
        return $events;
    }

    my $nick = defined $line->{prefix} ? (split /!/, $line->{prefix})[0] : undef;

    # We only process the first CTCP. The only people who send multiple ones
    # are those who are trying to flood our outgoing queue anyway (e.g. by
    # having us reply to 20 VERSION requests at a time).
    my ($name, $args);
    CTCP: for my $string ($ctcp->[0]) {
        if (!(($name, $args) = $string =~ /^(\w+)(?: +(.*))?/)) {
            defined $nick
                ? do { warn "Received malformed CTCP message from $nick: $string\n" if $self->{debug} }
                : do { warn "Trying to send malformed CTCP message: $string\n" if $self->{debug} }
            ;
            last CTCP;
        }

        if (lc $name eq 'dcc') {
            my ($dcc_type, $rest);

            if (!(($dcc_type, $rest) = $args =~ /^(\w+) +(.+)/)) {
                defined $nick
                    ? do { warn "Received malformed DCC request from $nick: $args\n" if $self->{debug} }
                    : do { warn "Trying to send malformed DCC request: $args\n" if $self->{debug} }
                ;
                last CTCP;

            }
            $dcc_type = uc $dcc_type;

            my ($handler) = grep { $dcc_type =~ /$_/ } keys %dcc_types;
            if (!$handler) {
                warn "Unhandled DCC $dcc_type request: $rest\n" if $self->{debug};
                last CTCP;
            }

            my @dcc_args = $dcc_types{$handler}->($nick, $dcc_type, $rest);
            if (!@dcc_args) {
                defined $nick
                    ? do { warn "Received malformed DCC $dcc_type request from $nick: $rest\n" if $self->{debug} }
                    : do { warn "Trying to send malformed DCC $dcc_type request: $rest\n" if $self->{debug} }
                ;
                last CTCP;
            }

            $events = {
                prefix => $line->{prefix},
                command => 'DCC_REQUEST',
                params => [
                    $dcc_type,
                    @dcc_args,
                ],
                raw_line => $line->{raw_line},
            };
        }
        else {
            $events = {
                command => $ctcp_type . '_' . uc $name,
                prefix => $line->{prefix},
                params => [
                    $line->{params}->[0],
                    (defined $args ? $args : ''),
                    (defined $identified ? $identified : () ),
                ],
                raw_line => $line->{raw_line},
            };
        }
    }

    # XXX: I'm not quite sure what this is for, but on FreeNode it adds an
    # extra bogus event and displays a debug message, so I have disabled it.
    # FreeNode precedes PRIVMSG and CTCP ACTION messages with '+' or '-'.
    #if ($text && @$text) {
    #    my $what;
    #    ($what) = $line->{raw_line} =~ /^(:[^ ]+ +\w+ +[^ ]+ +)/
    #        or warn "What the heck? '".$line->{raw_line}."'\n" if $self->{debug};
    #    $text = (defined $what ? $what : '') . ':' . join '', @$text;
    #    $text =~ s/\cP/^P/g;
    #    warn "CTCP: $text\n" if $self->{debug};
    #    push @$events, @{ $self->{_ircd}->get([$text]) };
    #}

    return $events;
}

sub _split_idmsg {
    my ($line) = @_;
    my ($identified, $msg) = split //, $line, 2;
    $identified = $identified eq '+' ? 1 : 0;
    return $msg, $identified;
}

# Splits a message into CTCP and text chunks. This is gross. Most of
# this is also stolen from Net::IRC, but I (fimm) wrote that too, so it's
# used with permission. ;-)
sub _ctcp_dequote {
    my ($msg) = @_;
    my (@chunks, $ctcp, $text);

    # CHUNG! CHUNG! CHUNG!

    if (!defined $msg) {
        die 'Not enough arguments to Parse::IRC::_ctcp_dequote';
    }

    # Strip out any low-level quoting in the text.
    $msg = _low_dequote( $msg );

    # Filter misplaced \001s before processing... (Thanks, tchrist!)
    substr($msg, rindex($msg, "\001"), 1, '\\a')
        if ($msg =~ tr/\001//) % 2 != 0;

    return if $msg !~ tr/\001//;

    @chunks = split /\001/, $msg;
    shift @chunks if !length $chunks[0]; # FIXME: Is this safe?

    for (@chunks) {
        # Dequote unnecessarily quoted chars, and convert escaped \'s and ^A's.
        s/\\([^\\a])/$1/g;
        s/\\\\/\\/g;
        s/\\a/\001/g;
    }

    # If the line begins with a control-A, the first chunk is a CTCP
    # message. Otherwise, it starts with text and alternates with CTCP
    # messages. Really stupid protocol.
    if ($msg =~ /^\001/) {
        push @$ctcp, shift @chunks;
    }

    while (@chunks) {
        push @$text, shift @chunks;
        push @$ctcp, shift @chunks if @chunks;
    }

    return ($ctcp, $text);
}

# Quotes a string in a low-level, protocol-safe, utterly brain-dead
# fashion. Returns the quoted string.
sub _low_quote {
    my ($line) = @_;
    my %enquote = ("\012" => 'n', "\015" => 'r', "\0" => '0', "\cP" => "\cP");

    if (!defined $line) {
        die 'Not enough arguments to Parse::IRC->_low_quote';
    }

    if ($line =~ tr/[\012\015\0\cP]//) { # quote \n, \r, ^P, and \0.
        $line =~ s/([\012\015\0\cP])/\cP$enquote{$1}/g;
    }

    return $line;
}

# Does low-level dequoting on CTCP messages. I hate this protocol.
# Yes, I copied this whole section out of Net::IRC.
sub _low_dequote {
    my ($line) = @_;
    my %dequote = (n => "\012", r => "\015", 0 => "\0", "\cP" => "\cP");

    if (!defined $line) {
        die 'Not enough arguments to Parse::IRC->_low_dequote';
    }

    # dequote \n, \r, ^P, and \0.
    # Thanks to Abigail (abigail@foad.org) for this clever bit.
    if ($line =~ tr/\cP//) {
        $line =~ s/\cP([nr0\cP])/$dequote{$1}/g;
    }

    return $line;
}

q[Operation Blackbriar];

__END__

=pod

=head1 NAME

Parse::IRC - A parser for the IRC protocol.

=head1 VERSION

version 1.20

=head1 SYNOPSIS

General usage:

  use strict;
  use Parse::IRC;

  # Functional interface

  my $hashref = parse_irc( $irc_string );

  # OO interface

  my $irc_parser = Parse::IRC->new();

  my $hashref = $irc_parser->parse( $irc_string );

Using Parse::IRC in a simple IRC bot:

  # A simple IRC bot using Parse::IRC

  use strict;
  use IO::Socket;
  use Parse::IRC;

  my $parser = Parse::IRC->new( public => 1 );

  my %dispatch = ( 'ping' => \&irc_ping, '001' => \&irc_001, 'public' => \&irc_public );

  # The server to connect to and our details.
  my $server = "irc.perl.moo";
  my $nick = "parseirc$$";
  my $login = "simple_bot";

  # The channel which the bot will join.
  my $channel = "#IRC.pm";

  # Connect to the IRC server.
  my $sock = new IO::Socket::INET(PeerAddr => $server,
                                  PeerPort => 6667,
                                  Proto => 'tcp') or
                                    die "Can't connect\n";

  # Log on to the server.
  print $sock "NICK $nick\r\n";
  print $sock "USER $login 8 * :Perl IRC Hacks Robot\r\n";

  # Keep reading lines from the server.
  while (my $input = <$sock>) {
    $input =~ s/\r\n//g;
    my $hashref = $parser->parse( $input );
    SWITCH: {
          my $type = lc $hashref->{command};
          my @args;
          push @args, $hashref->{prefix} if $hashref->{prefix};
          push @args, @{ $hashref->{params} };
          if ( defined $dispatch{$type} ) {
            $dispatch{$type}->(@args);
            last SWITCH;
          }
          print STDOUT join( ' ', "irc_$type:", @args ), "\n";
    }
  }

  sub irc_ping {
    my $server = shift;
    print $sock "PONG :$server\r\n";
    return 1;
  }

  sub irc_001 {
    print STDOUT "Connected to $_[0]\n";
    print $sock "JOIN $channel\r\n";
    return 1;
  }

  sub irc_public {
    my ($who,$where,$what) = @_;
    print "$who -> $where -> $what\n";
    return 1;
  }

=head1 DESCRIPTION

Parse::IRC provides a convenient way of parsing lines of text conforming to the IRC
protocol ( see RFC1459 or RFC2812 ).

=head1 FUNCTION INTERFACE

Using the module automagically imports 'parse_irc' into your namespace.

=over

=item C<parse_irc>

Takes a string of IRC protcol text. Returns a hashref on success or undef on failure.
See below for the format of the hashref returned.

=back

=head1 OBJECT INTERFACE

=head2 CONSTRUCTOR

=over

=item C<new>

Creates a new Parse::IRC object. One may specify C<< debug => 1 >> to enable warnings about non-IRC
protcol lines. Specify C<< public => 1 >> to enable the automatic conversion of privmsgs targeted at
channels to C<public> instead of C<privmsg>. Specify C<< ctcp => 1 >> to enable automatic conversion
of privmsgs and notices with CTCP/DCC type encoding to C<ctcp>, C<ctcpreply> and C<dcc_request>.

=back

=head2 METHODS

=over

=item C<parse>

Takes a string of IRC protcol text. Returns a hashref on success or undef on failure.
The hashref contains the following fields:

  prefix
  command
  params ( this is an arrayref )
  raw_line

For example, if the filter receives the following line, the following hashref is produced:

  LINE: ':moo.server.net 001 lamebot :Welcome to the IRC network lamebot'

  HASHREF: {
	     prefix   => ':moo.server.net',
	     command  => '001',
	     params   => [ 'lamebot', 'Welcome to the IRC network lamebot' ],
	     raw_line => ':moo.server.net 001 lamebot :Welcome to the IRC network lamebot',
	   }

=back

=head1 KUDOS

Based on code originally developed by Jonathan Steinert and Dennis Taylor

=head1 SEE ALSO

L<POE::Filter::IRCD>

L<http://www.faqs.org/rfcs/rfc1459.html>

L<http://www.faqs.org/rfcs/rfc2812.html>

=head1 AUTHOR

Chris Williams <chris@bingosnet.co.uk>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2012 by Chris Williams, Jonathan Steinert and Dennis Taylor.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut