The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Email::MIME::ContentType;
use base 'Exporter';
use vars qw[
  $VERSION @EXPORT
  $STRICT_PARAMS
];
@EXPORT = qw(parse_content_type);
use strict;
use Carp;
$VERSION = '1.015';

$STRICT_PARAMS=1;

my $tspecials = quotemeta '()<>@,;:\\"/[]?=';
my $ct_default = 'text/plain; charset=us-ascii';
my $extract_quoted = 
    qr/(?:\"(?:[^\\\"]*(?:\\.[^\\\"]*)*)\"|\'(?:[^\\\']*(?:\\.[^\\\']*)*)\')/;

# For documentation, really:
{
  my $discrete  = qr/[^$tspecials]+/;
  my $composite = qr/[^$tspecials]+/;
  my $params    = qr/;.*/;

  sub parse_content_type { # XXX This does not take note of RFC2822 comments
      my $ct = shift;

      # If the header isn't there or is empty, give default answer.
      return parse_content_type($ct_default) unless defined $ct and length $ct;

      # It is also recommend (sic.) that this default be assumed when a
      # syntactically invalid Content-Type header field is encountered.
      return parse_content_type($ct_default)
          unless $ct =~ m[ ^ ($discrete) / ($composite) \s* ($params)? $ ]x;

      return {
          discrete   => lc $1,
          composite  => lc $2,
          attributes => _parse_attributes($3)
      };
  }
}

sub _parse_attributes {
    local $_ = shift;
    my $attribs = {};
    while ($_) {
        s/^;//;
        s/^\s+// and next;
        s/\s+$//;
        unless (s/^([^$tspecials]+)=//) {
          # We check for $_'s truth because some mail software generates a
          # Content-Type like this: "Content-Type: text/plain;"
          # RFC 1521 section 3 says a parameter must exist if there is a
          # semicolon.
          carp "Illegal Content-Type parameter $_" if $STRICT_PARAMS or $_;
          return $attribs;
        }
        my $attribute = lc $1;
        my $value = _extract_ct_attribute_value();
        $attribs->{$attribute} = $value;
    }
    return $attribs;
}

sub _extract_ct_attribute_value { # EXPECTS AND MODIFIES $_
    my $value;
    while ($_) { 
        s/^([^$tspecials]+)// and $value .= $1;
        s/^($extract_quoted)// and do {
            my $sub = $1; $sub =~ s/^["']//; $sub =~ s/["']$//;
            $value .= $sub;
        };
        /^;/ and last;
        /^([$tspecials])/ and do { 
            carp "Unquoted $1 not allowed in Content-Type!"; 
            return;
        }
    }
    return $value;
}

1;

__END__

=head1 NAME

Email::MIME::ContentType - Parse a MIME Content-Type Header

=head1 VERSION

version 1.013

=head1 SYNOPSIS

  use Email::MIME::ContentType;

  # Content-Type: text/plain; charset="us-ascii"; format=flowed
  my $ct = 'text/plain; charset="us-ascii"; format=flowed';
  my $data = parse_content_type($ct);

  $data = {
    discrete   => "text",
    composite  => "plain",
    attributes => {
      charset => "us-ascii",
      format  => "flowed"
    }
  };

=head1 FUNCTIONS

=head2 parse_content_type

This routine is exported by default.

This routine parses email content type headers according to section 5.1 of RFC
2045. It returns a hash as above, with entries for the discrete type, the
composite type, and a hash of attributes.

=head1 WARNINGS

This is not a valid content-type header, according to both RFC 1521 and RFC
2045:

  Content-Type: type/subtype;

If a semicolon appears, a parameter must.  C<parse_content_type> will carp if
it encounters a header of this type, but you can suppress this by setting
C<$Email::MIME::ContentType::STRICT_PARAMS> to a false value.  Please consider
localizing this assignment!

=head1 PERL EMAIL PROJECT

This module is maintained by the Perl Email Project.

L<http://emailproject.perl.org/wiki/Email::MIME::ContentType>

=head1 AUTHOR

Casey West, C<casey@geeknest.com>
Simon Cozens, C<simon@cpan.org>

=head1 SEE ALSO

L<Email::MIME>

=cut