The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl

use strict;
our $VERSION = '0.28';
$|++;

# ########## #
# Parse ARGV #
# ########## #
my %args = ();
my %opts = (
	# name	=> [char, expect_arg_bit ]
	'help'		=> ['h'],
	'usage'		=> ['u'],
	'version'	=> ['v'],
	'stdin'		=> [''],
	'brief'		=> ['b'],
	'namefile'	=> ['f', 1],
	'noalign'	=> ['N'],
	'describe'	=> ['d'],
	'file-compat'	=> [''],
	'output-format'	=> ['', 1],
	'language'	=> ['l', 1],
	'mimetype'	=> ['i'],
	'dereference'	=> ['L'],
	'separator'	=> ['F',1],
	'debug'		=> ['D'],
	'database'	=> ['', 1],
	'all'		=> ['a'],
	'magic-only'	=> ['M'],
);

$args{'file-compat'}++ if $0 =~ m#(^|/)file$#;

while ((@ARGV) && ($ARGV[0] =~ /^-/)) {
	my $opt = shift @ARGV;
	if ($opt =~ /^--?$/) {
		$args{stdin}++ if $args{'file-compat'} && $opt eq '-';
		last;
	}
	elsif ($opt =~ s/^--([\w-]+)(?:=(.*))?/$1/) {
		if (exists $opts{$opt}) {
			if ($opts{$opt}[1]) {
				my $arg = $2 || shift @ARGV;
				complain('--'.$opt, 2) unless defined $arg;
				$args{$opt} .= ( $args{$opt} ? ' ' : '' ) . $arg;
			}
			else { $args{$opt}++ }
		}
		else { complain('--'.$opt) }
	}
	elsif ($opt =~ s/^-(?!-)//) {
		foreach my $o (split //, $opt) {
			my ($key) = grep { $opts{$_}[0] eq $o } keys %opts;
			complain($o) unless $key;

			if ($opts{$key}[1]) {
				my $arg = shift @ARGV;
				complain('-'.$o, 2) unless defined $arg;
				$args{$key} .= ( $args{$key} ? ' ' : '' ).$arg; # join with whitespace
			}
			else { $args{$key}++; }
		}
	}
	else { complain($opt) }
}

if ($args{help} || $args{usage}) {
	eval 'use Pod::Usage';
	die "Could not find perl module Pod::Usage\n" if $@;
	pod2usage( {
		-verbose => 1,
		-exitval => 0,
	} );
}

if ($args{version}) {
	print "mimetype $VERSION\n\n", << 'EOV';
Copyright (c) 2003, 2012 Jaap G Karssenberg. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
EOV
	exit 0;
}

complain(undef, 4) unless scalar(@ARGV) || $args{stdin} || $args{namefile};

# ############# #
# prepare stuff #
# ############# #

our %desc; # desc caching hash

# --database
@File::MimeInfo::DIRS = split /:/, $args{database} if $args{database};

## Actually use our module ##
eval 'use File::MimeInfo::Magic qw/mimetype globs inodetype magic describe/;';
die $@ if $@;

*default = \&File::MimeInfo::default;

# --debug
if ($args{debug}) {
	$File::MimeInfo::DEBUG++;
	$File::MimeInfo::Magic::DEBUG++;
	print	'> Data dirs are: ', join( ', ',
			$args{database}
				? ( split /:/, $args{database} )
				: (
					File::BaseDir::xdg_data_home(),
					File::BaseDir::xdg_data_dirs()
				)
		), "\n";
}

# --file-compat
$args{describe}++ if $args{'file-compat'} && !$args{mimetype};

# --namefile
if ($args{namefile}) {
	open IN, $args{namefile}
		|| die "Couldn't open file: $args{namefile}\n";
	unshift @ARGV, map {chomp; $_} (<IN>);
	close IN;
}

# --language
$File::MimeInfo::LANG = $args{language} if $args{language};

# Formatting stuff
my $l = 5; # "STDIN"
unless ($args{brief} || $args{noalign}) {
	for (@ARGV) { $l = length($_) if $l < length($_) }
}

$args{separator} = ':' unless defined $args{separator};
my $format = $args{'output-format'}
	? parse_format($args{'output-format'})
	: $args{brief}
	? sub { $args{describe} ? desc($_[1]) : $_[1] }
	: $args{noalign}
	? sub { ( $_[0], $args{separator}, ' ', $args{describe} ? desc($_[1]) : $_[1] ) }
	: sub { ( $_[0], $args{separator}, ' 'x($l + 1 - length($_[0])),
		$args{describe} ? desc($_[1]) : $_[1] ) };

# --dereference
if ($args{dereference}) {
	eval 'use File::Spec';
	die "Could not find perl module File::Spec\n" if $@;
}
# --stdin
if ($args{stdin}) {
	eval 'use IO::Scalar';
	die "Could not find perl module IO::Scalar\n" if $@;
}

# ######## #
# do stuff #
# ######## #

# --stdin
if ($args{stdin}) {
	my $data;
	read(STDIN, $data, $File::MimeInfo::Magic::max_buffer);
	my $scalar = new IO::Scalar \$data;
	print $format->('STDIN', mimetype($scalar)), "\n";
	exit;
}

foreach my $file (@ARGV) {
	# --dereference
	my $f = ($args{dereference} && -l $file) ? resolvelink($file) : $file;
	# --magic-only
	if ($args{'magic-only'}) {
		print $format->($file, magic($f) || default($f)), "\n";
	}
	# --all
	elsif ($args{all}) {
		for (qw#inodetype globs magic default#) {
			my $m = eval "$_(\$f)";
			print $format->($file, $m), "\n" if $m;
		}
	}
	else { print $format->($file, mimetype($f)), "\n" }
}

exit;

# ########### #
# Subroutines #
# ########### #

sub complain { # Error messages
	my $opt = shift;
	my $m = shift || 1;

	my $bn = $0;
	$bn =~ s|^(.*/)*||;
	if ($m == 1) { print STDERR "$bn: unrecognized option '$opt'" }
	elsif ($m == 2) { print STDERR "$bn: option '$opt' requires an argument" }
	elsif ($m == 3) { print STDERR "$bn: $opt: No such file or directory\n" }
	elsif ($m == 4) { print STDERR "usage: $bn [options] files" }

	print "\nTry '$bn --help' for more information.\n" unless $m == 3;
	exit $m;
}

sub parse_format { # Advanced formatting
	my $form = shift;
	my $code = "sub { '$form' }";
	# code will get @_ = qw/file type/
	$code =~ s/(?<!\\)%f/'.\$_[0].'/g;
	$code =~ s/(?<!\\)%m/'.\$_[1].'/g;
	$code =~ s/(?<!\\)%d/'.desc(\$_[1]).'/g;
	return eval $code;
}

sub resolvelink { # --dereference
	my $file = shift;
	my $link = readlink($file) || return $file;
	my (undef, $dir, undef) = File::Spec->splitpath($file);
	$link = File::Spec->rel2abs($link, $dir);
	$link = resolvelink($link) if -l $link; # recurs
	return $link;
}

sub desc { # Cache description
	my $mt = shift;
	return undef unless $mt;
	$desc{$mt} ||= describe($mt) || describe($mt, ''); # second form overrules the language settings to default
}

__END__

=head1 NAME

mimetype - Determine file type

=head1 SYNOPSIS

mimetype [options] [-] files

=head1 DESCRIPTION

This script tries to determine the mime type of a file using the
Shared MIME-info database. It is intended as a kind of I<file(1)> work-alike,
but uses mimetypes instead of descriptions.

If one symlinks the I<file> command to I<mimetype> it will behave
a little more compatible, see L</--file-compat>.
Commandline options to specify alternative magic files are not
implemented the same because of the conflicting data formats.
Also the wording of the descriptions will differ.

For naming switches I followed the manpage of file(1) version 4.02
when possible. They seem to differ completely from the spec in the
'utilities' chapter of IEEE Std 1003.1-2001 (POSIX).

=head1 OPTIONS

=over 4

=item B<-a>, B<--all>

Show output of all rules that match the file.

TODO: this method now just returns one match for each
method (globs, magic, etc.).

=item B<-b>, B<--brief>

Do not prepend filenames to output lines (brief mode).

=item B<--database>=I<mimedir>:I<mimedir>:...

Force the program to look in these directories for the shared mime-info
database. The directories specified by the basedir specification
are ignored.

=item B<-d>, B<--describe>

Print file descriptions instead of mime types, this is the
default when using L</--file-compat>.

=item B<-D>, B<--debug>

Print debug information about how the mimetype was determined.

=item B<-f> I<namefile>, B<--namefile>=I<namefile>

Read the names of the files to be examined from the file 'namefile'
(one per line) before the argument list.

=item B<--file-compat>

Make mimetype behave a little more L<file(1)> compatible. This
is turned on automatically when you call mimetype by a link
called 'file'.

A single '-' won't be considered a separator
between options and filenames anymore, but becomes identical to L</--stdin>.
( You can still use '--' as separator, but
that is not backward compatible with the original file command. )
Also the default becomes to print descriptions instead of mimetypes.

=item B<-F> I<string>, B<--separator>=I<string>

Use string as custom separator between the file name and its mimetype
or description, defaults to ':' .

=item B<-h>, B<--help>

=item B<-u>, B<--usage>

Print a help message and exits.

=item B<-i>, B<--mimetype>

Use mime types, opposite to L</--describe>,
this is the default when _not_ using L</--file-compat>.

=item B<-L>, B<--dereference>

Follow symbolic links.

=item B<-l> I<code>, B<--language>=I<code>

The language attribute specifies a two letter language code, this makes
descriptions being outputted in the specified language.

=item B<-M>, B<--magic-only>

Do not check for extensions, globs or inode type, only look at the content
of the file. This is particularly useful if for some reason you don't trust
the name or the extension a file has.

=item B<-N>, B<--noalign>

Do not align output fields.

=item B<--output-format>

If you want an alternative output format, you can specify a format string
containing the following escapes:

	%f for the filename
	%d description
	%m mime type

Alignment is not available when using this,
you need to post-process the output to do that.

=item B<--stdin>

Determine type of content from STDIN, less powerful then normal file checking
because it only uses magic typing. This will happen also if the STDIN filehandle
is a pipe.

To use this option L<IO::Scalar> needs to be installed.

=item B<-v>, B<--version>

Print the version of the program and exit.

=back

=head1 ENVIRONMENT

=over 4

=item XDG_DATA_HOME

=item XDG_DATA_DIRS

These variables can list base directories to search for data files. The shared
mime-info will be expected in the "mime" sub directory of one of these
directories. If these are not set, there will be searched for the
following directories:

	$HOME/.local/share/mime
	/usr/local/share/mime
	/usr/share/mime

See also the "XDG Base Directory Specification"
L<http://freedesktop.org/Standards/basedir-spec>

=back

=head1 FILES

The base dir for all data files is determined by two environment variables,
see L</ENVIRONMENT>.

=over 4

=item F<BASE/mime/packages/SOURCE.xml>

All other files are compiled from these source files. To re-compile them
use B<update-mime-database(1)>.

=item F<BASE/mime/globs>

Compiled information about globs.

=item F<BASE/mime/magic>

Compiled information about magic numbers.

=item F<BASE/mime/MEDIA/SUBTYPE.xml>

Descriptions of a mimetype in multiple languages, used for the
L</--describe> switch.

=back

=head1 DIAGNOSTICS

If a file has an empty mimetype or an empty description,
most probably the file doesn't exist and the given name
doesn't match any globs. An empty description can also mean that
there is no description available in the language you specified.

The program exits with a non-zero exit value if either the commandline
arguments failed, a module it depends on wasn't found or the shared
mime-info database wasn't accessible. See L<File::MimeInfo> for more details.

=head1 TODO

The '--all' switch doesn't really show all matches, but only one per
mime-typing method. This needs to be implemented in the modules first.

=head1 BUGS

If you find bugs, please file them in our Github
issue tracker at L<https://github.com/mbeijen/File-MimeInfo/issues>.

B<mimetype> doesn't provide a switch for looking inside compressed files
because it seems to me that this can only be done by un-compressing the file,
something that defeats the purpose. On the other hand the option should
exist for strict compatibility with file(1). Possibly a subclass should be
made for this one day.

=head1 AUTHOR

Jaap Karssenberg E<lt>pardus@cpan.orgE<gt>
Maintained by Michiel Beijen E<lt>michiel.beijen@gmail.comE<gt>

=head1 COPYRIGHT

Copyright (c) 2003, 2012 Jaap G Karssenberg. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

=head1 SEE ALSO

L<file(1)>,
L<update-mime-database(1)>,
L<File::MimeInfo(3)>,
L<http://freedesktop.org/Software/shared-mime-info>