#!/usr/bin/perl
use strict;
use warnings;
# Simple encoding and decoding using integer codings
#
# For more information on these and other codings, see the CPAN module
# Data::BitStream.
package IntegerCoding;
require Exporter;
our @ISA = qw(Exporter);
our @EXPORT = qw(encode_unary decode_unary
encode_gamma decode_gamma
encode_delta decode_delta
encode_omega decode_omega
encode_fib decode_fib );
our @EXPORT_OK = qw();
# If called as a script, parse the args and input.
&ic_script unless caller;
# Helper functions
# convert to/from decimal and BE binary, works with BE/LE, 32-/64-bit
sub dec_to_bin {
my $bits = shift;
my $v = shift;
if ($bits > 32) {
# return substr(unpack("B64", pack("Q>", $v)), -$bits); # needs v5.9.2
return substr(unpack("B32", pack("N", $v>>32)), -($bits-32))
. unpack("B32", pack("N", $v));
} else {
return scalar reverse unpack("b$bits", pack("V", $v));
}
}
sub bin_to_dec { no warnings 'portable'; oct '0b' . substr($_[1], 0, $_[0]); }
sub base_of { my $d = shift; my $base = 0; $base++ while ($d >>= 1); $base; }
# Unary: 0 based
sub encode_unary {
('0' x (shift)) . '1';
}
sub decode_unary {
index($_[0], '1', 0);
}
# Gamma: 1 based
sub encode_gamma {
my $d = shift;
die "Value must be between 1 and ~0" unless $d >= 1 and $d <= ~0;
my $base = base_of($d);
my $str = encode_unary($base);
$str .= dec_to_bin($base, $d) if $base > 0;
$str;
}
sub decode_gamma {
my $str = shift;
my $base = decode_unary($str);
my $val = 1 << $base;
$val |= bin_to_dec($base, substr($str, $base+1)) if $base > 0;
$val;
}
# Delta: 1 based
sub encode_delta {
my $d = shift;
die "Value must be between 1 and ~0" unless $d >= 1 and $d <= ~0;
my $base = base_of($d);
my $str = encode_gamma($base+1);
$str .= dec_to_bin($base, $d) if $base > 0;
$str;
}
sub decode_delta {
my $str = shift;
my $base = decode_gamma($str) - 1;
my $val = 1 << $base;
if ($base > 0) {
# We have to figure out how far we need to look
my $shift = length(encode_gamma($base+1));
$val |= bin_to_dec($base, substr($str, $shift));
}
$val;
}
# Omega: 1 based
sub encode_omega {
my $d = shift;
die "Value must be between 1 and ~0" unless $d >= 1 and $d <= ~0;
my $str = '0';
while ($d > 1) {
my $base = base_of($d);
$str = dec_to_bin($base+1, $d) . $str;
$d = $base;
}
$str;
}
sub decode_omega {
my $str = shift;
my $val = 1;
while (substr($str,0,1) eq '1') {
my $bits = $val+1;
die "off end of string" unless length($str) >= $bits;
$val = bin_to_dec($bits, $str);
substr($str,0,$bits) = '';
}
$val;
}
# Fibonacci: 1 based
# Specifically, the C1 (m=2) code of Fraenkel and Klein, 1996.
my @fibs; # Holds F[2] ... -> (1, 2, 3, 5, 8, ...)
sub _calc_fibs {
@fibs = ();
my ($v2, $v1) = (0,1);
while ($v1 <= ~0) {
($v2, $v1) = ($v1, $v2+$v1);
push(@fibs, $v1);
}
die unless defined $fibs[41]; # needed below
}
sub encode_fib {
my $d = shift;
die "Value must be between 1 and ~0" unless $d >= 1 and $d <= ~0;
_calc_fibs unless defined $fibs[0];
# Find the largest F(s) bigger than $n
my $s = ($d < $fibs[20]) ? 0 : ($d < $fibs[40]) ? 21 : 41;
$s++ while ($d >= $fibs[$s]);
my $r = '1';
while ($s-- > 0) {
if ($d >= $fibs[$s]) {
$d -= $fibs[$s];
$r .= "1";
} else {
$r .= "0";
}
}
scalar reverse $r;
}
sub decode_fib {
my $str = shift;
die "Invalid Fibonacci code" unless $str =~ /^[01]*11$/;
_calc_fibs unless defined $fibs[0];
my $val = 0;
foreach my $b (0 .. length($str)-2) {
$val += $fibs[$b] if substr($str, $b, 1) eq '1';
}
$val;
}
sub die_usage {
my $usage =<<EOU;
Usage:
--help This message
--encode <method> Encode with <method (unary,gamma,delta,omega,fib)
--decode <method> Decode with <method> (unary,gamma,delta,omega,fib)
EOU
die $usage;
}
use Getopt::Long;
sub ic_script {
my %subs = ( unary => [ \&encode_unary, \&decode_unary ],
gamma => [ \&encode_gamma, \&decode_gamma ],
delta => [ \&encode_delta, \&decode_delta ],
omega => [ \&encode_omega, \&decode_omega ],
fib => [ \&encode_fib, \&decode_fib ] );
my($encoding, $method, $help);
GetOptions('help|usage|?' => \$help,
'encode=s' => sub { $encoding = 1; $method = $_[1]; },
'decode=s' => sub { $encoding = 0; $method = $_[1]; },
) or die_usage;
die_usage if defined $help || !defined $encoding;
die "Unknown code: $method\n" unless defined $subs{lc $method};
my $sub = $subs{lc $method}->[1-$encoding];
die unless defined $sub;
while (<STDIN>) {
chomp;
next unless /^\s*\d+\s*$/; # Ignore all non-digit input
die "Must have a binary string for decoding"
if (!$encoding) && (length($_) == 0 or $_ =~ /[^01]/);
print $sub->($_), "\n";
}
1;
}
1;
__END__
=pod
=head1 NAME
integercoding.pl - simple encoding and decoding using integer codings
=head1 DESCRIPTION
Example code to encode and decode numbers into binary strings using various
integer codings.
=head1 SYNOPSIS
Command line examples:
echo "15" | perl integercoding.pl -encode omega
echo "00010001110111" | perl integercoding.pl -decode delta
Subroutine examples:
print "$_ encoded in gamma is ", encode_gamma($_), "\n" for (1 .. 10);
my $delta_str = encode_delta(317);
my $fib_str = encode_fib( decode_delta( $delta_str ) );
print "fib str: $fib_str encodes ", decode_fib($fib_str), "\n";
Print out a table of code sizes:
printf("%7s %7s %7s %7s %7s %7s %7s\n",
"Value", "Unary", "Binary", "Gamma", "Delta", "Omega", "Fib");
my @vals = (1..5);
push @vals, $vals[-1]*2, $vals[-1]*4, $vals[-1]*10 for (1..5);
foreach my $n (@vals) {
printf("%7d %7s %7s %7s %7s %7s %7s\n",
$n, $n+1, base_of($n)+1,
length(encode_gamma($n)), length(encode_delta($n)),
length(encode_omega($n)), length(encode_fib($n)) );
}
=head1 FUNCTIONS
The C<encode_> methods take a single unsigned integer as input and produce a
string of 0 and 1 characters representing the bit encoding of the integer
using that code.
$str = encode_unary(8); # die unless $str eq '000000001';
$str = encode_gamma(8); # die unless $str eq '0001000';
$str = encode_delta(8); # die unless $str eq '00100000';
$str = encode_omega(8); # die unless $str eq '1110000';
$str = encode_fib(8); # die unless $str eq '000011';
The C<decode_> methods take a single binary string as input and produce an
unsigned integer output decoded from the bit encoding.
$n = decode_unary('000000000000001'); # die unless $n == 14;
$n = decode_gamma('0001110'); # die unless $n == 14;
$n = decode_delta('00100110'); # die unless $n == 14;
$n = decode_omega('1111100'); # die unless $n == 14;
$n = decode_fib( '1000011'); # die unless $n == 14;
=head1 SEE ALSO
The CPAN module L<Data::BitStream> includes these codes and more.
Peter Elias, "Universal Codeword Sets and Representations of the Integers", IEEE Trans. Information Theory, Vol 21, No 2, pp 194-203, Mar 1975.
Peter Fenwick, "Punctured Elias Codes for variable-length coding of the integers," Technical Report 137, Department of Computer Science, The University of Auckland, Auckland, New Zealand, December 1996.
=over 4
=item L<http://en.wikipedia.org/wiki/Unary_coding>
=item L<http://en.wikipedia.org/wiki/Elias_gamma_coding>
=item L<http://en.wikipedia.org/wiki/Elias_delta_coding>
=item L<http://en.wikipedia.org/wiki/Elias_omega_coding>
=item L<http://en.wikipedia.org/wiki/Fibonacci_coding>
=back
=head1 AUTHORS
Dana Jacobsen <dana@acm.org>
=head1 COPYRIGHT
Copyright 2011 by Dana Jacobsen <dana@acm.org>
This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
See http://www.perl.com/perl/misc/Artistic.html
=cut