# You may distribute under the terms of either the GNU General Public License
# or the Artistic License (the same terms as Perl itself)
#
# (C) Paul Evans, 2010 -- leonerd@leonerd.org.uk
package Linux::SocketFilter::Assembler;
use strict;
use warnings;
our $VERSION = '0.04';
use Linux::SocketFilter qw( :bpf :skf pack_sock_filter );
use Exporter 'import';
our @EXPORT_OK = qw(
assemble
);
=head1 NAME
C<Linux::SocketFilter::Assembler> - assemble BPF programs from textual code
=head1 SYNOPSIS
use Linux::SocketFilter;
use Linux::SocketFilter::Assembler qw( assemble );
use IO::Socket::Packet;
use Socket qw( SOCK_DGRAM );
my $sock = IO::Socket::Packet->new(
IfIndex => 0,
Type => SOCK_DGRAM,
) or die "Cannot socket - $!";
$sock->attach_filter( assemble( <<"EOF" ) );
LD AD[PROTOCOL]
JEQ 0x0800, 0, 1
RET 20
JEQ 0x86dd, 0, 1
RET 40
RET 0
EOF
while( my $addr = $sock->recv( my $buffer, 40 ) ) {
printf "Packet: %v02x\n", $buffer;
}
=head1 DESCRIPTION
Linux sockets allow a filter to be attached, which determines which packets
will be allowed through, and which to block. They are most often used on
C<PF_PACKET> sockets when used to capture network traffic, as a filter to
determine the traffic of interest to the capturing application. By running
directly in the kernel, the filter can discard all, or most, of the traffic
that is not interesting to the application, allowing higher performance due
to reduced context switches between kernel and userland.
This module allows filter programs to be written in textual code, and
assembled into a binary filter, to attach to the socket using the
C<SO_ATTACH_FILTER> socket option.
=cut
=head1 FILTER MACHINE
The virtual machine on which these programs run is a simple load/store
register machine operating on 32-bit words. It has one general-purpose
accumulator register (C<A>) and one special purpose index register (C<X>).
It has a number of temporary storage locations, called scratchpads (C<M[]>).
It is given read access to the contents of the packet to be filtered in 8-bit
(C<BYTE[]>), 16-bit (C<HALF[]>) or 32-bit (C<WORD[]>) sized quantities. It
also has an implicit program counter, though direct access to it is not
provided.
The filter program is run by the kernel on every packet captured by the socket
to which it is attached. It can inspect data in the packet and certain other
items of metadata concerning the packet, and decide if this packet should be
accepted by the capture socket. It returns the number of bytes to capture if
it should be captured, or zero to indicate this packet should be ignored. It
starts on the first instruction, and proceeds forwards, unless the flow is
modified by a jump instruction. The program terminates on a C<RET>
instruction, which informs the kernel of the required fate of the packet. The
last instruction in the filter must therefore be a C<RET> instruction; though
others may appear at earlier points.
In order to guarantee termination of the program in all circumstances, the
virtual machine is not fully Turing-powerful. All jumps, conditional or
unconditional, may only jump forwards in the program. It is not possible to
construct a loop of instructions that executes repeatedly.
=cut
=head1 FUNCTIONS
=cut
=head2 $filter = assemble( $text )
Takes a program (fragment) in text form and returns a binary string
representing the instructions packed ready for C<attach_filter()>.
The program consists of C<\n>-separated lines of instructions or comments.
Leading whitespace is ignored. Blank lines are ignored. Lines beginning with
a C<;> (after whitespace) are ignored as comments.
=cut
sub assemble
{
my $self = __PACKAGE__;
my ( $text ) = @_;
my $ret = "";
foreach ( split m/\n/, $text ) {
s/^\s+//; # trim whitespace
next if m/^$/; # skip blanks
next if m/^;/; # skip comments
my ( $op, $args ) = split ' ', $_, 2;
my @args = defined $args ? split m/,\s*/, $args : ();
$self->can( "assemble_$op" ) or
die "Can't compile $_ - unrecognised op '$op'\n";
$ret .= $self->${\"assemble_$op"}( @args );
}
return $ret;
}
=head1 INSTRUCTION FORMAT
Each instruction in the program is formed of an opcode followed by its
operands. Where numeric literals are involved, they may be given in decimal,
hexadecimal, or octal form. Literals will be notated as C<lit> in the
following descriptions.
=cut
my $match_literal = qr/-?(?:\d+|0x[0-9a-f]+)/;
sub _parse_literal
{
my ( $lit ) = @_;
my $sign = ( $lit =~ s/^-// ) ? -1 : 1;
return $sign * oct( $lit ) if $lit =~ m/^0x?/; # oct can manage octal or hex
return $sign * int( $lit ) if $lit =~ m/\d+/;
die "Cannot parse literal $lit\n";
}
=pod
LD BYTE[addr]
LD HALF[addr]
LD WORD[addr]
Load the C<A> register from the 8, 16, or 32-bit quantity in the packet buffer
at the address. The address may be given in the forms
lit
X+lit
NET+lit
NET+X+lit
To load from an immediate or C<X>-index address, starting from either the
beginning of the buffer, or the beginning of the network header, respectively.
LD len
Load the C<A> register with the length of the packet.
LD lit
Load the C<A> register with a literal value
LD M[lit]
Load the C<A> register with the value from the given scratchpad cell
LD X
TXA
Load the C<A> register with the value from the C<X> register. (These two
instructions are synonymous)
LD AD[name]
Load the C<A> register with a value from the packet auxiliary data area. The
following data points are available.
=over 4
=over 4
=item PROTOCOL
The ethertype protocol number of the packet
=item PKTTYPE
The type of the packet; see the C<PACKET_*> constants defined in
L<Socket::Packet>.
=item IFINDEX
The index of the interface the packet was received on or transmitted from.
=back
=back
=cut
my %auxdata_offsets = (
PROTOCOL => SKF_AD_PROTOCOL,
PKTTYPE => SKF_AD_PKTTYPE,
IFINDEX => SKF_AD_IFINDEX,
);
sub assemble_LD
{
my ( undef, $src ) = @_;
my $code = BPF_LD;
if( $src =~ m/^(BYTE|HALF|WORD)\[(NET\+)?(X\+)?($match_literal)]$/ ) {
my ( $size, $net, $x, $offs ) = ( $1, $2, $3, _parse_literal($4) );
$code |= ( $size eq "BYTE" ) ? BPF_B :
( $size eq "HALF" ) ? BPF_H :
BPF_W;
$code |= ( $x ) ? BPF_IND :
BPF_ABS;
$offs += SKF_NET_OFF if $net;
pack_sock_filter( $code, 0, 0, $offs );
}
elsif( $src eq "len" ) {
pack_sock_filter( $code|BPF_W|BPF_LEN, 0, 0, 0 );
}
elsif( $src =~ m/^$match_literal$/ ) {
pack_sock_filter( $code|BPF_IMM, 0, 0, _parse_literal($src) );
}
elsif( $src =~ m/^M\[($match_literal)\]$/ ) {
pack_sock_filter( $code|BPF_MEM, 0, 0, _parse_literal($1) );
}
elsif( $src eq "X" ) {
pack_sock_filter( BPF_MISC|BPF_TXA, 0, 0, 0 );
}
elsif( $src =~ m/^AD\[(.*)\]$/ and exists $auxdata_offsets{$1} ) {
pack_sock_filter( $code|BPF_W|BPF_ABS, 0, 0, SKF_AD_OFF + $auxdata_offsets{$1} );
}
else {
die "Unrecognised instruction LD $src\n";
}
}
sub assemble_TXA { pack_sock_filter( BPF_MISC|BPF_TXA, 0, 0, 0 ) }
=pod
LDX lit
Load the C<X> register with a literal value
LDX M[lit]
Load the C<X> register with the value from the given scratchpad cell
LDX A
TAX
Load the C<X> register with the value from the C<A> register. (These two
instructions are synonymous)
=cut
sub assemble_LDX
{
my ( undef, $src ) = @_;
my $code = BPF_LDX;
if( $src =~ m/^$match_literal$/ ) {
pack_sock_filter( $code|BPF_IMM, 0, 0, _parse_literal($src) );
}
elsif( $src =~ m/^M\[($match_literal)\]$/ ) {
pack_sock_filter( $code|BPF_MEM, 0, 0, _parse_literal($1) );
}
elsif( $src eq "A" ) {
pack_sock_filter( BPF_MISC|BPF_TAX, 0, 0, 0 );
}
else {
die "Unrecognised instruction LDX $src\n";
}
}
sub assemble_TAX { pack_sock_filter( BPF_MISC|BPF_TAX, 0, 0, 0 ) }
=pod
LDMSHX BYTE[lit]
Load the C<X> register with a value obtained from a byte in the packet masked
and shifted (hence the name). The byte at the literal address is masked by
C<0x0f> to obtain the lower 4 bits, then shifted 2 bits upwards. This
special-purpose instruction loads the C<X> register with the size, in bytes,
of an IPv4 header beginning at the given literal address.
=cut
sub assemble_LDMSHX
{
my ( undef, $src ) = @_;
if( $src =~ m/^BYTE\[($match_literal)\]$/ ) {
pack_sock_filter( BPF_LDX|BPF_MSH|BPF_B, 0, 0, _parse_literal($1) );
}
else {
die "Unrecognised instruction LDMSHX $src\n";
}
}
=pod
ST M[lit]
Store the value of the C<A> register into the given scratchpad cell
STX M[lit]
Store the value of the C<X> register into the given scratchpad cell
=cut
sub assemble_ST { shift->assemble_store( BPF_ST, @_ ) }
sub assemble_STX { shift->assemble_store( BPF_STX, @_ ) }
sub assemble_store
{
my ( undef, $code, $dest ) = @_;
if( $dest =~ m/^M\[($match_literal)\]$/ ) {
pack_sock_filter( $code, 0, 0, _parse_literal($1) );
}
else {
die "Unrecognised instruction ST(X?) $dest\n";
}
}
=pod
ADD src # A = A + src
SUB src # A = A - src
MUL src # A = A * src
DIV src # A = A / src
AND src # A = A & src
OR src # A = A | src
LSH src # A = A << src
RSH src # A = A >> src
Perform arithmetic or bitwise operations. In each case, the operands are the
C<A> register and the given source, which can be either the C<X> register or
a literal. The result is stored in the C<A> register.
=cut
sub assemble_ADD { shift->assemble_alu( BPF_ADD, @_ ) }
sub assemble_SUB { shift->assemble_alu( BPF_SUB, @_ ) }
sub assemble_MUL { shift->assemble_alu( BPF_MUL, @_ ) }
sub assemble_DIV { shift->assemble_alu( BPF_DIV, @_ ) }
sub assemble_AND { shift->assemble_alu( BPF_AND, @_ ) }
sub assemble_OR { shift->assemble_alu( BPF_OR, @_ ) }
sub assemble_LSH { shift->assemble_alu( BPF_LSH, @_ ) }
sub assemble_RSH { shift->assemble_alu( BPF_RSH, @_ ) }
sub assemble_alu
{
my ( undef, $code, $val ) = @_;
$code |= BPF_ALU;
if( $val eq "X" ) {
pack_sock_filter( $code|BPF_X, 0, 0, 0 );
}
elsif( $val =~ m/^$match_literal$/ ) {
pack_sock_filter( $code|BPF_K, 0, 0, _parse_literal($val) );
}
else {
die "Unrecognised alu instruction on $val\n";
}
}
=pod
JGT src, jt, jf # test if A > src
JGE src, jt, jf # test if A >= src
JEQ src, jt, jf # test if A == src
JSET src, jt, jf # test if A & src is non-zero
Jump conditionally based on comparisons between the C<A> register and the
given source, which is either the C<X> register or a literal. If the
comparison is true, the C<jt> branch is taken; if false the C<jf>. Each branch
is a numeric count of the number of instructions to skip forwards.
=cut
sub assemble_JGT { shift->assemble_jmp( BPF_JGT, @_ ) }
sub assemble_JGE { shift->assemble_jmp( BPF_JGE, @_ ) }
sub assemble_JSET { shift->assemble_jmp( BPF_JSET, @_ ) }
sub assemble_JEQ { shift->assemble_jmp( BPF_JEQ, @_ ) }
sub assemble_jmp
{
my ( undef, $code, $val, $jt, $jf ) = @_;
$code |= BPF_JMP;
if( $val eq "X" ) {
pack_sock_filter( $code|BPF_X, $jt, $jf, 0 );
}
elsif( $val =~ m/^$match_literal$/ ) {
pack_sock_filter( $code|BPF_K, $jt, $jf, _parse_literal($val) );
}
else {
die "Unrecognised jmp instruction on $val\n";
}
}
=pod
JA jmp
Jump unconditionally forward by the given number of instructions.
=cut
sub assemble_JA
{
my ( undef, $target ) = @_;
pack_sock_filter( BPF_JMP, 0, 0, $target+0 );
}
=pod
RET lit
Terminate the filter program and return the literal value to the kernel.
RET A
Terminate the filter program and return the value of the C<A> register to the
kernel.
=cut
sub assemble_RET
{
my ( undef, $val ) = @_;
my $code = BPF_RET;
if( $val =~ m/^$match_literal$/ ) {
pack_sock_filter( $code|BPF_K, 0, 0, _parse_literal($val) );
}
elsif( $val eq "A" ) {
pack_sock_filter( $code|BPF_A, 0, 0, 0 );
}
else {
die "Unrecognised instruction RET $val\n";
}
}
# Keep perl happy; keep Britain tidy
1;
=head1 AUTHOR
Paul Evans <leonerd@leonerd.org.uk>