The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.

package Parse::SVNDiff::Window;

use base qw(Class::Tangram);

use strict;
use warnings;
use bytes;

use Parse::SVNDiff::Ber qw(parse_ber);
use Parse::SVNDiff::Instruction;

use Data::Lazy 0.06;

our $schema =
    { fields =>
      {
       int => [ qw( source_offset source_length target_length lazy ) ],
       idbif => [ qw(inst_data new_data instructions) ],
      }
    };

use constant DEFAULT_BLOCK_SIZE => 4096;

# size a block of something (data or instructions) has to be before we
# make it lazy
use constant MINIMUM_LAZY_SIZE => 16;

sub parse {
    my $self = shift;
    my $fh = shift;

    $self->set_source_offset(parse_ber($fh));
    $self->set_source_length(parse_ber($fh));
    $self->set_target_length(parse_ber($fh));

    my $inst_length = parse_ber($fh);
    my $data_length = parse_ber($fh);

    my $am_lout = $self->lazy;

    #kill 2, $$;

    if ( $am_lout and $inst_length >= MINIMUM_LAZY_SIZE ) {
	my $offset = tell($fh);

	# sadly, have to break encapsulation here, because I never
	# made Class::Tangram use "Want" et al.  it's self, anyway.
	tie $self->{instructions}, 'Data::Lazy',
	    sub {
		#kill 2, $$;
		my $oldpos = tell($fh);
		seek($fh, $offset, 0);
		$self->parse_instructions($fh, $inst_length);
		seek($fh, $oldpos, 0);
		return $self->{instructions};
	    };
	seek($fh, $inst_length, 1);
    } else {
	$self->parse_instructions($fh, $inst_length);
    }

    if ( $am_lout and $data_length >= MINIMUM_LAZY_SIZE ) {
	my $offset = tell($fh);
	tie $self->{new_data}, 'Data::Lazy',
	    sub {
		#kill 2, $$;
		my $oldpos = tell($fh);
		seek($fh, $offset, 0);
		local($/) = \$data_length;
		$self->set_new_data(<$fh>);
		seek($fh, $oldpos, 0);
		return $self->{new_data};
	    };
	seek($fh, $data_length, 1);
    } else {
	local($/) = \$data_length;
	my $new_data = <$fh>;
	$self->set_new_data($new_data);
    }
}

sub parse_instructions {
    my $self = shift;
    my $fh = shift;
    my $inst_length = shift;

    my $pos = tell $fh;

    local($/) = \1;

    my @inst;
    while ( <$fh> ) {
	my $selector = ord($_) >> 6;
	# following || not followed

	my $length = (ord($_) & 0b00111111) || parse_ber($fh);

	my $offset = (($selector == 0b10)
		      ? 0
		      : parse_ber($fh));

	#kill 2, $$ if
	     #( !defined($selector)
	       #or !defined($length)
	       #or !defined($offset) );
	push @inst, [ $selector, $length, $offset ];

	last if tell($fh) - $pos >= $inst_length;
    }
    #kill 2, $$;
    $self->set_instructions(\@inst);
}

sub open {
    my $self = shift;
    $self->new_data;
}

# if new_data chunks are sufficiently big (say, more than pipe
# buffering size), it might make sense to use this iterator version of
# new_data.  untested.

sub new_data_iter {
    my $self = shift;
    if ( $self->{new_data} ) {
	my $spent;
	return sub {
	    $spent++ ? "" : $self->{new_data};
	}
    } else {
	my ($fh, $offset, $length) = @{$self->{data}};
	my $max_size = (stat $fh)[11] || DEFAULT_BLOCK_SIZE;
	return sub {
	    return "" if $length == 0;
	    (tell($fh) == $offset)
		or (seek($fh, $offset, 0)
		    or die "fh $fh can't seek to $offset; $!");
	    local($/) = \($length > $max_size ? $max_size : $length);
	    my $chunk = <$fh>;
	    $offset += length($chunk);
	    $length -= length($chunk);
	    return $chunk;
	};
    }
}

sub dump {
    my $self = shift;

    my $inst_dump = join("", map {

	my ($selector, $length, $offset) = @$_;
	my $dump = "";
        if ($length >= 0b01000000) {
	    #untested branch
            $dump .= chr($selector << 6) . pack('w', $length);
        }
        else {
            $dump .= chr(($selector << 6) + $length);
	}
	unless ($selector == SELECTOR_NEW) {
	    $dump .= pack('w', $offset);
	}
	#kill 2, $$;
	$dump;
    } @{$self->instructions});

    #kill 2, $$;
    my $new_data = $self->new_data;

    return (pack('w w w w w',
		 (map { $self->$_ }
		  qw( source_offset source_length target_length )),
		 length($inst_dump), length($new_data),
		),
	    $inst_dump,
	    $new_data);
}

# 
sub apply_fh {
    my $self = shift;
    my $source_fh = shift;
    my $target_fh = shift;

    my $chunk_size = #eval{(stat $source_fh)[11]} ||
	DEFAULT_BLOCK_SIZE;

    my $data_offset   = 0;
    #my $target_offset = tell($target_fh);
    my $source_offset = $self->source_offset;

    # bah, would need to seek output stream without this.
    my $target_chunk = "";

    my $new_data = $self->new_data;

    foreach my $inst (@{$self->instructions}) {

	my ($selector, $length, $offset) = @{$inst};

	if ($selector == SELECTOR_SOURCE) {

	    seek($source_fh, $source_offset + $offset, 0);

	    local($/) = \$length;
	    $target_chunk .= <$source_fh>;
	    #_copy($source_fh, $target_fh, $length, $chunk_size);

	}
	elsif ($selector == SELECTOR_TARGET) {

	    my $overflow = -(length($target_chunk) - ($offset + $length));

	    if ($overflow <= 0) {
		# originally untested branch
		$target_chunk .= substr($target_chunk, $offset, $length);
	    }
	    else {
		$target_chunk .=
		    substr( ( substr($target_chunk, $offset)
			      x ( int($overflow / (length($target_chunk)
						   - $offset) ) + 1 )
			    ), 0, $length
			  );
	    }
	}
	else {
	    $target_chunk .= substr( $new_data,
				     0,
				     $length,
				     "" );
	}
    }

    print $target_fh $target_chunk;

}

our $chunk_size = 4096;

sub _copy {
    my ($source_fh, $target_fh, $length, $chunks);
    while ( $length > 0 ) {
	local($/) = \($length > $chunks ? $chunks : $length);
	my $chunk = <$source_fh>;
	$length -= length $chunk;
	print($target_fh, $chunk);
    }
}


1;