# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Data::Deduper;
use strict;
use warnings;
our $VERSION = '0.03';

# Construct a deduper.
#
# Accepts either a flat key/value list or a single hash reference with:
#   data - array reference of initial items (default: no items). The items
#          are copied, so later changes to the caller's array do not leak
#          into the object. They are stored as given and are not trimmed
#          to `size` here.
#   expr - code reference called as $expr->($existing, $candidate); a true
#          result marks the candidate as a duplicate (default: string
#          equality via `eq`).
#   size - maximum number of items to remember (default: 10; any false
#          value, including 0, also selects the default).
#
# Returns the new object, blessed into the invoking class so that
# subclasses get instances of their own class.
sub new {
    my $class = shift;
    my %args  = ( @_ == 1 ? %{ $_[0] } : @_ );

    my $self = {
        data => [ @{ $args{data} || [] } ],
        expr => $args{expr} || sub {
            my ( $existing, $candidate ) = @_;
            $existing eq $candidate;
        },
        size => $args{size} || 10,
    };

    # ref($class) keeps $obj->new(...) working as well as Class->new(...).
    return bless $self, ref($class) || $class;
}

# Replace the stored items.
#
# The new items are given either as a single plain array reference or as a
# flat list. A lone argument that is not an array reference is treated as a
# one-item list rather than being dereferenced. Only the last
# $self->{size} items are kept.
#
# Returns the stored items (their count in scalar context).
sub init {
    my $self  = shift;
    my @items = ( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) ? @{ $_[0] } : @_;

    # Drop the oldest entries from the front so at most {size} remain.
    my $excess = @items - $self->{size};
    splice @items, 0, $excess if $excess > 0;

    $self->{data} = \@items;
    return @items;
}

# Add the items that are not yet known and report which ones were added.
#
# The candidates are given either as a single plain array reference or as a
# flat list. A lone argument that is not an array reference is treated as a
# one-item list rather than being dereferenced. Each candidate is compared
# with every stored item (including candidates accepted earlier in the same
# call) by calling $self->{expr}->($stored, $candidate); a true result
# marks it as a duplicate. Afterwards only the last $self->{size} items are
# kept.
#
# Returns the newly added items (their count in scalar context).
sub dedup {
    my $self       = shift;
    my @candidates = ( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) ? @{ $_[0] } : @_;
    my @known      = @{ $self->{data} };
    my @added;

  CANDIDATE:
    for my $candidate (@candidates) {

        # Iterate with $_ so comparators that read $_ (as inside grep)
        # still see the stored item; stop at the first match.
        for (@known) {
            next CANDIDATE if $self->{expr}->( $_, $candidate );
        }
        push @known, $candidate;
        push @added, $candidate;
    }

    # Drop the oldest entries from the front so at most {size} remain.
    my $excess = @known - $self->{size};
    splice @known, 0, $excess if $excess > 0;

    $self->{data} = \@known;
    return @added;
}

# Return every item currently stored in the deduper
# (the number of items in scalar context).
sub data {
    my ($self) = @_;
    my $items = $self->{data};
    return @{$items};
}

1;
__END__

=head1 NAME

Data::Deduper - remove duplicated items from an array

=head1 SYNOPSIS

    use Data::Deduper;
    my @data = (1, 2, 3);
    my $dd = Data::Deduper->new(
        expr => sub { my ($a, $b) = @_; $a eq $b },
        size => 3,
        data => \@data,
    );
    # prints only 4, because 3 is already stored and 4 is new.
    for ($dd->dedup(3, 4)) {
        print $_;
    }
    # prints 2, 3 and 4: all stored items. At most 3 items are kept.
    for ($dd->data) {
        print $_;
    }

=head1 DESCRIPTION

Data::Deduper removes duplicated items from an array. This is useful when fetching an RSS/Atom feed repeatedly, where each fetch returns mostly items that have already been seen.

=head1 INTERFACE

=head2 C<< Data::Deduper->new( expr => $expr, size => $size, data => $data ) >>

Creates a deduper instance.
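$expr is a code reference that is called as C<< $expr->($existing, $new) >>
and must return true when the two items are duplicates; the default compares
them with C<eq>. $size is the maximum number of items kept (default 10).
$data is an array reference holding the initial items.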

=head2 C<< $deduper->init( \@data ) >>

Resets the stored items to @data, keeping only the last $size of them. Returns all stored items.

=head2 C<< $deduper->dedup( \@data ) >>

Deduplicates items. Each item in @data is checked against the stored items, and if it is not a duplicate it is added to them.
Returns only the items that were added; duplicated items are left out of the return value.

=head2 C<< $deduper->data() >>

Return whole items.

=head1 AUTHOR

Yasuhiro Matsumoto E<lt>mattn.jp@gmail.comE<gt>

=head1 SEE ALSO

L<XML::Feed::Deduper>

=head1 LICENSE

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut