# lib/Data/Deduper.pm - metacpan.org

package Data::Deduper;
use strict;
use warnings;
use Scalar::Util ();
our $VERSION = '0.03';

# Constructor. Accepts either a flat list of key/value pairs or a single
# hash reference. All keys are optional:
#   data - array reference holding the initial items (default: no items)
#   expr - code reference called as $expr->( $known, $candidate ); it must
#          return true when both items are duplicates of each other
#          (default: string equality via eq)
#   size - maximum number of items kept by init/dedup (default: 10; a
#          false value such as 0 also selects the default)
# The initial data is stored as given; it is not trimmed to size here.
# Returns the new object.
sub new {
    my $class = shift;
    my %args = ( @_ == 1 ? %{ $_[0] } : @_ );

    my $self = {
        # Copy the caller's array so that later changes to it cannot leak
        # into the deduper's state.
        data => [ @{ $args{data} || [] } ],
        expr => $args{expr}
          || sub { my ( $left, $right ) = @_; $left eq $right },
        size => $args{size} || 10,
    };

    # Bless into the invoking class so that subclasses get their own type;
    # ref() keeps $object->new(...) working as well.
    return bless $self, ref($class) || $class;
}

# Replace the retained items with the given ones.
# The items may be passed as a list or as a single array reference. When
# more than $self->{size} items are given, only the last {size} are kept.
# Returns the retained items (their count in scalar context).
sub init {
    my $self = shift;

    # Only a lone ARRAY-typed reference is the init(\@items) calling form.
    # Any other single argument is one item in list form and must not be
    # dereferenced (doing so dies under strict refs).
    my $by_ref = @_ == 1
      && ( Scalar::Util::reftype( $_[0] ) || '' ) eq 'ARRAY';
    my @ret = $by_ref ? @{ $_[0] } : @_;

    my $count = @ret;
    my $size  = $self->{size};

    # Keep the tail of the list: the most recently supplied items win.
    @ret = @ret[ ( $count - $size ) .. $count - 1 ] if $count > $size;

    $self->{data} = \@ret;
    return @ret;
}

# Add the given items to the retained ones, skipping duplicates.
# The items may be passed as a list or as a single array reference. Each
# candidate is compared against every retained item (including candidates
# accepted earlier in the same call) with $self->{expr}; it is accepted
# only when no comparison returns true. Afterwards the retained items are
# cut down to the last $self->{size} entries.
# Returns the accepted items only (their count in scalar context).
sub dedup {
    my $self = shift;

    # Only a lone ARRAY-typed reference is the dedup(\@items) calling form.
    # Any other single argument is one item in list form and must not be
    # dereferenced (doing so dies under strict refs).
    my $by_ref = @_ == 1
      && ( Scalar::Util::reftype( $_[0] ) || '' ) eq 'ARRAY';
    my @newer = $by_ref ? @{ $_[0] } : @_;

    my @data = @{ $self->{data} };
    my @ret;
    for my $item (@newer) {
        # grep aliases $_ to each retained item while expr runs.
        next if grep { $self->{expr}->( $_, $item ) } @data;
        push @data, $item;
        push @ret,  $item;
    }

    my $count = @data;
    my $size  = $self->{size};

    # Drop the oldest entries so that at most {size} items remain.
    @data = @data[ ( $count - $size ) .. $count - 1 ] if $count > $size;

    $self->{data} = \@data;
    return @ret;
}

# Accessor for the retained items.
# Returns them as a list (their count in scalar context).
sub data {
    my ($self) = @_;
    my $items = $self->{data};
    return @{$items};
}

1;
__END__

=head1 NAME

Data::Deduper - remove duplicated items from an array

=head1 SYNOPSIS

    use Data::Deduper;
    my @data = (1, 2, 3);
    my $dd = Data::Deduper->new(
        expr => sub { my ($a, $b) = @_; $a eq $b },
        size => 3,
        data => \@data,
    );
    # prints only 4: 3 is already known, so only 4 is new.
    for ($dd->dedup(3, 4)) {
        print $_;
    }
    # prints 2, 3 and 4: all retained items, capped at the max size of 3.
    for ($dd->data) {
        print $_;
    }

=head1 DESCRIPTION

Data::Deduper removes duplicated items from an array. This is useful when fetching an RSS/Atom feed repeatedly.

=head1 INTERFACE

=head2 C<< Data::Deduper->new( expr => $expr, size => $size, data => $data ) >>

Creates a deduper instance.
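$expr is a code reference that receives two items and returns true when they
are duplicates (default: string equality with C<eq>). $size is the maximum
number of items retained (default: 10). $data is an array reference holding
the initial items.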

=head2 C<< $deduper->init( \@data ) >>

Resets the retained items to the given ones, keeping only the last $size items. Returns all retained items.

=head2 C<< $deduper->dedup( \@data ) >>

Dedups items. Each item in @data is checked against the retained items; an item that is not a duplicate is added to them.
Returns only the items that were added. Note that duplicated items are omitted from the return value.

=head2 C<< $deduper->data() >>

Returns all retained items.

=head1 AUTHOR

Yasuhiro Matsumoto E<lt>mattn.jp@gmail.comE<gt>

=head1 SEE ALSO

L<XML::Feed::Deduper>

=head1 LICENSE

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut

	Global
`s`	Focus search bar
`?`	Bring up this help dialog

	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)

	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse

	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)