lib/Set/Functional.pm - metacpan.org

package Set::Functional;

use 5.006;

use Exporter qw{import};

=head1 NAME

Set::Functional - set operations for functional programming

=head1 VERSION

Version 1.04

=cut

our $VERSION = '1.04';

our @EXPORT_OK  = qw{
	setify setify_by
	cartesian
	difference difference_by
	disjoint disjoint_by
	distinct distinct_by
	intersection intersection_by
	symmetric_difference symmetric_difference_by
	union union_by
	is_disjoint is_disjoint_by
	is_equal is_equal_by
	is_equivalent is_equivalent_by
	is_pairwise_disjoint is_pairwise_disjoint_by
	is_proper_subset is_proper_subset_by
	is_proper_superset is_proper_superset_by
	is_subset is_subset_by
	is_superset is_superset_by
};
our %EXPORT_TAGS = (all => \@EXPORT_OK);

=head1 SYNOPSIS

This module provides basic set operations for native lists.  The primary goal
is to take advantage of Perl's native functional programming capabilities
while relying solely on Pure Perl constructs to perform the set operations as
fast as possible.  All of these techniques have been benchmarked against other
common Perl idioms to determine the optimal solution.  These benchmarks can
be found in this package (shortly).

Each function is provided in two forms.  The first form always expects simple
flat data structures of defined elements.  The second form expects a BLOCK
(refered to as a choice function) to evaluate each member of the list to a
defined value to determine how the element is a set member.  These can be
identified by the suffix "_by".  None of these functions check definedness
inline so as to eliminate the costly O(n) operation.  All functions have been
prototyped to give them a native Perl-ish look and feel.

Example usage:

	use Set::Functional ':all';

	# Set Creation
	my @deduped_numbers = setify(1 .. 10, 2 .. 11);
	my @deduped_objects_by_name = setify_by { $_->{name} } ({name => 'fred'}, {name => 'bob'}, {name => 'fred'});

	# Set Operation
	my @all_permutations = cartesian \@arr1, \@arr2, \@arr3, \@arr4;

	my @only_arr1_elements = difference \@arr1, \@arr2, \@arr3, \@arr4;
	my @only_arr1_elements_by_name = difference_by { $_->{name} } \@arr1, \@arr2, \@arr3, \@arr4;

	my @unique_per_set = disjoint \@arr1, \@arr2, \@arr3, \@arr4;
	my @unique_per_set_by_name = disjoint_by { $_->{name} } \@arr1, \@arr2, \@arr3, \@arr4;

	my @unique_elements = distinct \@arr1, \@arr2, \@arr3, \@arr4;
	my @unique_elements_by_name = distinct_by { $_->{name} } \@arr1, \@arr2, \@arr3, \@arr4;

	my @shared_elements = intersection \@arr1, \@arr2, \@arr3, \@arr4;
	my @shared_elements_by_name = intersection_by { $_->{name} } \@arr1, \@arr2, \@arr3, \@arr4;

	my @odd_occuring_elements = symmetric_difference \@arr1, \@arr2, \@arr3, \@arr4;
	my @odd_occuring_elements_by_name = symmetric_difference_by { $_->{name} } \@arr1, \@arr2, \@arr3, \@arr4;

	my @all_elements = union \@arr1, \@arr2, \@arr3, \@arr4;
	my @all_elements_by_name = union_by { $_->{name} } \@arr1, \@arr2, \@arr3, \@arr4;

	# Set Predicates
	my $is_all_of_arr1_distinct_from_arr2 = is_disjoint \@arr1, \@arr2;
	my $is_all_of_arr1_distinct_from_arr2_by_name = is_disjoint_by { $_->{name} } \@arr1, \@arr2;

	my $is_arr1_the_same_as_arr2 = is_equal \@arr1, \@arr2;
	my $is_arr1_the_same_as_arr2_by_name = is_equal_by { $_->{name} } \@arr1, \@arr2;

	my $are_all_sets_mutually_unique = is_pairwise_disjoint \@arr1, \@arr2, \@arr3, \@arr4;
	my $are_all_sets_mutually_unique_by_name = is_pairwise_disjoint_by { $_->{name} } \@arr1, \@arr2, \@arr3, \@arr4;

	my $is_all_of_arr1_in_arr2_but_not_the_same_as_arr2 = is_proper_subset \@arr1, \@arr2;
	my $is_all_of_arr1_in_arr2_but_not_the_same_as_arr2_by_name = is_proper_subset_by { $_->{name} } \@arr1, \@arr2;

	my $is_all_of_arr1_in_arr2 = is_subset \@arr1, \@arr2;
	my $is_all_of_arr1_in_arr2_by_name = is_subset_by { $_->{name} } \@arr1, \@arr2;

	my $is_all_of_arr2_in_arr1_but_not_the_same_as_arr1 = is_proper_superset \@arr1, \@arr2;
	my $is_all_of_arr2_in_arr1_but_not_the_same_as_arr1_by_name = is_proper_superset_by { $_->{name} } \@arr1, \@arr2;

	my $is_all_of_arr2_in_arr1 = is_superset \@arr1, \@arr2;
	my $is_all_of_arr2_in_arr1_by_name = is_superset_by { $_->{name} } \@arr1, \@arr2;

=head1 CONSTRUCTORS

=cut

=head2 setify(@)

Given a list, return a new set.  Order is not guaranteed.

	setify 1 .. 10, 6 .. 15 => 1 .. 15

=cut

sub setify(@) {
	my %set;

	undef @set{@_} if @_;

	return keys %set;
}

=head2 setify_by(&@)

Given a choice function and a list, return a new set defined by the choice
function. Order is not guaranteed.

=cut

sub setify_by(&@){
	my $func = shift;

	my %set;

	@set{ map { $func->($_) } @_ } = @_ if @_;

	return values %set;
}

=head1 OPERATORS

=cut

=head2 cartesian(@)

Given multiple set references, return multiple sets containing all permutations
of one element from each set.  If the empty set is provided, no sets are
returned since the number of sets generated should be the product of the input
sets' cardinalities.  If no sets are provided then none are returned.

	cartesian [1 .. 3], [1 .. 2] => [1,1],[1,2],[2,1],[2,2],[3,1],[3,2]
	cartesin => ()
	cartesin [1 .. 3], [] => ()

=cut

sub cartesian(@) {
	return unless @_;

	my @results;
	my $repetitions = 1;

	($repetitions *= @$_) || return for @_;
	$#results = $repetitions - 1;

	for my $idx (0 .. $#results) {
		$repetitions = @results;
		$results[$idx] = [map { $_->[int($idx/($repetitions /= @$_)) % @$_] } @_];
	}

	return @results;
}

=head2 difference(@)

Given multiple set references, return a new set with all the elements in the first set
that don't exist in subsequent sets.

	difference [1 .. 10], [6 .. 15] => 1 .. 5

=cut

sub difference(@) {
	my $first = shift;

	return unless $first && @$first;

	my %set;

	undef @set{@$first};

	do { delete @set{@$_} if @$_ } for @_;

	return keys %set;
}

=head2 difference_by(&@)

Given a choice function and multiple set references, return a new set with all the elements
in the first set that don't exist in subsequent sets according to the choice function.

=cut

sub difference_by(&@) {
	my $func = shift;
	my $first = shift;

	return unless $first && @$first;

	my %set;

	@set{ map { $func->($_) } @$first } = @$first;

	do { delete @set{ map { $func->($_) } @$_ } if @$_ } for @_;

	return values %set;
}

=head2 disjoint(@)

Given multiple set references, return corresponding sets containing all the elements from
the original set that exist in any set exactly once.

	disjoint [1 .. 10], [6 .. 15] => [1 .. 5], [11 .. 15]

=cut

sub disjoint(@) {
	my %element_to_count;

	do { ++$element_to_count{$_} for @$_ } for @_;

	return map { [grep { $element_to_count{$_} == 1 } @$_] } @_;
}

=head2 disjoint_by(&@)

Given a choice function and multiple set references, return corresponding sets containing
all the elements from the original set that exist in any set exactly once
according to the choice function.

=cut

sub disjoint_by(&@) {
	my $func = shift;

	my %key_to_count;

	do { ++$key_to_count{$func->($_)} for @$_ } for @_;

	return map { [grep { $key_to_count{$func->($_)} == 1 } @$_] } @_;
}

=head2 distinct(@)

Given multiple set references, return a new set containing all the elements that exist
in any set exactly once.

	distinct [1 .. 10], [6 .. 15] => 1 .. 5, 11 .. 15

=cut

sub distinct(@) {
	my %element_to_count;

	do { ++$element_to_count{$_} for @$_ } for @_;

	return grep { $element_to_count{$_} == 1 } keys %element_to_count;
}

=head2 distinct_by(&@)

Given a choice function and multiple set references, return a new set containing all the
elements that exist in any set exactly once according to the choice function.

=cut

sub distinct_by(&@) {
	my $func = shift;

	my %key_to_count;

	for (@_) {
		for (@$_) {
			my $key = $func->($_);
			$key_to_count{$key} = exists $key_to_count{$key} ? undef : $_;
		}
	}

	return grep { defined } values %key_to_count;
}

=head2 intersection(@)

Given multiple set references, return a new set containing all the elements that exist
in all sets.

	intersection [1 .. 10], [6 .. 15] => 6 .. 10

=cut

sub intersection(@) {
	my $first = shift;

	return unless $first && @$first;

	my %set;

	undef @set{@$first};

	for (@_) {
		my @int = grep { exists $set{$_} } @$_;
		return unless @int;
		%set = ();
		undef @set{@int};
	}

	return keys %set;
}

=head2 intersection_by(&@)

Given a choice function and multiple set references, return a new set containing all the
elements that exist in all sets according to the choice function.

=cut

sub intersection_by(&@) {
	my $func = shift;
	my $first = shift;

	return unless $first && @$first;

	my %set;

	@set{ map { $func->($_) } @$first } = @$first;

	for (@_) {
		my @int = grep { exists $set{$func->($_)} } @$_;
		return unless @int;
		%set = ();
		@set{ map { $func->($_) } @int } = @int;
	}

	return values %set;
}

=head2 symmetric_difference(@)

Given multiple set references, return a new set containing all the elements that
exist an odd number of times across all sets.

	symmetric_difference [1 .. 10], [6 .. 15], [4, 8, 12] => 1 .. 5, 8, 11 .. 15

=cut

sub symmetric_difference(@) {
	my $count;
	my %element_to_count;

	do { ++$element_to_count{$_} for @$_ } for @_;

	return grep { $element_to_count{$_} % 2 } keys %element_to_count;
}

=head2 symmetric_difference_by(&@)

Given a choice function and multiple set references, return a new set containing
all the elements that exist an odd number of times across all sets according to
the choice function.

=cut

sub symmetric_difference_by(&@) {
	my $func = shift;

	my $count;
	my %key_to_count;

	do { ++$key_to_count{$func->($_)} for @$_ } for @_;

	return map {
		grep {
			$count = delete $key_to_count{$func->($_)};
			defined($count) && $count % 2
		} @$_
	} @_;
}

=head2 union(@)

Given multiple set references, return a new set containing all the elements that exist
in any set.

	union [1 .. 10], [6 .. 15] => 1 .. 15

=cut

sub union(@) {
	my %set;

	do { undef @set{@$_} if @$_ } for @_;

	return keys %set;
}

=head2 union_by(&@)

Given a choice function and multiple set references, return a new set containing all the
elements that exist in any set according to the choice function.

=cut

sub union_by(&@) {
	my $func = shift;

	my %set;

	do { @set{ map { $func->($_) } @$_ } = @$_ if @$_ } for @_;

	return values %set;
}

=head1 PREDICATES

=cut

=head2 is_disjoint($$)

Given two set references, return true if both sets contain none of the same values.

	is_disjoint [1 .. 5], [6 .. 10] => true
	is_disjoint [1 .. 6], [4 .. 10] => false

=cut

sub is_disjoint($$) {
	my @set = &intersection(@_[0,1]);
	return ! @set;
}

=head2 is_disjoint_by(&$$)

Given a choice function and two sets references, return true if both sets
contain none of the same values according to the choice function.

=cut

sub is_disjoint_by(&$$) {
	my @set = &intersection_by(@_[0,1,2]);
	return ! @set;
}

=head2 is_equal($$)

Given two set references, return true if both sets contain all the same values.
Aliased by is_equivalent.

	is_equal [1 .. 5], [1 .. 5] => true
	is_equal [1 .. 10], [6 .. 15] => false

=cut

sub is_equal($$) {
	my @set = &intersection(@_[0,1]);
	return @set == @{$_[0]} && @set == @{$_[1]};
}
*is_equivalent = \&is_equal;

=head2 is_equal_by(&$$)

Given a choice function and two sets references, return true if both sets
contain all the same values according to the choice function.
Aliased by is_equivalent_by.

=cut

sub is_equal_by(&$$) {
	my @set = &intersection_by(@_[0,1,2]);
	return @set == @{$_[1]} && @set == @{$_[2]};
}
*is_equivalent_by = \&is_equal_by;

=head2 is_pairwise_disjoint(@)

Given multiple set references, return true if every set is disjoint from every
other set.

	is_pairwise_disjoint [1 .. 5], [6 .. 10], [11 .. 15] => true
	is_pairwise_disjoint [1 .. 5], [6 .. 10], [11 .. 15], [3 .. 8] => false

=cut

sub is_pairwise_disjoint(@) {
	my @sets = &disjoint(@_);
	do { return 0 if @{$sets[$_]} != @{$_[$_]} } for 0 .. $#sets;
	return 1;
}

=head2 is_pairwise_disjoint_by(&@)

Given a choice function and multiple set references, return true if every set
is disjoint from every other set according to the choice function.

=cut

sub is_pairwise_disjoint_by(&@) {
	my @sets = &disjoint_by((shift), @_);
	do { return 0 if @{$sets[$_]} != @{$_[$_]} } for 0 .. $#sets;
	return 1;
}

=head2 is_proper_subset($$)

Given two set references, return true if the first set is fully contained by
but is not equivalent to the second.

	is_proper_subset [1 .. 5], [1 .. 10] => true
	is_proper_subset [1 .. 5], [1 .. 5] => false

=cut

sub is_proper_subset($$) {
	my @set = &intersection(@_[0,1]);
	return @set == @{$_[0]} && @set != @{$_[1]};
}

=head2 is_proper_subset_by(&$$)

Given a choice function and two set references, return true if the first set
is fully contained by but is not equivalent to the second according to the
choice function.

=cut

sub is_proper_subset_by(&$$) {
	my @set = &intersection_by(@_[0,1,2]);
	return @set == @{$_[1]} && @set != @{$_[2]};
}

=head2 is_proper_superset($$)

Given two set references, return true if the first set fully contains but is
not equivalent to the second.

	is_proper_superset [1 .. 10], [1 .. 5] => true
	is_proper_superset [1 .. 5], [1 .. 5] => false

=cut

sub is_proper_superset($$) {
	my @set = &intersection(@_[0,1]);
	return @set != @{$_[0]} && @set == @{$_[1]};
}

=head2 is_proper_superset_by(&$$)

Given a choice function and two set references, return true if the first set
fully contains but is not equivalent to the second according to the choice
function.

=cut

sub is_proper_superset_by(&$$) {
	my @set = &intersection_by(@_[0,1,2]);
	return @set != @{$_[1]} && @set == @{$_[2]};
}

=head2 is_subset($$)

Given two set references, return true if the first set is fully contained by
the second.

	is_subset [1 .. 5], [1 .. 10] => true
	is_subset [1 .. 5], [1 .. 5] => true
	is_subset [1 .. 5], [2 .. 11] => false

=cut

sub is_subset($$) {
	my @set = &intersection(@_[0,1]);
	return @set == @{$_[0]};
}

=head2 is_subset_by(&$$)

Given a choice function and two set references, return true if the first set
is fully contained by the second according to the choice function.

=cut

sub is_subset_by(&$$) {
	my @set = &intersection_by(@_[0,1,2]);
	return @set == @{$_[1]};
}

=head2 is_superset($$)

Given two set references, return true if the first set fully contains the
second.

	is_superset [1 .. 10], [1 .. 5] => true
	is_superset [1 .. 5], [1 .. 5] => true
	is_subset [1 .. 5], [2 .. 11] => false

=cut

sub is_superset($$) {
	my @set = &intersection(@_[0,1]);
	return @set == @{$_[1]};
}

=head2 is_superset_by(&$$)

Given a choice function and two set references, return true if the first set
fully contains the second according to the choice function.

=cut

sub is_superset_by(&$$) {
	my @set = &intersection_by(@_[0,1,2]);
	return @set == @{$_[2]};
}

=head1 AUTHOR

Aaron Cohen, C<< <aarondcohen at gmail.com> >>

Special thanks to:
L<Logan Bell|http://metacpan.org/author/logie>
L<Thomas Whaples|https://github.com/twhaples>
L<Dibin Pookombil|https://github.com/dibinp>

=head1 BUGS

Please report any bugs or feature requests to C<bug-set-functional at rt.cpan.org>, or through
the web interface at L<https://github.com/aarondcohen/Set-Functional/issues>.  I will
be notified, and then you'll automatically be notified of progress on your bug as I make changes.

=head1 TODO

=over 4

=item * Add SEE ALSO section

=back

=head1 SUPPORT

You can find documentation for this module with the perldoc command.

    perldoc Set::Functional

You can also look for information at:

=over 4

=item * Official GitHub Repo

L<https://github.com/aarondcohen/Set-Functional>

=item * GitHub's Issue Tracker (report bugs here)

L<https://github.com/aarondcohen/Set-Functional/issues>

=item * CPAN Ratings

L<http://cpanratings.perl.org/d/Set-Functional>

=item * Official CPAN Page

L<http://search.cpan.org/dist/Set-Functional/>

=back

=head1 LICENSE AND COPYRIGHT

Copyright 2011-2014 Aaron Cohen.

This program is free software; you can redistribute it and/or modify it
under the terms of either: the GNU General Public License as published
by the Free Software Foundation; or the Artistic License.

See http://dev.perl.org/licenses/ for more information.

=cut

1; # End of Set::Functional
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)