The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Chemistry::Ring::Find;

$VERSION = 0.20;
# $Id: Find.pm,v 1.2 2009/05/10 21:12:44 itubert Exp $

=head1 NAME

Chemistry::Ring::Find - Find the rings (cycles) in a molecule

=head1 SYNOPSIS

    use Chemistry::Ring::Find ':all';

    # find the smallest ring containing $atom
    my $ring = find_ring($atom);

    # find all the rings containing $bond
    my @rings = find_ring($bond, all => 1);

    # see below for more options

    # find the six 4-atom rings in cubane
    @rings = find_rings($cubane);

    # find a cubane SSSR with five rings
    @rings = find_rings($cubane, sssr => 1);

=head1 DESCRIPTION

The Chemistry::Ring::Find module implements a breadth-first ring finding
algorithm, and it can find all rings that contain a given atom or bond, the
Smallest Set of Smallest Rings (SSSR), or the "almost SSSR", which is an
unambiguous set of rings for cases such as cubane.The algorithms are  based on
ideas from:

1) Leach, A. R.; Dolata, D. P.; Prout, P. Automated Conformational Analysis and
Structure Generation: Algorithms for Molecular Perception J. Chem. Inf. Comput.
Sci. 1990, 30, 316-324

2) Figueras, J. Ring perception using breadth-first search. J. Chem. Inf.
Comput.  Sci. 1996, 36, 986-991.

Ref. 2 is only used for find_ring, not for find_rings, because it has been
shown that the overall SSSR method in ref 2 has bugs. Ref 1 inspired
find_rings, which depends on find_ring.

This module is part of the PerlMol project, L<http://www.perlmol.org/>.

=head1 FUNCTIONS

These functions may be exported explicitly, or all by using the :all tag, but
nothing is exported by default.

=over 

=cut


use strict;
use warnings;
no  warnings qw(recursion);
use Chemistry::Ring;

our @ISA = qw(Exporter);
our @EXPORT_OK = qw(find_ring find_rings);
our %EXPORT_TAGS = ( all => \@EXPORT_OK ); 

our $DEBUG = 0;

=item find_ring($origin, %opts)

Find the smallest ring containg $origin, which may be either an atom or a bond.
Returns a Chemistry::Ring object. Options:

=over

=item all

If true, find all the rings containing $origin. If false, return the first ring
found. Defaults to false. "All" is supposed to include only "simple" rings,
that is, rings that are not a combination of smaller rings. 

=item min

Only find rings with a the given minimum size. Defaults to zero.

=item max

Only find rings up to the given maximium size. Defaults to unlimited size.

=item size

Only find rings with this size. Same as setting min and max to the same size.
Default: unspecified.

=item exclude

An array reference containing a list of atoms that must NOT be present in the
ring. Defaults to the empty list.

=item mirror

If true, find each ring twice (forwards and backwards). Defaults to false.

=back

=cut

# $origin is an atom
# options: min, max, size, all, mirror, exclude
sub find_ring {
    no warnings qw(uninitialized);
    my ($origin, %opts) = @_;
    my $min_size = $opts{min} || $opts{size} || 0;
    my $max_size = $opts{max} || $opts{size};
    my %paths;
    my %bond_paths;
    my @q;
    my @rings;
    my %used_end_nodes;
    my $required_bond;
    my %exclude;
    @exclude{ @{$opts{exclude} || []} } = ();

    if ($origin->isa("Chemistry::Bond")) {
        $required_bond = $origin;
        ($origin) = $origin->atoms;
    }
    @q = ($origin);
    $paths{$origin} = [$origin];
    $bond_paths{$origin} = [];
    # $path{$atom} means how to get to $atom from $origin

    my $a;
    while ($a = shift @q) {
        my $from = $paths{$a}[-2];
        print "at $a from $from\n" if $DEBUG;
        for my $bn ($a->bonds_neighbors($from)) {
            my $nei  = $bn->{to};
            my $bond = $bn->{bond};
            next if exists $exclude{$nei};
            print "  -> $nei\n" if $DEBUG;
            if ($paths{$nei}) {
                # a hypothetical check_collision() would have to do this:
                # %paths, $min_size, $max_size, $used_end_nodes
                # %bond_paths, $required_bond, @rings
                #  check_size
                #  check_redundant
                #  check_required_bond
                #  check_contains_ring
                #  push

                print "found a path collision... " if $DEBUG;
                # check to make sure that the ring really started at $origin
                # and the size is what was requested
                my $size = @{$paths{$nei}} + @{$paths{$a}} - 1;
                #if($paths{$nei}[1] != $paths{$a}[1]
                if($paths{$nei}[1] ne $paths{$a}[1]
                    and $size >= $min_size
                    and !$max_size || $size <= $max_size)
                {
                    print "VALID\n" if $DEBUG;
                    my @atoms = (@{$paths{$a}}, reverse @{$paths{$nei}});
                    print "RING = ", print_path(\@atoms) if $DEBUG;
                    pop @atoms;
                    if ($used_end_nodes{$atoms[1]} and !$opts{mirror}) {
                        print "skipping redundant ring\n" if $DEBUG;
                        next; # don't want to find rings twice
                    }
                    my @bonds = (@{$bond_paths{$a}}, $bond,
                        reverse @{$bond_paths{$nei}});
                    if ($required_bond 
                        and not grep {$_ eq $required_bond} @bonds) {
                        print "does not include required bond ("
                            . join(" ", $required_bond->atoms)
                            . ")\n" if $DEBUG;
                        next;
                    }
                    if (contains_ring(\@atoms, \@rings)) {
                        print "contains another ring\n" if $DEBUG;
                        next;
                    }
                    my $r = Chemistry::Ring->new;
                    $r->add_atom(@atoms);
                    $r->add_bond(@bonds);
                    return $r unless $opts{all};  # FOUND VALID RING
                    push @rings, $r;
                    $used_end_nodes{$atoms[-1]} = 1;
                    #@used_nodes{@atoms} = ();
                } else {
                    print "NOT VALID", 
                        print_path( [@{$paths{$a}}, 
                            reverse @{$paths{$nei->id}}]) if $DEBUG;
                }
            } else {
                if (!$max_size || @{$paths{$a}} < ($max_size / 2) + 0.1) {
                    push @q, $nei;
                    print "    pushing path\n" if $DEBUG;        
                    $paths{$nei} = [@{$paths{$a}}, $nei];
                    $bond_paths{$nei} = [@{$bond_paths{$a}}, $bond];
                    print print_path($paths{$nei}) if $DEBUG;
                } else {
                    print "path too long; " if $DEBUG;
                    print print_path($paths{$a}) if $DEBUG;
                    #path too long
                }
            }
        }
    }
    @rings;
}


sub print_path {
    my $p = shift;
    my $ret = "    PATH: ";
    for my $a (@$p) {
        $ret .=  "$a - ";
    }
    $ret .= "\n";
}

# contains_ring($atoms, $rings)
# returns true if one of the rings in the array ref $rings is a proper subset
# of the atom list in the array ref $atom.
sub contains_ring {
    my ($atoms, $rings) = @_;
    my %seen;
    @seen{@$atoms} = ();
    for my $ring (@$rings) {
        my $unique_atoms = $ring->atoms; 
        next if $unique_atoms >= @$atoms; # ring is same size or bigger
        # make sure that $ring has at least one atom not in $atoms
        for my $atom ($ring->atoms) {
            if (exists $seen{$atom}) {
                $unique_atoms--;
            } else {
                last; # there's at least one unique atom!
            }
        }
        return 1 unless $unique_atoms;
    }
    0;
}

=item @rings = find_rings($mol, %options)

Find "all" the rings in the molecule. In general it return the Smallest Set of
Smallest Rings (SSSR). However, since it is well known that the SSSR is not
unique for molecules such as cubane (where the SSSR consists of five
unspecified four-member rings, even if the symmetry of the molecule would
suggest that the six faces of the cube are equivalent), in such cases
find_rings will return a non-ambiguous "non-smallest" set of smallest rings,
unless the "sssr" option is given. For example,

    @rings = find_rings($cubane);
    # returns SIX four-member rings

    @rings = find_rings($cubane, sssr => 1);
    # returns FIVE four-member rings (an unspecified subset of
    # the six rings above.)

=cut

sub find_rings {
    my ($mol, %opts) = @_;
    my $visited = {};
    my @ring_bonds;
    for my $atom ($mol->atoms) {
        next if $visited->{$atom};
        push @ring_bonds, find_ring_bonds($mol, \%opts, $atom, $visited);
    }
    #print "rb($_; @{$_->{atoms}})\n" for @ring_bonds;
    my @rings;
    my $n_rings = @ring_bonds;
    #print "cyclomatic number=$n_rings\n";
    for my $ring_bond (@ring_bonds) {
        push @rings, find_ring($ring_bond, all => 1)
    }
    my %seen;
    my @ring_keys = map { join " ", sort $_->atoms } @rings;
    #print "key($_)\n" for @ring_keys;
    @seen{@ring_keys} = @rings;
    @rings = sort { $a->atoms <=> $b->atoms } values %seen;
    if (!$opts{sssr} and @rings > $n_rings) {
        $n_rings++ if $rings[$n_rings]->atoms == $rings[$n_rings-1]->atoms;
    }
    splice @rings, $n_rings if defined $rings[$n_rings];
    #splice @rings, $n_rings;
    @rings;
}

# find a set of "ring closure bonds" by doing a depth-first search
# it will find a number of bonds equal to the cyclomatic number
sub find_ring_bonds {
    my ($mol, $opts, $atom, $visited) = @_;

    my @ring_bonds;
    $visited->{$atom}  = 1;
    for my $bn ($atom->bonds_neighbors) {
        my $nei  = $bn->{to};
        my $bond = $bn->{bond};
        #next if $visited->{$bond};
        next if not defined $bond or $visited->{$bond};
        $visited->{$bond}  = 1;
        if ($visited->{$nei}) { # closed ring
            #print "closing ring\n";
            push @ring_bonds, $bond;
        } else {
            push @ring_bonds, 
                find_ring_bonds($mol, $opts, $nei, $visited);
        }
    }
    @ring_bonds;
}

1;

=back

=head1 BUGS

The "all" option in find_ring doesn't quite work as expected. It finds all
simple rings and some bridged rings. It never finds fused rings (which is good).

=head1 VERSION

0.20

=head1 SEE ALSO

L<Chemistry::Ring>, L<http://www.perlmol.org>.

=head1 AUTHOR

Ivan Tubert-Brohman E<lt>itub@cpan.orgE<gt>

=head1 COPYRIGHT

Copyright (c) 2009 Ivan Tubert-Brohman. All rights reserved. This program is
free software; you can redistribute it and/or modify it under the same terms as
Perl itself.

=cut