# The London Perl and Raku Workshop takes place on 26th Oct 2024. If your company depends on Perl, please consider sponsoring and/or attending.
package MogileFS::ReplicationPolicy::MultipleNetworks;

use strict;
use base 'MogileFS::ReplicationPolicy';
use MogileFS::Util qw(weighted_list);
use MogileFS::ReplicationRequest qw(ALL_GOOD TOO_GOOD TEMP_NO_ANSWER);

# NOTE(review): these file-level lexicals appear to be unused leftovers.
# The %cache/$age actually used by network_for_ip() and friends are the
# copies declared inside the bare block further down, which shadow these
# for every sub that touches them.  Confirm nothing else needs them and
# remove.
my %cache;
my $age;

# Sentinel stored in %skip_host to mean "host is on a network we already
# occupy" (as opposed to a plain 1, meaning "we are already on this host").
sub AVOIDNETWORK {
    return "AVOIDNETWORK";
}

# Constructor.  $mindevcount may be undef/empty; replicate_to() then
# falls back to the caller-supplied 'min' argument.
sub new {
    my ($class, $mindevcount) = @_;
    my $self = { mindevcount => $mindevcount };
    return bless $self, $class;
}

# Parse a policy-string argument list of the form "(N)" off the front of
# $$argref and construct a policy object.  A bare "MultipleNetworks()" is
# accepted: the captured count is then the empty string, and the
# mindevcount configured on the class is used instead (see replicate_to).
sub new_from_policy_args {
    my ($class, $argref) = @_;

    if ($$argref =~ s/^\s* \( \s* (\d*) \s* \) \s*//x) {
        return $class->new($1);
    }

    die "$class failed to parse args: $$argref";
}

# Read-only accessor for the configured minimum device count.
sub mindevcount {
    my ($self) = @_;
    return $self->{mindevcount};
}

# Decide where (if anywhere) $fid should be replicated next.
#
# Returns one of:
#   ALL_GOOD       - enough copies, spread over enough hosts/networks
#   TOO_GOOD       - more copies than required (rebalancer may trim)
#   TEMP_NO_ANSWER - no usable destination device right now; retry later
#   a MogileFS::ReplicationRequest whose 'ideal' list is devices on new
#   hosts AND new networks, and whose 'desperate' list is devices on a
#   network we already occupy (preferred) or a host we already occupy.
sub replicate_to {
    my ($self, %args) = @_;

    my $fid      = delete $args{fid};      # fid scalar to copy
    my $on_devs  = delete $args{on_devs};  # arrayref of device objects
    my $all_devs = delete $args{all_devs}; # hashref of { devid => MogileFS::Device }
    my $failed   = delete $args{failed};   # hashref of { devid => 1 } of failed attempts this round

    # old-style
    my $min      = delete $args{min};
    # a mindevcount configured on the policy instance wins over the
    # caller-supplied (old-style) 'min' argument
    $min         = $self->{mindevcount} || $min;

    warn "Unknown parameters: " . join(", ", sort keys %args) if %args;
    die "Missing parameters" unless $on_devs && $all_devs && $failed && $fid;

    # number of devices we currently live on
    my $already_on = @$on_devs;

    # a silly special case, bail out early.
    return ALL_GOOD if $min == 1 && $already_on;

    # total disks available which are candidates for having files on them
    my $total_disks = scalar grep { $_->dstate->should_have_files } values %$all_devs;

    # if we have two copies and that's all the disks there are
    # anywhere, be happy enough
    return ALL_GOOD if $already_on >= 2 && $already_on == $total_disks;

    # see which and how many unique hosts/networks we're already on.
    my %on_dev;
    my %on_host;
    my %on_network;     # network desc => Net::Netmask object
    foreach my $dev (@$on_devs) {
        $on_host{$dev->hostid} = 1;
        $on_dev{$dev->id} = 1;

        # hosts can lack an IP (e.g. in tests); they then count toward
        # hosts but not toward any network
        my $on_ip = $dev->host->ip;
        if ($on_ip) {
            my $network = network_for_ip($on_ip);
            $on_network{$network->desc} = $network;
        }
    }

    my $uniq_hosts_on       = scalar keys %on_host;
    # (scalar keys %on_network) || 1 -- never zero, so the division in
    # the network-coverage check below is always safe
    my $uniq_networks_on    = scalar keys %on_network || 1;

    my ($total_uniq_hosts, $total_uniq_networks) = unique_hosts_and_networks($all_devs);

    # target as many networks as we can, but not more than min
    my $target_networks = ($min < $total_uniq_networks) ? $min : $total_uniq_networks;

    # we're never good if our copies aren't on as many networks as possible
    # (ratio <= 1 means we already cover at least the targeted number of
    # networks; only then may we declare success or surplus)
    if (($target_networks / $uniq_networks_on) <= 1) {
        return TOO_GOOD if $uniq_hosts_on >  $min;
        return TOO_GOOD if $uniq_hosts_on == $min && $already_on > $min;

        return ALL_GOOD if $uniq_hosts_on == $min;
        return ALL_GOOD if $uniq_hosts_on >= $total_uniq_hosts && $already_on >= $min;
    }

    # if there are more hosts we're not on yet, we want to exclude devices we're already
    # on from our applicable host search.
    # also exclude hosts on networks we're already on
    my @skip_network = values %on_network;
    my %skip_host; # hostid => 1 (same host) or AVOIDNETWORK (same network)
    if ($uniq_hosts_on < $total_uniq_hosts) {
        %skip_host = %on_host;

        if (@skip_network) {
            # work out hosts from the devs passed to us
            my %seen_host;
            foreach my $device (values %$all_devs) {
                next if ($seen_host{$device->host->id}++);

                # mark hosts whose IP falls in a network we already
                # occupy -- but never downgrade a "same host" (1) mark
                foreach my $disliked_network (@skip_network) {
                    if (($disliked_network->match($device->host->ip)) and
                        (not $skip_host{$device->host->id})) {
                        $skip_host{$device->host->id} = AVOIDNETWORK;
                    }
                }
            }
        }
    }

    # all candidate destinations: writable, not already holding the fid,
    # not failed this round; emptiest disks first
    my @all_dests = sort {
        $b->percent_free <=> $a->percent_free
    } grep {
        ! $on_dev{$_->devid} &&
        ! $failed->{$_->devid} &&
        $_->should_get_replicated_files
    } values %$all_devs;

    return TEMP_NO_ANSWER unless @all_dests;

    my @ideal         = grep { ! $skip_host{$_->hostid} } @all_dests;
    # wrong network is less desperate than wrong host
    my @network_desp  = grep {   $skip_host{$_->hostid} &&
                                 $skip_host{$_->hostid} eq AVOIDNETWORK } @all_dests;
    my @host_desp     = grep {   $skip_host{$_->hostid} &&
                                 $skip_host{$_->hostid} ne AVOIDNETWORK } @all_dests;

    # randomize the top 20 of each list, weighted by free space, so we
    # don't hammer the single emptiest device
    @ideal        = weighted_list(map { [$_, 100 * $_->percent_free] }
        splice(@ideal, 0, 20));
    @network_desp = weighted_list(map { [$_, 100 * $_->percent_free] }
        splice(@network_desp, 0, 20));
    @host_desp    = weighted_list(map { [$_, 100 * $_->percent_free] }
        splice(@host_desp, 0, 20));

    # same-network devices are preferred over same-host devices
    my @desp = (@network_desp, @host_desp);

    return MogileFS::ReplicationRequest->new(
                                             ideal     => \@ideal,
                                             desperate => \@desp,
                                            );
}

# Count distinct hosts and distinct networks across all devices that are
# eligible to receive replicas.  We can't simply count the zone cache:
# it may contain networks on which we currently have no hosts at all.
sub unique_hosts_and_networks {
    my ($devs) = @_;

    my %seen_host;
    my %seen_network;
    for my $dev (values %$devs) {
        next unless $dev->dstate->should_get_repl_files;

        $seen_host{ $dev->hostid }++;
        $seen_network{ network_for_ip($dev->host->ip)->desc }++;
    }

    # report at least one network so callers never divide by zero
    return (scalar keys %seen_host, (scalar keys %seen_network) || 1);
}


{
    my %cache; # '192.168.0.0/24' => Net::Netmask->new2('192.168.0.0/24');
    my $age;   # increments every time we look

    # turn a server ip into a network
    # defaults to /16 ranges
    # this can be overridden with a "zone_$location" setting per network "zone" and
    # a lookup field listing all "zones"
    # e.g.
    # mogadm settings set network_zones location1,location2
    # mogadm settings set zone_location1 192.168.0.0/24
    # mogadm settings set zone_location2 10.0.0.0/24
    # zone names and netmasks must be unique
    # Map a server IP address to its Net::Netmask network object.
    #
    # Configured zones (server settings network_zones / zone_*) are
    # consulted first; an IP matching no configured zone falls back to
    # its /16 (255.255.0.0) range.  Note: if several zones contain the
    # IP, the last matching one (in hash order) wins.
    sub network_for_ip {
        my ($ip) = @_;

        if (not $ip) { # can happen in testing
            return Net::Netmask->new('default');
        }

        # (Re)build the zone cache on first use and roughly every 500
        # lookups thereafter, so settings changes get picked up.
        # "!$age" covers both the never-initialized (undef) and the
        # explicit-zero case without an uninitialized-value warning
        # (the original "$age == 0" numified undef and warned).
        if (!$age or $age++ > 500) {
            clear_and_build_cache();
            $age = 1;
        }

        my $network;
        foreach my $zone (keys %cache) {
            if ($cache{$zone}->match($ip)) {
                $network = $cache{$zone};
            }
        }

        if (not $network) {
            # no configured zone matched: take the first two octets.
            # anchored, with the dot escaped (the unescaped trailing "."
            # previously matched any character by accident).
            ($network) = ($ip =~ m/^(\d+\.\d+)\./);
            $network .= '/16'; # default
            $network = Net::Netmask->new2($network);
        }

        return $network;
    }

    # Rebuild the zone cache (%cache) from the server settings:
    #   network_zones - comma-separated list of zone names
    #   zone_<name>   - the netmask for each named zone
    # Malformed or missing entries are warned about and skipped, so a bad
    # setting can never leave an undef in %cache (which would crash the
    # ->match() calls in network_for_ip).
    sub clear_and_build_cache {
        undef %cache;

        my @zones = split(/\s*,\s*/,MogileFS::Config->server_setting("network_zones"));

        foreach my $zone (@zones) {
            my $netmask = MogileFS::Config->server_setting("zone_".$zone);

            if (not $netmask) {
                warn "couldn't find network_zone <<zone_".$zone.">> check your server settings";
                next;
            }

            if ($cache{$netmask}) {
                warn "duplicate netmask <$netmask> in network zones. check your server settings";
            }

            # new2() returns undef on a bad mask (and sets errstr).
            # Check the return value rather than errstr() -- errstr can
            # be stale from an earlier failure -- and never cache a
            # failed parse (the original stored undef before checking).
            my $parsed = Net::Netmask->new2($netmask);
            if (not $parsed) {
                warn "couldn't parse <$zone> as a netmask. error was <".Net::Netmask::errstr().
                     ">. check your server settings";
                next;
            }

            $cache{$netmask} = $parsed;
        }
    }

    # Test hook: seed the zone cache directly so lookups never have to
    # consult the database for server settings.
    sub stuff_cache {
        my ($self, $ip, $netmask) = @_;

        $cache{$ip} = $netmask;
        $age        = 1; # mark the cache as freshly built
    }
}

1;

# Local Variables:
# mode: perl
# c-basic-indent: 4
# indent-tabs-mode: nil
# End:

__END__

=head1 NAME

MogileFS::ReplicationPolicy::MultipleNetworks

=head1 RULES

This policy tries to put files onto devices which are on different networks, if that isn't possible then devices on the same network are returned as "desperate" options.

We aim to have as many copies as we can on unique networks. If there are 2 copies on one network and none on another, with a min of 2, we will still over-replicate to the other network; when called from the rebalancer we will therefore rebalance across networks and then reduce back down to the correct number of copies.

By default we class 2 hosts as being on 2 different networks if they are on different /16 networks (255.255.0.0). This can be controlled using server settings, with a list of network "zones", and then a definition of a netmask for each "zone".

mogadm settings set network_zones location1,location2
mogadm settings set zone_location1 192.168.0.0/24
mogadm settings set zone_location2 10.0.0.0/24

Zone names and netmasks must each be unique.

=head1 SEE ALSO

L<MogileFS::Worker::Replicate>

L<MogileFS::ReplicationPolicy>

L<MogileFS::ReplicationRequest>