package Elastic::Model::Index;
$Elastic::Model::Index::VERSION = '0.52';
use Carp;
use Moose;
with 'Elastic::Model::Role::Index';
use namespace::autoclean;
no Moose;
#===================================
sub create {
#===================================
    # Creates this index in the store.
    #
    # All arguments are forwarded unchanged to $self->index_config, whose
    # result (a hash ref) is flattened into the arguments of the store's
    # create_index() call.
    #
    # Returns the invocant, so that calls can be chained.
    my ( $self, @config_args ) = @_;

    my $index_def = $self->index_config(@config_args);
    my $store     = $self->model->store;
    $store->create_index( %{$index_def} );

    return $self;
}
#===================================
sub reindex {
#===================================
    # Reindexes the docs found in domain $domain into this index
    # ($self->name), creating the index first if $self->exists is false.
    #
    # Arguments: $domain (required - croaks if false), followed by
    # key/value options:
    #   repoint_uids    - default 1; when true, finish by calling
    #                     repoint_uids() with the UIDs that were moved
    #   quiet           - when true, suppress progress output
    #   scan            - value passed to the source view's scan()
    #                     (default '2m')
    #   size            - value passed to the source view's size()
    #                     (default 1000)
    #   bulk_size       - passed to store->reindex (default: size)
    #   transform       - coderef applied to each doc before its _index
    #                     is rewritten (default: identity)
    #   on_conflict / on_error
    #                   - passed through to store->reindex
    #   uid_on_conflict / uid_on_error
    #                   - passed to repoint_uids() as on_conflict / on_error
    #
    # Returns 1 when repoint_uids is false, otherwise the return value of
    # repoint_uids().
    my $self   = shift;
    my $domain = shift
        or croak "No (domain) passed to reindex()";
    my %args = ( repoint_uids => 1, @_ );

    my $verbose    = !$args{quiet};
    my $scan       = $args{scan} || '2m';
    my $size       = $args{size} || 1000;
    my $bulk_size  = $args{bulk_size} || $size;
    my $dest_index = $self->name;
    my $model      = $self->model;
    my $transform  = $args{transform} || sub {@_};

    # print, not printf: the message is already fully interpolated, and a
    # '%' in either name must not be interpreted as a format directive.
    print "Reindexing domain ($domain) to index ($dest_index)\n" if $verbose;

    if ( $self->exists ) {
        print "Index ($dest_index) already exists.\n" if $verbose;
    }
    else {
        print "Creating index ($dest_index)\n" if $verbose;
        $self->create();
    }

    # store all changed UIDs so that we can repoint them
    # later, when they're used in docs that aren't being reindexed
    my %uids;
    my $doc_updater = sub {
        my ($doc) = $transform->(@_);

        # Record where the doc came from before retargeting it.
        $uids{ $doc->{_index} }{ $doc->{_type} }{ $doc->{_id} } = 1;
        $doc->{_index} = $dest_index;
        return $doc;
    };

    # Map all indices that 'domain' points to, to $index->name
    my $old = $model->domain($domain)->namespace->alias($domain);
    my %map
        = map { $_ => 1 } $old->is_alias
        ? keys %{ $old->aliased_to }
        : ($domain);

    # UIDs embedded in the reindexed docs are only rewritten when they
    # point at one of the indices being replaced.
    my $uid_updater = sub {
        my $uid = shift;
        $uid->{index} = $dest_index
            if $map{ $uid->{index} };
    };

    my $updater = $self->doc_updater( $doc_updater, $uid_updater );
    my $source  = $model->view->domain($domain)->size($size)->scan($scan);

    $model->store->reindex(
        source      => sub { $source->shift_element },
        verbose     => $verbose,
        transform   => $updater,
        bulk_size   => $bulk_size,
        on_conflict => $args{on_conflict},
        on_error    => $args{on_error},
    );

    return 1 unless $args{repoint_uids};

    # Fix up UIDs in the indices that were NOT part of this reindex.
    return $self->repoint_uids(
        uids        => \%uids,
        verbose     => $verbose,
        exclude     => [ keys %map ],
        size        => $size,
        bulk_size   => $bulk_size,
        scan        => $scan,
        on_conflict => $args{uid_on_conflict},
        on_error    => $args{uid_on_error},
    );
}
#===================================
sub repoint_uids {
#===================================
    # Rewrites stale UIDs, in indices other than this one, so that they
    # point at this index ($self->name).
    #
    # Named arguments:
    #   uids        - hash ref of { $index => { $type => { $id => 1 } } }
    #                 naming the UIDs that have moved (default: {})
    #   exclude     - array ref of index names to leave untouched; this
    #                 index is always excluded
    #   verbose     - when true, print progress information
    #   quiet       - only consulted when verbose is not given: a false
    #                 value enables progress output, a true value
    #                 suppresses it.  With neither key the method is silent.
    #   scan        - value passed to the view's scan() (default '2m')
    #   size        - value passed to the view's size(), and the number of
    #                 IDs searched for per pass (default 1000)
    #   bulk_size   - passed to store->reindex (default: size)
    #   on_conflict / on_error
    #               - passed through to store->reindex
    #
    # Returns 1.
    my ( $self, %args ) = @_;

    # Honour the documented 'quiet' flag as well as the 'verbose' flag
    # that reindex() passes; an explicit 'verbose' always wins.
    my $verbose = $args{verbose};
    if ( !defined $verbose and exists $args{quiet} ) {
        $verbose = !$args{quiet};
    }

    my $scan       = $args{scan} || '2m';
    my $size       = $args{size} || 1000;
    my $bulk_size  = $args{bulk_size} || $size;
    my $model      = $self->model;
    my $index_name = $self->name;
    my $uids       = $args{uids} || {};

    unless (%$uids) {
        print "No UIDs to repoint\n" if $verbose;
        return 1;
    }

    my %exclude = map { $_ => 1 } ( $index_name, @{ $args{exclude} || [] } );
    my @indices = grep { not $exclude{$_} } $model->all_live_indices;
    unless (@indices) {
        print "No UIDs to repoint\n" if $verbose;
        return 1;
    }

    # Paths of the UID attributes found in the mappings of those indices.
    my @uid_attrs = $self->_uid_attrs_for_indices(@indices);
    unless (@uid_attrs) {
        print "No UIDs to repoint\n" if $verbose;
        return 1;
    }

    my $view = $model->view->domain( \@indices )->size($size);

    # Each doc that is rewritten gets its _version incremented.
    my $doc_updater = sub {
        my $doc = shift;
        $doc->{_version}++;
        return $doc;
    };

    # Only UIDs listed in $uids are repointed.
    my $uid_updater = sub {
        my $uid = shift;
        return unless $uids->{ $uid->{index} }{ $uid->{type} }{ $uid->{id} };
        $uid->{index} = $index_name;
    };

    my $updater = $self->doc_updater( $doc_updater, $uid_updater );

    for my $index ( keys %$uids ) {
        my $types = $uids->{$index};

        for my $type ( keys %$types ) {
            my @ids = keys %{ $types->{$type} };

            printf( "Repointing %d UIDs from %s/%s\n",
                0 + @ids, $index, $type )
                if $verbose;

            # Search for the IDs in batches of $size.
            while (@ids) {
                my $clauses
                    = $self->_build_uid_clauses( \@uid_attrs, $index, $type,
                    [ splice @ids, 0, $size ] );

                my $source = $view->filter( or => $clauses )->scan($scan);

                $model->store->reindex(
                    source => sub {
                        $source->shift_element;
                    },
                    bulk_size   => $bulk_size,
                    quiet       => 1,
                    transform   => $updater,
                    on_conflict => $args{on_conflict},
                    on_error    => $args{on_error},
                );
            }
            print "\n" if $verbose;
        }
    }

    print "\nDone\n" if $verbose;
    return 1;
}
#===================================
sub _uid_attrs_for_indices {
#===================================
    # Fetches the mappings of the given indices from the store and returns
    # the distinct UID attribute paths (as reported by _find_uid_attrs)
    # found in the properties of any type in any of those indices.
    my ( $self, @indices ) = @_;

    my $mapping = $self->model->store->get_mapping( index => \@indices );

    my %seen;
    for my $index_mapping ( values %$mapping ) {
        for my $type_mapping ( values %{ $index_mapping->{mappings} } ) {
            $seen{$_} = 1 for _find_uid_attrs( $type_mapping->{properties} );
        }
    }

    return keys %seen;
}
#===================================
sub _find_uid_attrs {
#===================================
    # Recursively walks a mapping "properties" hash and returns the dotted
    # paths of every field named 'uid' whose own properties include an
    # 'index' entry.
    #
    #   $mapping - hash ref of field name => field mapping (undef is
    #              treated as empty)
    #   $level   - dotted path of the parent field; omitted at the top level
    #
    # Returns a list of paths such as ('uid', 'user.uid').
    my ( $mapping, $level ) = @_;
    $mapping ||= {};

    # Test definedness/length rather than truth, so that a parent field
    # called "0" still contributes its prefix.
    my $prefix = ( defined $level and length $level ) ? "$level." : '';

    my @attrs;
    for my $k ( keys %$mapping ) {
        my $v = $mapping->{$k};

        # Only fields with sub-properties can be, or contain, a UID.
        next unless ref($v) eq 'HASH' and $v->{properties};

        my $attr = $prefix . $k;
        if ( $k eq 'uid' and $v->{properties}{index} ) {
            push @attrs, $attr;
            next;
        }
        push @attrs, _find_uid_attrs( $v->{properties}, $attr );
    }
    return @attrs;
}
#===================================
sub _build_uid_clauses {
#===================================
    # Builds one 'and' filter per (id, UID attribute) pair.  Each filter
    # requires the attribute's .index, .type and .id sub-fields to match
    # $index, $type and the id respectively.
    #
    # Returns an array ref of filters, ordered by id and then by attribute.
    my ( $self, $uid_attrs, $index, $type, $ids ) = @_;

    my @filters;
    for my $doc_id ( @{$ids} ) {
        for my $attr ( @{$uid_attrs} ) {
            my @terms = (
                { term => { "$attr.index" => $index } },
                { term => { "$attr.type"  => $type } },
                { term => { "$attr.id"    => $doc_id } },
            );
            push @filters, { and => \@terms };
        }
    }

    return \@filters;
}
#===================================
sub doc_updater {
#===================================
    # Returns a coderef which passes its arguments to $doc_updater, then
    # walks the _source of the doc that comes back and calls $uid_updater
    # on every embedded UID.
    #
    # A UID is recognised as a hash stored under the key 'uid' which has
    # true 'index' and 'type' entries.  A hash holding such a UID is not
    # searched any further; all other hashes and arrays are descended into.
    #
    # The returned coderef returns the doc from $doc_updater.
    my ( $self, $doc_updater, $uid_updater ) = @_;

    return sub {
        my $doc = $doc_updater->(@_);

        my @stack = values %{ $doc->{_source} };

        # Loop on the size of the stack, not on the truth of the shifted
        # value: a false scalar (0, '' or undef) would otherwise end the
        # walk early and leave any remaining UIDs untouched.
        while (@stack) {
            my $val = shift @stack;

            unless ( ref $val eq 'HASH' ) {
                push @stack, @$val if ref $val eq 'ARRAY';
                next;
            }

            my $uid = $val->{uid};
            if (    $uid
                and ref $uid eq 'HASH'
                and $uid->{index}
                and $uid->{type} )
            {
                $uid_updater->($uid);
            }
            else {
                push @stack, values %$val;
            }
        }
        return $doc;
    };
}
# Make the class's metaclass immutable now that all of the methods above
# have been defined (the usual closing step for a Moose class).
__PACKAGE__->meta->make_immutable;
# The module file must end with a true value.
1;
=pod
=encoding UTF-8
=head1 NAME
Elastic::Model::Index - Create and administer indices in Elasticsearch
=head1 VERSION
version 0.52
=head1 SYNOPSIS
$index = $model->namespace('myapp')->index;
$index = $model->namespace('myapp')->index('index_name');
$index->create( settings => \%settings );
$index->reindex( 'old_index' );
See also L<Elastic::Model::Role::Index/SYNOPSIS>.
=head1 DESCRIPTION
L<Elastic::Model::Index> objects are used to create and administer indices
in an Elasticsearch cluster.
See L<Elastic::Model::Role::Index> for more about usage.
See L<Elastic::Manual::Scaling> for more about how indices can be used in your
application.
=head1 METHODS
=head2 create()
$index = $index->create();
$index = $index->create( settings => \%settings, types => \@types );
Creates an index called L<name|Elastic::Model::Role::Index/name> (which
defaults to C<< $namespace->name >>).
The L<type mapping|Elastic::Manual::Terminology/Mapping> is automatically
generated from the attributes of your doc classes listed in the
L<namespace|Elastic::Model::Namespace>. Similarly, any
L<custom analyzers|Elastic::Model/"Custom analyzers"> required
by your classes are added to the index
L<\%settings|http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/indices-update-settings.html>
that you pass in:
$index->create( settings => {number_of_shards => 1} );
To create an index with a sub-set of the types known to the
L<namespace|Elastic::Model::Namespace>, pass in a list of C<@types>.
$index->create( types => ['user','post' ]);
=head2 reindex()
# reindex $domain_name to $index->name
$index->reindex( $domain_name );
# more options
$index->reindex(
$domain,
repoint_uids => 1,
size => 1000,
bulk_size => 1000,
scan => '2m',
quiet => 0,
transform => sub {...},
on_conflict => sub {...} | 'IGNORE'
on_error => sub {...} | 'IGNORE'
uid_on_conflict => sub {...} | 'IGNORE'
uid_on_error => sub {...} | 'IGNORE'
);
While you can add to the L<mapping|Elastic::Manual::Terminology/Mapping> of
an index, you can't change what is already there. Especially during development,
you will need to reindex your data to a new index.
L</reindex()> reindexes your data from L<domain|Elastic::Manual::Terminology/Domain>
C<$domain_name> into an index called C<< $index->name >>. The new index is
created if it doesn't already exist.
See L<Elastic::Manual::Reindex> for more about reindexing strategies. The
documentation below explains what each parameter does:
=over
=item size
The C<size> parameter defaults to 1,000 and controls how many documents
are pulled from C<$domain> in each request. See L<Elastic::Model::View/size>.
B<Note:> documents are pulled from the C<domain>/C<view> using
L<Elastic::Model::View/scan()>, which can pull a maximum of
L<size|Elastic::Model::View/size> C<* number_of_primary_shards> in a single
request. If you have large docs or underpowered servers, you may want to
change the C<size> parameter.
=item bulk_size
The C<bulk_size> parameter defaults to C<size> and controls how many documents
are indexed into the new domain in a single bulk-indexing request.
=item scan
C<scan> is the same as L<Elastic::Model::View/scan> - it controls how long
Elasticsearch should keep the "scroll" live between requests. Defaults to
'2m'. Increase this if the reindexing process is slow and you get
scroll timeouts.
=item repoint_uids
If true (the default), L</repoint_uids()> will be called automatically to
update any L<UIDs|Elastic::Model::UID> (which point at the old index) in
indices other than the ones currently being reindexed.
=item transform
If you need to change the structure/data of your doc while reindexing, you
can pass a C<transform> coderef. This will be called before any changes
have been made to the doc, and should return the new doc. For instance,
to convert the single-value C<tag> field to an array of C<tags>:
$index->reindex(
'new_index',
'transform' => sub {
my $doc = shift;
$doc->{_source}{tags} = [ delete $doc->{_source}{tag} ];
return $doc
}
);
=item on_conflict / on_error
If you are indexing to the new index at the same time as you
are reindexing, you may get document conflicts. You can handle the conflicts
with a coderef callback, or ignore them by setting C<on_conflict> to
C<'IGNORE'>:
$index->reindex( 'myapp_v2', on_conflict => 'IGNORE' );
Similarly, you can pass an C<on_error> handler which will handle other errors,
or all errors if no C<on_conflict> handler is defined.
See L<Search::Elasticsearch::Bulk/Using-callbacks> for more.
=item uid_on_conflict / uid_on_error
These work in the same way as the C<on_conflict> or C<on_error> handlers,
but are passed to L</repoint_uids()> if C<repoint_uids> is true.
=item quiet
By default, L</reindex()> prints out progress information. To silence this,
set C<quiet> to true:
$index->reindex( 'myapp_v2', quiet => 1 );
=back
=head2 repoint_uids()
$index->repoint_uids(
uids => { myapp_v1 => { user => { 10 => 1, 12 => 1 } } },
exclude => ['myapp_v2'],
scan => '2m',
size => 1000,
bulk_size => 1000,
quiet => 0,
on_conflict => sub {...} | 'IGNORE'
on_error => sub {...} | 'IGNORE'
);
The purpose of L</repoint_uids()> is to update stale L<UID|Elastic::Model::UID>
attributes to point to a new index. It is called automatically from
L</reindex()>.
Parameters:
=over
=item uids
C<uids> is a hash ref of the stale L<UIDs|Elastic::Model::UID> which should be
updated.
For instance: you have reindexed C<myapp_v1> to C<myapp_v2>, but domain
C<other> has documents with UIDs which point to C<myapp_v1>. You
can update these by passing a hash ref of the old UIDs, as follows:
$index = $namespace->index('myapp_v2');
$index->repoint_uids(
uids => { # index
myapp_v1 => { # type
user => {
1 => 1, # ids
2 => 1,
}
}
}
);
=item exclude
By default, all indices known to the L<model|Elastic::Model::Role::Model> are
updated. You can exclude indices with:
$index->repoint_uids(
uids => \%uids,
exclude => ['index_1', ...]
);
=item size
This is the same as the C<size> parameter to L</reindex()>.
=item bulk_size
This is the same as the C<bulk_size> parameter to L</reindex()>.
=item scan
This is the same as the C<scan> parameter to L</reindex()>.
=item quiet
This is the same as the C<quiet> parameter to L</reindex()>.
=item on_conflict / on_error
These are the same as the C<uid_on_conflict> and C<uid_on_error> handlers
in L</reindex()>.
=back
=head2 doc_updater()
$coderef = $index->doc_updater( $doc_updater, $uid_updater );
L</doc_updater()> is used by L</reindex()> and L</repoint_uids()> to update
the top-level doc and any UID attributes with callbacks.
The C<$doc_updater> receives the C<$doc> as its only argument, and should
return the C<$doc> after making any changes:
$doc_updater = sub {
my ($doc) = @_;
$doc->{_index} = 'foo';
return $doc
};
The C<$uid_updater> receives the UID as its only argument:
$uid_updater = sub {
my ($uid) = @_;
$uid->{index} = 'foo'
};
=head1 IMPORTED ATTRIBUTES
Attributes imported from L<Elastic::Model::Role::Index>
=head2 L<namespace|Elastic::Model::Role::Index/namespace>
=head2 L<name|Elastic::Model::Role::Index/name>
=head1 IMPORTED METHODS
Methods imported from L<Elastic::Model::Role::Index>
=head2 L<close()|Elastic::Model::Role::Index/close()>
=head2 L<open()|Elastic::Model::Role::Index/open()>
=head2 L<refresh()|Elastic::Model::Role::Index/refresh()>
=head2 L<delete()|Elastic::Model::Role::Index/delete()>
=head2 L<update_analyzers()|Elastic::Model::Role::Index/update_analyzers()>
=head2 L<update_settings()|Elastic::Model::Role::Index/update_settings()>
=head2 L<delete_mapping()|Elastic::Model::Role::Index/delete_mapping()>
=head2 L<is_alias()|Elastic::Model::Role::Index/is_alias()>
=head2 L<is_index()|Elastic::Model::Role::Index/is_index()>
=head1 SEE ALSO
=over
=item *
L<Elastic::Model::Role::Index>
=item *
L<Elastic::Model::Alias>
=item *
L<Elastic::Model::Namespace>
=item *
L<Elastic::Manual::Scaling>
=back
=head1 AUTHOR
Clinton Gormley <drtech@cpan.org>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2015 by Clinton Gormley.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut
__END__
# ABSTRACT: Create and administer indices in Elasticsearch