The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package App::DuckPAN::Fathead;
our $AUTHORITY = 'cpan:DDG';
# ABSTRACT: Searches a given output.txt file for a query match
$App::DuckPAN::Fathead::VERSION = '1019';
use Moo;
with 'App::DuckPAN::HasApp';

use DBI;
use JSON;
use Path::Tiny;
use HTML::TreeBuilder;
use HTML::Element;
use Data::Printer return_value => 'dump';

has selected => (
	is => 'rw',
	lazy => 1,
	required => 0,
	predicate => 1,
	trigger => 1
);

sub _trigger_selected {
	my ( $self, $id ) = @_;
	my $dir = path("lib/fathead/$id");
	unless ($dir->is_dir) {
		my $full_path = $dir->realpath;
		$self->app->emit_and_exit(1, "Directory not found: $full_path") ;
	}
	my $file = $dir->child("output.txt");
	unless ($file->exists){
		my $full_path = $file->realpath;
		$self->app->emit_and_exit(1, "No output.txt was found in $full_path");
	}
	$self->_set_output_txt($file);
	return $dir;
}

has _trigger_words => (
	is      => 'ro',
	builder => 1,
	lazy    => 1,
);

sub _build__trigger_words {
	my ($self) = @_;
	my $tf = 'trigger_words.txt';
	return [] unless $self->has_selected;
	my $file = path("lib/fathead/", $self->selected, $tf);
	unless ($file->exists){
		my $full_path = $file->realpath;
		$self->app->emit_debug("No $tf was found in $full_path");
		return [];
	}
	chomp (my @words = $file->lines);
	return \@words;
}

has _trigger_re => (
	is      => 'ro',
	lazy    => 1,
	builder => 1,
);

sub _build__trigger_re {
	my ($self) = @_;
	my @words = @{$self->_trigger_words};
	my $text = join '|', map { quotemeta $_ } @words;
	return qr/\b(?:$text)\b/i;
}

has output_txt => (
	is => 'rwp',
	lazy => 1,
	required => 0
);

has dbh => (
	is => 'rw',
	lazy => 1,
	required => 0,
	builder => 1
);

sub _build_dbh {
	my ( $self ) = @_;

	# Open output.txt file for searching
	# Handles as a CSV with "\t" separator
	# Provide numbered column names
	my $dbh = DBI->connect ("dbi:CSV:", undef, undef, {
		f_dir                   => $self->output_txt->parent,
		f_ext                   => ".txt/r",
		csv_sep_char            => "\t",
		csv_quote_char          => undef,
		csv_escape_char         => undef,
		csv_allow_whitespace    => 1,
		csv_allow_quotes        => 1,
		RaiseError              => 1,
		PrintError              => 0,
		csv_tables => {
			output => {
				file => 'output.txt',
				col_names => [
					"title",
					"type",
					"redirect",
					"col4",
					"categories",
					"col6",
					"related_topics",
					"col8",
					"external_links",
					"disambiguation",
					"images",
					"abstract",
					"abstract_url",
				],
			},
		},
	}) or die $DBI::errstr;

	return $dbh;
}

# Get a Fathead result from the DB
# Requery when we get a Redirect
sub _search_output {

	my ($self, $query) = @_;

	my $trigger_re = $self->_trigger_re;
	$query =~ s/^$trigger_re\s+|\s+$trigger_re$//;
	my $result = $self->_db_lookup($query);

	while ($result && $result->{type} eq 'R') {
		my $redirect = $result->{redirect};
		$self->app->emit_notice("Following Redirect: '$result->{title}' -> '$redirect'");
		$result = $self->_db_lookup($redirect);
	}
	return $result;
}

# Attempt to get a result from DB (output.txt)
# Capture & display any raised errors
sub _db_lookup {
	my ($self, $query) = @_;

	my $result;
	$@ = '';

	eval {
		# TODO lowercase all titles first
		my $sth = $self->dbh->prepare("SELECT * FROM output WHERE lower(title) = ?");
		$sth->execute(lc $query);
		while (my $row = $sth->fetchrow_hashref) {
			$result = $row;
		}
		$sth->finish();
	};
	$self->app->emit_error("SQL database error: $@") if $@;
	return $result;
}

sub structured_answer_for_query {
	my ($self, $query) = @_;
	my $result = $self->_search_output($query) or return undef;
	return $self->_build_structured_answer($result);
}

# Build a Structured Answer hash
# Properties depend on Fathead result type
sub _build_structured_answer {
	my ($self, $data) = @_;

	# Get IA Metadata via ID lookup
	# Assume selected is an ID
	my $metadata = DDG::Meta::Data->get_ia(id => $self->selected) // {};
	$self->app->emit_error("No Metadata found for ID: ".$self->selected) unless keys %$metadata;

	# DBD::Csv ignores col_names letter casing
	# So, manually map columns to template properties
	# TODO update info_detail template to use lowercase variable names
	my %extra_data;

	my $out = {
		id => $self->selected,
		signal => "high",
		meta => $metadata,
	};

	# Article Result
	if ($data->{type} eq 'A') {
		$out->{duckbar_topic} = 'About';
		$out->{model} = 'FatheadArticle';
		$out->{templates} = { detail => 'info_detail' };
		%extra_data = (
			Heading 	=> $data->{title},
			Abstract 	=> $self->_replace_newlines($data->{abstract}),
			AbstractURL	=> $data->{abstract_url},
			FirstURL 	=> $metadata->{src_url},
			Image 		=> $self->_get_image($data->{images}),
		);
	}

	# Disambiguation Result
	if ($data->{type} eq 'D') {
		$out->{duckbar_topic} = 'Meanings';
		$out->{model} = 'FatheadListItem';
		$out->{templates} = { item => 'meanings_item' };
		%extra_data = (
			Heading => $data->{title}." (".$metadata->{name}.")",
			RelatedTopics => $self->_parse_disambiguations($data->{disambiguation}, $out)
		);
	}

	# Category Pages Result
	if ($data->{type} eq 'C') {
		$out->{duckbar_topic} = 'List';
		$out->{model} = 'FatheadListItem';
		$out->{templates} = { item => 'categories_item' };
	}

	$out->{data} = { %$data, %extra_data };
	return $out;
}

# Emulate internal processing to build JSON
# matching DDG API result format
sub _parse_disambiguations {
	my ($self, $disambiguations, $out) = @_;
	my @out;
	my @disambiguations = split /\\n/, $disambiguations;
	foreach my $disambiguation (@disambiguations){
		my $result = {};
		if ($disambiguation =~ m/^\*\[\[(.+)\]\],(.+)$/) {

			my $title = $1;
			my $html  = $2;

			# Parse HTML into plaintext
			my $root = HTML::TreeBuilder->new_from_content($html);
			$root->elementify();
			my $text = $root->as_trimmed_text;

			# Build URL Path
			my $href = "/?q=$title&ia=about";
			my $a = HTML::Element->new('a', href => $href);
			$a->push_content($title);

			$result = {
				Result => $a->as_HTML . "$text",    # generates `<a href="$url">$title</a>$text` which gets parsed by template helpers
				FirstURL => $href,
				Text => $text
			};
		}
		push @out, $result;
	}
	return \@out;
}

# Emulate internal processing to build JSON
# matching DDG API result format
sub _get_image {
	my ($self, $image) = @_;
	my $url = "";
	if ($image =~ m/^\[\[Image:(.+)\]\]$/) {
		$url = $1;
	}
	return $url;
}


sub _replace_newlines {
	my ($self, $abstract) = @_;
	$abstract =~ s/\\n/<br>/g;
	return $abstract;
}


1;

__END__

=pod

=head1 NAME

App::DuckPAN::Fathead - Searches a given output.txt file for a query match

=head1 VERSION

version 1019

=head1 AUTHOR

DuckDuckGo <open@duckduckgo.com>, Zach Thompson <zach@duckduckgo.com>, Zaahir Moolla <moollaza@duckduckgo.com>, Torsten Raudssus <torsten@raudss.us> L<https://raudss.us/>

=head1 COPYRIGHT AND LICENSE

This software is Copyright (c) 2013 by DuckDuckGo, Inc. L<https://duckduckgo.com/>.

This is free software, licensed under:

  The Apache License, Version 2.0, January 2004

=cut