The London Perl and Raku Workshop takes place on 26th Oct 2024. If your company depends on Perl, please consider sponsoring and/or attending.
# $Id: Apache2.pm 70 2013-01-31 11:35:42Z jo $
# Cindy::Apache2 - mod_perl2 interface for the Cindy module.
#
# Copyright (c) 2008 Joachim Zobel <jz-2008@heute-morgen.de>. All rights reserved.
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#

package Cindy::Apache2;

use strict;
use warnings;

our $VERSION = '0.06';

use APR::Brigade ();
use Apache2::Response ();
use Apache2::SubRequest ();
use Apache2::Filter ();
use Apache2::Log;
use Apache2::Const -compile => qw(OK DECLINED 
    HTTP_NOT_MODIFIED HTTP_NOT_FOUND SERVER_ERROR);
use APR::Const    -compile => qw(:error SUCCESS);
#use Log::Log4perl qw(:easy);

use Apache2::RequestRec ();
use APR::Finfo ();

use Memoize;
use Storable qw(dclone);

use Cindy;
use Cindy::Log;

use constant CIS => 'CIS';
use constant DATA => 'DATA';
use constant DOC => 'DOC';

#
# funktion: handler	
# param.:	-
# return:	-
# Since this handler actually needs 3 parameters, these
# are passed as enviroment variables. Their names are
# CINDY_CIS_URL, CINDY_DATA_URL, CINDY_DOC_URL or
# CINDY_CIS_FILE, CINDY_DATA_FILE, CINDY_DOC_FILE
# as an alternative. 
#
sub handler {
	my ($r)	= @_;
  #$r = Apache2::RequestRec->new($r);
  
  my $rv;

  # Subrequest for DOC
  my ($doc, $doc_type);
  $rv = read_subrequest($r, DOC, \$doc, \$doc_type);
  $r->log->debug("DOC content type:".$doc_type);
  if ($rv != Apache2::Const::OK) {
    return $rv;
  }
  # Subrequest for DATA
  my ($data, $data_type);
  $rv = read_subrequest($r, DATA, \$data, \$data_type);
  $r->log->debug("DATA content type:".$data_type);
  if ($rv != Apache2::Const::OK) {
    return $rv;
  }
  # Subrequest for CIS
  my $cis;
  $rv = read_subrequest($r, CIS, \$cis);
  if ($rv != Apache2::Const::OK) {
    return $rv;
  }
  
  # Do the 304
  if ($r->meets_conditions == Apache2::Const::HTTP_NOT_MODIFIED) {
    $r->set_last_modified;
    return Apache2::Const::HTTP_NOT_MODIFIED;
  } 
 
  # Parse DOC
  $doc = parse_by_type($doc_type, $doc, DOC);
  if (!defined($doc)) {
    return Apache2::Const::SERVER_ERROR;
  }  
  # Parse DATA
  $data = parse_by_type($data_type, $data, DATA);
  if (!defined($data)) {
    return Apache2::Const::SERVER_ERROR;
  }
  # Parse CIS
  if (!$cis) {
    ERROR "Failed to retrieve content for CIS.";
    return Apache2::Const::SERVER_ERROR;
  }
  $cis = parse_cis_string_cached($cis);
  if (!defined($cis)) {
    return Apache2::Const::SERVER_ERROR;
  }

  INFO "Parsing succeeded. Will do injection now.";
  my $out = eval { inject($data, $doc, $cis) };
  if ($@) {
    ERROR $@;
    return Apache2::Const::SERVER_ERROR;
  }

  INFO "Injection successful. Sending content.";
  $r->set_last_modified;
  $r->content_type('text/html;charset='.$doc->actualEncoding());
  print $out->toStringHTML();

  dump_xpath_profile();

  return Apache2::Const::OK;
}

#
# Read data of the given kind into a stringref
#
sub read_subrequest($$$;$)
{
  my ($r, $what, $rtext, $rtype) = @_;
  my $rsub = lookup_by_env($r, $what);
  if (!defined($rsub)) {
    return Apache2::Const::HTTP_NOT_FOUND;
  }
  my $rv = $rsub->run_trapped($rtext);
  if ($rv != Apache2::Const::OK) {
    return $rv;
  }
  if ($rtype) {
    my $ctype = $rsub->make_content_type($rsub->content_type);
    $$rtype = $ctype;
  }
  copy_mtime($rsub, $r);
  
  return Apache2::Const::OK;
}

#
# Reads a subrequests LastModified header and sets it
# for the main request 
#
sub copy_mtime($$)
{
  my ($from, $to) = @_;
  # If no mtime is available 
  # we asume the document has just 
  # been created.
  my $mtime = $from->mtime || time; 
  $to->update_mtime($mtime);
}

#
# return An apache subrequest object
# 
sub lookup_by_env($$)
{
  my ($r, $pname) = @_;

  my $rtn;
  my $env_file = $r->subprocess_env("CINDY_$pname"."_FILE");
  if ($env_file) {
    DEBUG "Looking up '$env_file' for $pname."; 
    $rtn = $r->lookup_file($env_file);
  }

  my $env_uri = $r->subprocess_env("CINDY_$pname"."_URI");
  if ($env_uri) {
    WARN "CINDY_$pname._FILE=$env_file overwritten "
          ."by CINDY_$pname._URI=$env_uri." 
      if ($rtn);
    DEBUG "Looking up '$env_uri' for $pname."; 
    $rtn = $r->lookup_uri($env_uri);
  }

  #if ($lastmod) {
  #  $r_sub->headers_in('If-Modified-Since', time2str($lastmod));
  #}
  my $env_x = $env_file || $env_uri;
  if (!$rtn) {
    ERROR "Could not lookup '$env_x' for $pname."; 
  } else {
    DEBUG "Lookup succeeded for '$env_x' for $pname."; 
  }

  return $rtn;     
}

#
# return An XML:LibXML root node.
# 
sub parse_by_type($$$)
{
  my ($type, $text, $what) = @_;

  my $rtn;

  eval {
    if ($type =~ m/html/io) {
      my %opt = (html_parse_noimplied => 1);
      if ($type =~ /;\s*charset\s*=\s*(\S+)/) {
        # We pass the encoding from the header
        # to the HTML parser
        $opt{encoding} = $1;
      }
      $rtn = parse_html_string($text, \%opt);
    } elsif ($type =~ m/xml/io) {
      $rtn = parse_xml_string($text);
    } else {
      ERROR "Invalid $what Content-Type $type.";
      return undef;
    }
  };
  if ($@) {
    ERROR $@;
    return undef;
  }

  if ($what eq DATA) {
    # Data will not be modified
    $rtn->indexElements();
  }
  return $rtn;
}

#
# This is shamelessly stolen from Apache2::TrapSubRequest
#
sub Apache2::SubRequest::run_trapped {
  my ($r, $dataref) = @_;
  ERROR 'Usage: $subr->run_trapped(\$data)'
      unless ref $dataref eq 'SCALAR';

  $$dataref = '' unless defined $$dataref;
  $r->pnotes(__PACKAGE__, $dataref);
  $r->add_output_filter(\&_filter);
  
  $r->log->debug("Before trapped run:".$r->content_type);
  my $rv = $r->run;
  $r->log->debug("After trapped run:".$r->content_type);
  return $rv;
}

sub _filter {
  my ($f, $bb) = @_;
  my $r = $f->r;
  my $dataref = $r->pnotes(__PACKAGE__);

  $r->log->debug("In trap filter:".$r->content_type);

  $bb->flatten(my $string);
  $$dataref .= $string;
  # Do not send anything to the client
  $bb->cleanup;
  
  return Apache2::Const::OK;
}


#
# parse_cis_string_cached
# 
# - Same parameter as parse_cis_string
# return is copied to avoid modifications of
# the cached structure.
#
sub parse_cis_string_cached
{
  memoize_all();
  my $cjs = eval { Cindy::parse_cis_string($_[0]) };
  if ($@) {
    ERROR $@;
    return undef;
  }
  return dclone($cjs);
}

my $is_memoized = 0;
sub memoize_all
{
  return if ($is_memoized);
  memoize('Cindy::parse_cis_string');
  $is_memoized = 1;
}

#
# This wraps the function 
# ap_make_content_type from protocol.c
# that is not wrapped by mod_perl.
#
# The content type that apache 
# finally sends over the network is 
# make_content_type($r->content_type) 
# (see ap_http_header_filter in http_filters.c)
#
# This is (as far as I understand it) because of 
# the meaning of default (use, if no other is set).
# make_content_type adds the default charset that 
# has been set with AddDefaultCharset just 
# before sending the header.
#
package Apache2::RequestRec;

require XSLoader;
XSLoader::load('Cindy::Apache2', $VERSION);

1;
__END__

=head1 NAME

Cindy::Apache2 - use unmodified XML or HTML documents as templates.

=head1 SYNOPSIS

  RewriteEngine On
  RewriteRule ^/cindy/content/(.*)$  /cindy/content.data/$1 [NS,E=CINDY_DATA_URI:/cindy/content.data/$1]
 
  PerlModule Cindy::Apache2
  <Location /cindy/content>
  SetEnv CINDY_CIS_URI /cindy/tmpl/frame.cjs
  SetEnv CINDY_DOC_URI /cindy/tmpl/frame.html

  SetHandler  perl-script
  PerlHandler Cindy::Apache2
  </Location>


=head1 DESCRIPTION

C<Cindy::Apache2> uses the Cindy module in an apache
content handler. Cindy merges data into a document template 
using a content injection sheet to create its response. As
you see above it is used by configuring apache. This can 
be done from .htaccess.

Since the handler needs 3 components for a request 
their names are passed as 
enviroment variables. These are CINDY_DOC_URI, 
CINDY_DATA_URI and CINDY_CIS_URI. Alternatively
CINDY_DOC_FILE, CINDY_DATA_FILE, CINDY_CIS_FILE can be used.
While the former ones are used as URIs (similiar to SSIs 
include virtual), the latter
ones require a file system path. In each case an internal 
subrequest is made. This means that all three components
can be dynamically created.

If one of the components does not return a 200 status, 
processing is aborted and that status is returned. The 
last modified headers of the components are used to
either add a last modified header to the response or to
respond with a 304.

If the enviroment variable CINDY_FATALS_TO_BROWSER is set
error messages are forwarded to the browser.

=head1 AUTHOR

Joachim Zobel <jz-2008@heute-morgen.de> 

=head1 SEE ALSO

See the C<Cindy> documentation for further explanantions on 
content injection sheets and on what is done with those 3 files.

See L<http://www.heute-morgen.de/site/> for a more elaborate 
example of what can be done with Cindy::Apache2.