Toby Inkster > WWW-DataWiki-0.001 > WWW::DataWiki::HowNotToDoIt

Download:
WWW-DataWiki-0.001.tar.gz

Annotate this POD

Website

View/Report Bugs
Source  

NAME ^

WWW::DataWiki::HowNotToDoIt - version 0

SYNOPSIS ^

  #!/usr/bin/perl

  use 5.010;
  use common::sense;

  use CGI;
  use CGI::Carp 'fatalsToBrowser';
  use DateTime;
  use DateTime::Format::HTTP;
  use DateTime::Format::ISO8601;
  use DateTime::Format::Strptime;
  use Digest::MD5 qw[];
  use HTTP::Negotiate qw[choose];
  use PerlIO::gzip;

  # Defer loading RDF::Trine until we need it, as it's quite big
  # and we don't always need it.
  sub use_Trine
  {
    local $@;
    eval 'use RDF::TriN3;'
      unless RDF::Trine::Model->can('new');
    die $@ if $@;
  }

  # Defer loading RDF::Query until we need it, as it's quite big
  # and we don't always need it.
  sub use_Query
  {
    local $@;
    eval 'use RDF::Query;'
      unless RDF::Query->can('new');
    die $@ if $@;
  }

  # Parse requested URI
  our $CGI   = CGI->new;
  our $PATH  = '/home/tai/vhosts/wiki.ontologi.es/Versions/';
  our $URI   = 'http://wiki.ontologi.es/';
  our $V_URI = 'http://wiki.ontologi.es/Versions/';
  our $E_URI = 'http://buzzword.org.uk/2010/n3edit/?wiki=';
  our $COMPACTDATE = DateTime::Format::Strptime->new(pattern=>'%Y%m%dT%H%M%SZ');
  our ($SHORT,$DATE,$EXT) = split /\./, substr($CGI->path_info, 1);
  our $NS    = undef;

  our $namespaces = {
    };

  if ($SHORT =~ /^([a-z0-9-]+:)?([a-z][a-z0-9-]*[a-z0-9])$/)
  {
    ($NS,$SHORT) = ($1,$2);

    if ($NS)
    {
      $NS =~ s/:$//;

      die "Namespace $NS does not exist."
        unless $namespaces->{ $NS };

      $PATH = $namespaces->{ $NS }{'PATH'} ?
        $namespaces->{ $NS }{'PATH'} :
        ($PATH . $NS . '__');
      $URI  = $namespaces->{ $NS }{'URI'} ?
        $namespaces->{ $NS }{'URI'} :
        ($URI . $NS . ':');
      $V_URI= $namespaces->{ $NS }{'V_URI'} ?
        $namespaces->{ $NS }{'V_URI'} :
        ($V_URI . $NS . '__');
      $E_URI= $namespaces->{ $NS }{'E_URI'} ?
        $namespaces->{ $NS }{'E_URI'} :
        ($E_URI . $NS . ':');

      $NS .= ':';
    }
  }
  else
  {
    die "Unsupported page name. Must use only lower alphanumeric, plus hyphen.";
  }

  if ($DATE =~ /[a-su-y]/i and $DATE !~ /^latest$/i)
  {
    $EXT  = $DATE;
    $DATE = undef;
  }
  our @all_versions = sort
    map { $_ =~ s!^.*/([^/]+)\.n3.gz$!$1!; $_; }
    glob("${PATH}${SHORT}/*.n3.gz");

  # This allows a greater degree of flexibility than CGI::header.
  sub decent_headers
  {
    my (%headers) = @_;
    
    $headers{'Status'}        ||= '200 OK';
    $headers{'Content-Type'}  ||= 'text/plain; charset=utf-8';
    
    my @keys = sort
      {
        {
          'Status'        => 0 ,
          'Content-Type'  => 10 ,
        }->{$_} || 999
      }
      keys %headers;
    
    foreach my $h (@keys)
    {
      my @lines = (ref $headers{$h} eq 'ARRAY') ? @{ $headers{$h} } : ($headers{$h});
      foreach my $line (@lines)
      {
        printf("%s: %s\r\n", $h, (
          (ref $line eq 'ARRAY') ? (join ', ', @$line) : $line
          ));
      }
    }
    print "\r\n";
  }

  # HTTP 2xx/304 responses.
  sub SendData
  {
    my ($data, $datetime, $variant, $headers, $skip304check) = @_;

    $headers ||= {};
    my %headers = %$headers;
    
    my $digest = Digest::MD5->new;
    $digest->add($data);
    my $etag = $NS.$SHORT."\@${datetime}.${variant}".'='.$digest->clone->hexdigest;
    
    my $format = {
      'n3'        => 'text/n3; charset=utf-8',
      'nt'        => 'text/plain; charset=utf-8',
      'canonical' => 'text/plain; charset=utf-8',
      'html'      => 'text/html; charset=utf-8',
      'xhtml'     => 'application/xhtml+xml; charset=utf-8',
      'turtle'    => 'text/turtle; charset=utf-8',
      'rdf'       => 'application/rdf+xml; charset=utf-8',
      'json'      => 'application/json',
      }->{$variant} || 'application/octet-stream';
    
    $headers{'Content-Type'}     ||= $format;
    $headers{'MS-Author-Via'}    ||= ['DAV','SPARQL'];
    $headers{'Content-Base'}     ||= $URI.$SHORT;
    $headers{'Content-Location'} ||= $URI.$SHORT.'.'.$datetime.'.'.$variant;
    $headers{'Link'}             ||= [["<${URI}${SHORT}.latest>;rel=\"latest-version\""],
                           ["<${V_URI}${SHORT}/>;rel=\"version-history\""],
                           ["<${E_URI}${SHORT}>;rel=\"edit\";anchor=\"${URI}${SHORT}.latest\""]];
    $headers{'Last-Modified'}    ||= DateTime::Format::HTTP->format_datetime(
                              $COMPACTDATE->parse_datetime($datetime));
    $headers{'ETag'}             ||= "\"$etag\"";
    $headers{'Content-MD5'}      ||= $digest->b64digest.'==';
    $headers{'Vary'}             ||= [[qw(Accept Accept-Datetime Accept-Encoding)]];
    
    unless ($skip304check)
    {
      my ($condition) = checkConditions($etag, $datetime);
      if ($condition)
      {
        $headers{'Status'} = $condition;
        $data = '';
      }
    }
    
    decent_headers(%headers);
    print $data;
    exit;
  }

  # HTTP 3xx/4xx/5xx responses.
  sub SendError
  {
    my ($status, $body, $headers) = @_;

  #  open my $elog, ">>${PATH}/e.log";
  #  print $elog "$status >>>> $body\n";
  #  close $elog;

    $headers ||= {};
    my %headers = %$headers;

    $headers{'Status'}           ||= ($status || '599 Unspecified Error');
    $headers{'Content-Type'}     ||= 'text/plain';
    $headers{'MS]Author-Via'}    ||= ['SPARQL', 'DAV'];
    
    decent_headers(%headers);
    print $body . "\n";
    exit;
  }

  sub checkConditions
  {
    my ($etag, $datetime) = @_;
    
    if ($CGI->http('if_modified_since'))
    {
      my $ims = $COMPACTDATE->format_datetime(
              DateTime::Format::HTTP->parse_datetime(
                $CGI->http('if_modified_since')));
      if ($ims ge $datetime)
      {
        return ('304 Not Modified', "Has not been modified since request If-Modified-Since header.");
      }
    }
    
    if ($CGI->http('if_unmodified_since'))
    {
      my $ims = $COMPACTDATE->format_datetime(
              DateTime::Format::HTTP->parse_datetime(
                $CGI->http('if_unmodified_since')));
      if ($ims lt $datetime)
      {
        return ('412 Precondition Failed', "Has been modified since request If-Unmodified-Since header.");
      }
    }
    
    if ($CGI->http('if_none_match') =~ /^\s*\*\s*$/)
    {
      return '304 Not Modified';
    }
    elsif (length $CGI->http('if_none_match'))
    {
      my $header  = $CGI->http('if_none_match');
      $header =~ s!W/\"!\"!g;  # not issuing any weak etags
      $header =~ s/(^\s*\"|\"\s*$)//g;
      my @matchers = split /\"\s+\"/, $header;
      foreach my $m (@matchers)
      {
        if ($m eq $etag)
        {
          return ('304 Not Modified', "Matched tag $m in request If-None-Match header.");
        }
      }
    }
    
    if ($CGI->http('if_match') =~ /^\s*\*\s*$/)
    {
      # continue
    }
    elsif (length $CGI->http('if_match'))
    {
      my $header  = $CGI->http('if_match');
      $header =~ s!W/\"!\"!g;  # not issuing any weak etags
      $header =~ s/(^\s*\"|\"\s*$)//g;
      my @matchers = split /\"\s+\"/, $header;
      foreach my $m (@matchers)
      {
        if ($m eq $etag)
        {
          return;
        }
      }
      return ('412 Precondition Failed', "Tag $etag not matched in request If-Match header.");
    }
    
    return;
  }

  # Handle PUT/POST
  if ($CGI->request_method =~ /^(put|post)$/i)
  {
    SendError('405 Method Not Allowed', "Allowed: HEAD, GET. To PUT data, don't append datetime ($DATE) or format ($EXT) suffixes to the URL.")
      if defined $DATE || (defined $EXT and $EXT ne 'n3');
    
    # We've been posted/put some content with a content-type.
    my ($IN, $CT);
    if ($CGI->request_method =~ /put/i)
    {
      $IN = $CGI->param('PUTDATA');
      $CT = $CGI->content_type;
    }
    elsif ($CGI->request_method =~ /post/i and $CGI->content_type =~ /sparql.query/i)
    {
      $IN = $CGI->param('POSTDATA');
      $CT = $CGI->content_type;
    }
    elsif($CGI->param('data'))
    {
      $IN = $CGI->param('data');
      $CT = $CGI->param('format') || 'text/n3';
    }
    
    my $olddata;
    if (@all_versions)
    {
      my $oldversion = $all_versions[-1];
      local $/ = undef;
      open my $fh, "<:gzip", $PATH.$SHORT."/${oldversion}.n3.gz";
      $olddata = <$fh>;
      close $fh;
      my $digest = Digest::MD5->new;
      $digest->add($olddata);
      my $oldetag = $NS.$SHORT."\@${oldversion}.n3".'='.$digest->clone->hexdigest;
      my ($bail, $reason) = &checkConditions($oldetag, $oldversion);
      
      if ($bail)
      {
        SendError('412 Precondition Failed', $reason);
      }
    }
    else
    {
      if ($CGI->http('if_match') =~ /^\s*\*\s*$/)
      {
        SendError('412 Precondition Failed', "'If-Match: *' in request header did not match, as no such resource exists.");
      }
    }
    
    &use_Trine;
    
    # Check it's a supported type.
    my ($parser, $sparql);
    given ($CT)
    {
      when (/turtle/i)     { $parser = RDF::Trine::Parser::Turtle->new; }
      when (/n3/i)         { $parser = RDF::Trine::Parser::Notation3->new; }
      when (/html/i)       { $parser = RDF::Trine::Parser::RDFa->new; }
      when (/xml/i)        { $parser = RDF::Trine::Parser::RDFXML->new; }
      when (/json/i)       { $parser = RDF::Trine::Parser::RDFJSON->new; }
      when (/text.plain/i) { $parser = RDF::Trine::Parser::NTriples->new; }
      when (/sparql.query/){ $sparql++; }
      default
      {
        SendError("415 Unsupported Media Type ($CT)",
          'Acceptable: text/turtle, text/n3, text/plain (i.e. N-Triples), application/xhtml+xml (i.e. XHTML+RDFa 1.0), application/rdf+xml and application/json (i.e. RDF/JSON).');
      }
    }
    
    # Check it's syntactically sound
    my $model;
    
    if ($sparql)
    {
      # MS-Author-Via: SPARQL
      &use_Query;
      
      # These are horrible hacks.
      if (1 || $CGI->http('user-agent') =~ /firefox/i)
      {
        if ($IN =~ /^ \s* WHERE \s* { (.*) } \s* (INSERT|DELETE) \s* { (.*) } \s* $/six)
          { $IN = "$2 { $3 } WHERE { $1 }"; }
        $IN =~ s/INSERT/INSERT DATA/i
          unless $IN =~ /WHERE/i || $IN =~ /INSERT\s+DATA/is;
        $IN =~ s/DELETE/DELETE DATA/i
          unless $IN =~ /WHERE/i || $IN =~ /DELETE\s+DATA/is;
      }

      $@ = undef;
      eval {
        $model = RDF::Trine::Model->temporary_model;
        RDF::Trine::Parser::Notation3->new
          ->parse_into_model($URI.$SHORT, $olddata, $model);
        my $query = RDF::Query->new($IN,
          { update=>1, load_data=>0, base=>$URI.$SHORT, lang=>'sparql11' });
        die RDF::Query->error if defined RDF::Query->error;
        $query->execute($model);
      };
      if ($@)
      {
        SendError('422 Unprocessable Entity', "$IN => ".$@);
      }
    }
    else
    {
      $@ = undef;
      eval {  
        $model = RDF::Trine::Model->temporary_model;
        $parser->parse_into_model($URI.$SHORT, $IN, $model);
      };
      if ($@)
      {
        SendError('422 Unprocessable Entity', $@);
      }
    }
    
    # We want to save as Notation 3, not whatever the hell
    # format it was posted to us in.
    my $best;
    if (
      defined $parser
      and (
        $parser->isa('RDF::Trine::Parser::Turtle')
        or $parser->isa('RDF::Trine::Parser::Notation3')
        or $parser->isa('RDF::Trine::Parser::NTriples')
      )
    )
    {
      # It's already some flavour of N3.
      $best = $IN;
    }
    else
    {
      # Serialise to N3 (Turtle to be exact).
      my $ser = RDF::Trine::Serializer::Turtle->new;
      $best = $ser->serialize_model_to_string($model);
    }
    
    # Save it!
    my $now = DateTime->now(formatter=>$COMPACTDATE);
    $now->set_time_zone('UTC');
    mkdir $PATH.$SHORT
      unless -d $PATH.$SHORT;
    open my $fh, ">:gzip", $PATH.$SHORT."/${now}.n3.gz";
    print $fh $best;
    close $fh;
    
    # Respond.
    SendData($best, "$now", "n3", {Status => ($olddata ? '200 OK' : '201 Created')}, 1);  
  }

  # Handle GET/HEAD and command-line usage
  elsif ($CGI->request_method =~ /^(get|head|)$/i)
  {
    my ($chosen_version, $chosen_format);
    
    # BEGIN: Handle choosing version, including Accept-Datetime header.
    {
      unless (@all_versions)
      {
        SendError('404 Not Found', 'This page has not yet been created.');
      }
      
      # If no date given in URI, but requested in Accept-Datetime
      # header, then reformat that to the expected datetime format.
      if ($CGI->http('accept_datetime') && !$DATE)
      {
        $DATE = $COMPACTDATE->format_datetime(
          DateTime::Format::HTTP->parse_datetime(
            $CGI->http('accept_datetime')));
      }
      
      # if no date given, or latest version requested, find latest version.
      if ($DATE =~ /^latest$/ || !$DATE)
      {
        $chosen_version = $all_versions[-1];
        $chosen_version =~ s!^.*/([^/]+)\.n3.gz$!$1!;
      }
      # otherwise, find latest version that is no later than given date.
      elsif ($DATE)
      {
        my $req_date = DateTime::Format::ISO8601->parse_datetime($DATE);
        my $test = $COMPACTDATE->format_datetime($req_date);
        my @candidates = 
          sort
          grep { $_ le $test }
          @all_versions;
        $chosen_version = (@candidates) ? $candidates[-1] : undef;
      }
      
      # No appropriate version found.
      if (! $chosen_version)
      {
        if ($CGI->http('accept_datetime'))
        {
          my $suggested_version = (@all_versions) ? $all_versions[0] : undef;
          $suggested_version =~ s!^.*/([^/]+)\.n3.gz$!$1!;
          
          SendError('406 Not Acceptable',
            "You requested Accept-Datetime:  ".$CGI->http('accept_datetime')."\n".
            "Earliest available version:     ".DateTime::Format::HTTP->format_datetime($COMPACTDATE->parse_datetime($suggested_version))."\n"
            );
        }
        else
        {
          SendError('303 See Other', "See ${URI}${SHORT}",
            {-Location => $URI.$SHORT.($EXT?".${EXT}":'')});
        }
      }
    }
    # END: Handle choosing version, including Accept-Datetime header.
    
    # Choose format
    my $chosen_format = $EXT || choose([
      ['n3',       1.0,  'text/n3',     undef, 'utf-8'],
      ['turtle',   0.6,  'text/turtle', undef, 'utf-8'],
      ['nt',       0.6,  'text/plain',  undef, 'utf-8'],
      ['json',     0.6,  'application/json',      undef, 'utf-8'],
      ['rdf',      0.6,  'application/rdf+xml',   undef, 'utf-8'],
      ['xhtml',    0.02, 'application/xhtml+xml', undef, 'utf-8'],
      ['html',     0.01, 'text/html',   undef, 'utf-8'],
      ]) || 'n3';
    
    # Handle requests for Gzipped Notation 3.
    if ($chosen_format eq 'n3' and $CGI->http('Accept-Encoding')=~/gzip/i)
    {
      my $data;
      {
        local $/ = undef;
        open my $fh, "<", $PATH.$SHORT."/${chosen_version}.n3.gz";
        $data = <$fh>;
        close $fh;
      }
      SendData($data, $chosen_version, $chosen_format, {'Content-Encoding' => 'gzip'});
    }
    
    # Otherwise we're going to need to unzip the data...
    my $data;
    {
      local $/ = undef;
      open my $fh, "<:gzip", $PATH.$SHORT."/${chosen_version}.n3.gz";
      $data = <$fh>;
      close $fh;
    }
    
    # Handle requests for uncompressed Notation 3.
    if ($chosen_format eq 'n3')
    {
      SendData($data, $chosen_version, $chosen_format);
    }
    
    &use_Trine;
    
    # Sometimes, if we've been requested for Turtle/NTriples, we might
    # be able to get away with serving the Notation 3 as-is.
    if ($chosen_format =~ m/^(nt|turtle)$/)
    {
      # check that by attempting to parse it with a non-N3 parser.
      my $parser = $chosen_format eq 'nt' ?
        RDF::Trine::Parser::NTriples->new :
        RDF::Trine::Parser::Turtle->new;
      eval {
        my $tmpmodel = RDF::Trine::Model->temporary_model;
        $parser->parse_into_model($URI.$SHORT, $data, $tmpmodel);
      };
      # No errors, so...
      if (! $@)
      {
        SendData($data, $chosen_version, $chosen_format);
      }
    }
    
    # OK, we're going to need to reserialize the data...
    my $sclass = 'RDF::Trine::Serializer::' . {
      'rdf'       => 'RDFXML',
      'json'      => 'RDFJSON',
      'canonical' => 'NTriples::Canonical',
      'turtle'    => 'Turtle',
      'nt'        => 'NTriples',
      }->{$chosen_format};
    
    if ($sclass eq 'RDF::Trine::Serializer::' or !$sclass->can('new'))
    {
      SendError('404 Not Found', "No variant of '/${NS}${SHORT}' found with suffix '.${EXT}'. Try '.n3'??")
        unless ($chosen_format eq 'html' or $chosen_format eq 'xhtml');
    }

    my $parser = RDF::Trine::Parser::Notation3->new;
    my $tmpmodel_n3 = RDF::Trine::Model->temporary_model;
    $parser->parse_into_model($URI.$SHORT, $data, $tmpmodel_n3);
    my $tmpmodel = RDF::Trine::Model->temporary_model;
    my $iter = $tmpmodel_n3->as_stream;
    while (my $st = $iter->next)
    {
      $tmpmodel->add_statement($st) if $st->rdf_compatible;
    }

    if ($chosen_format eq 'html' or $chosen_format eq 'xhtml')
    {
      {
        local $@ = undef;
        eval 'use RDF::RDFa::Generator;';
        die $@ if $@;
        eval 'use HTML::HTML5::Writer;';
        die $@ if $@;
      }
      my $gen = RDF::RDFa::Generator->new(
        base     => $URI.$SHORT,
        safe_xml_literals => 1,
        style    => 'HTML::Pretty',
        );
      my $dom = $gen->inject_document(<<TEMPLATE, $tmpmodel);
  <html xmlns="http://www.w3.org/1999/xhtml">
    <head profile="http://www.w3.org/1999/xhtml/vocab">
      <title>Data Wiki: ${NS}${SHORT}</title>
      <style type="text/css">
        dt { font-weight: bold ; }
        div[about] { border: 2px solid green; background: #dfd; padding: 1em; margin: 1em 0; }
        img { float: right; }
      </style>
    </head>
    <body>
      <h1>${NS}${SHORT}</h1>
      <p>This is a page of data on the wiki.ontologi.es Data Wiki.</p>
      <h2>Data</h2>
    </body>
  </html>
  TEMPLATE

      if ($chosen_format eq 'html')
      {
        SendData(HTML::HTML5::Writer->new->document($dom), $chosen_version, $chosen_format);
      }
      else
      {
        SendData(HTML::HTML5::Writer->new(
          markup  => 'xhtml',
          doctype => HTML::HTML5::Writer->DOCTYPE_XHTML_RDFA
          )->document($dom), $chosen_version, $chosen_format);
      }
    }
    
    $data = $sclass->new(
      namespaces => $parser->{bindings}||{},  # little hack to retain prefixes.
      style      => 'HTML::Pretty',
      )->serialize_model_to_string($tmpmodel);
    
    SendData($data, $chosen_version, $chosen_format);
  }

  else
  {
    SendError('405 Method Not Allowed', "Allowed: HEAD, GET, PUT, POST.");
  }

SEE ALSO ^

WWW::DataWiki.

AUTHOR ^

Toby Inkster <tobyink@cpan.org>.

COPYRIGHT AND LICENCE ^

This software is copyright (c) 2010-2011 by Toby Inkster.

This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself.

DISCLAIMER OF WARRANTIES ^

THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.

syntax highlighting: