package Archive::Har;
use warnings;
use strict;
use English qw(-no_match_vars);
use Archive::Har::Creator();
use Archive::Har::Browser();
use Archive::Har::Page();
use Archive::Har::Entry();
use XML::LibXML();
use IO::Compress::Gzip();
use IO::Uncompress::Gunzip();
use JSON();
use overload '""' => 'string';
our $VERSION = '0.21';
sub new {
my ( $class, $params ) = @_;
my $self = {};
bless $self, $class;
$self->_init();
return $self;
}
sub _init {
my ($self) = @_;
foreach my $key ( keys %{$self} ) {
delete $self->{$key};
}
$self->{log} = {};
return;
}
sub gzip {
my ( $self, $gzipped ) = @_;
my $uncompressed = $self->string();
my $old;
IO::Compress::Gzip::gzip( \$uncompressed, \$old,
-Level => IO::Compress::Gzip::Z_BEST_COMPRESSION() )
or
Carp::croak("Failed to gzip HAR archive:$IO::Compress::Gzip::GzipError");
if ( defined $gzipped ) {
my $string;
IO::Uncompress::Gunzip::gunzip( \$gzipped, \$string )
or Carp::croak('Failed to gunzip HAR archive');
$self->string($string);
}
return $old;
}
sub hashref {
my ( $self, $ref ) = @_;
my $old = JSON->new()->utf8()->decode( $self->string() );
if ( ( @_ > 1 ) && ( defined $ref ) ) {
$self->_init();
$self->version( $ref->{log}->{version} );
$self->creator( Archive::Har::Creator->new( $ref->{log}->{creator} ) );
if ( defined $ref->{log}->{browser} ) {
$self->browser(
Archive::Har::Browser->new( $ref->{log}->{browser} ) );
}
if ( defined $ref->{log}->{pages} ) {
$self->pages( $ref->{log}->{pages} );
}
$self->entries( $ref->{log}->{entries} );
$self->comment( $ref->{log}->{comment} );
}
return $old;
}
sub string {
my ( $self, $string ) = @_;
if ( defined $string ) {
my $utf8_regex = qr/\xef\xbb\xbf/smx;
my $utf16_regex = qr/(?:\xfe\xff|\xff\xfe)/smx;
my $utf32_regex = qr/(?:\x00\x00\xfe\xff|\xff\xfe\x00\x00)/smx;
$string =~ s/^(?:$utf8_regex|$utf16_regex|$utf32_regex)//smxg;
}
my $json = JSON->new();
$json = $json->utf8();
$json = $json->allow_blessed(1);
$json = $json->convert_blessed(1);
$json = $json->pretty();
$json = $json->canonical(1);
my $old = $json->encode($self);
if ( ( @_ > 1 ) && ( defined $string ) ) {
my $ref = JSON->new()->utf8()->decode($string);
$self->hashref($ref);
}
return $old;
}
sub _xml_creator {
my ( $self, $ie_log ) = @_;
foreach my $ie_creator ( $ie_log->getChildrenByTagName('creator') ) {
$self->creator( Archive::Har::Creator->new() );
foreach my $ie_name ( $ie_creator->getChildrenByTagName('name') ) {
$self->creator()->name( $ie_name->findvalue('text()') );
}
foreach my $ie_value ( $ie_creator->getChildrenByTagName('version') ) {
$self->creator()->version( $ie_value->findvalue('text()') );
}
}
return;
}
sub _xml_browser {
my ( $self, $ie_log ) = @_;
foreach my $ie_browser ( $ie_log->getChildrenByTagName('browser') ) {
$self->browser( Archive::Har::Creator->new() );
foreach my $ie_name ( $ie_browser->getChildrenByTagName('name') ) {
$self->browser()->name( $ie_name->findvalue('text()') );
}
foreach my $ie_value ( $ie_browser->getChildrenByTagName('version') ) {
$self->browser()->version( $ie_value->findvalue('text()') );
}
}
return;
}
sub _xml_pages {
my ( $self, $ie_log ) = @_;
foreach my $ie_pages ( $ie_log->getChildrenByTagName('pages') ) {
my @pages;
foreach my $ie_page ( $ie_pages->getChildrenByTagName('page') ) {
my $page = Archive::Har::Page->new();
foreach my $ie_id ( $ie_page->getChildrenByTagName('id') ) {
$page->id( $ie_id->findvalue('text()') );
}
foreach my $ie_title ( $ie_page->getChildrenByTagName('title') ) {
$page->title( $ie_title->findvalue('text()') );
}
foreach my $ie_started (
$ie_page->getChildrenByTagName('startedDateTime') )
{
$page->started_date_time( $ie_started->findvalue('text()') );
}
my $page_timings = Archive::Har::Page::PageTimings->new();
foreach
my $ie_timings ( $ie_page->getChildrenByTagName('pageTimings') )
{
foreach my $ie_content (
$ie_timings->getChildrenByTagName('onContentLoad') )
{
$page_timings->on_content_load(
$ie_content->findvalue('text()') );
}
foreach
my $ie_load ( $ie_timings->getChildrenByTagName('onLoad') )
{
$page_timings->on_load( $ie_load->findvalue('text()') );
}
$page->page_timings($page_timings);
}
push @pages, $page;
}
$self->pages( \@pages );
}
return;
}
sub _xml_cookies {
my ( $self, $ie_object, $object ) = @_;
foreach my $ie_cookies ( $ie_object->getChildrenByTagName('cookies') ) {
my @cookies;
foreach my $ie_cookie ( $ie_cookies->getChildrenByTagName('cookie') ) {
my $cookie = Archive::Har::Entry::Cookie->new();
foreach my $ie_name ( $ie_cookie->getChildrenByTagName('name') ) {
$cookie->name( $ie_name->findvalue('text()') );
}
foreach my $ie_value ( $ie_cookie->getChildrenByTagName('value') ) {
$cookie->value( $ie_value->findvalue('text()') );
}
push @cookies, $cookie;
}
$object->cookies( \@cookies );
}
return;
}
sub _xml_headers {
my ( $self, $ie_object, $object ) = @_;
foreach my $ie_headers ( $ie_object->getChildrenByTagName('headers') ) {
my @headers;
foreach my $ie_header ( $ie_headers->getChildrenByTagName('header') ) {
my $header = Archive::Har::Entry::Header->new();
foreach my $ie_name ( $ie_header->getChildrenByTagName('name') ) {
$header->name( $ie_name->findvalue('text()') );
}
foreach my $ie_value ( $ie_header->getChildrenByTagName('value') ) {
$header->value( $ie_value->findvalue('text()') );
}
push @headers, $header;
}
$object->headers( \@headers );
}
return;
}
sub _xml_request {
my ( $self, $ie_entry, $entry ) = @_;
foreach my $ie_request ( $ie_entry->getChildrenByTagName('request') ) {
my $request = $entry->request();
foreach my $ie_method ( $ie_request->getChildrenByTagName('method') ) {
$request->method( $ie_method->findvalue('text()') );
}
foreach my $ie_url ( $ie_request->getChildrenByTagName('url') ) {
$request->url( $ie_url->findvalue('text()') );
}
foreach
my $ie_version ( $ie_request->getChildrenByTagName('httpVersion') )
{
$request->http_version( $ie_version->findvalue('text()') );
}
$self->_xml_cookies( $ie_request, $request );
$self->_xml_headers( $ie_request, $request );
foreach my $ie_query_string (
$ie_request->getChildrenByTagName('queryString') )
{
my @query_strings;
foreach
my $ie_param ( $ie_query_string->getChildrenByTagName('param') )
{
my $query_string =
Archive::Har::Entry::Request::QueryString->new();
foreach my $ie_name ( $ie_param->getChildrenByTagName('name') )
{
$query_string->name( $ie_name->findvalue('text()') );
}
foreach
my $ie_value ( $ie_param->getChildrenByTagName('value') )
{
$query_string->value( $ie_value->findvalue('text()') );
}
push @query_strings, $query_string;
}
$request->query_string( \@query_strings );
}
foreach
my $ie_post_data ( $ie_request->getChildrenByTagName('postData') )
{
my $post_data = Archive::Har::Entry::Request::PostData->new();
foreach my $ie_mime_type (
$ie_post_data->getChildrenByTagName('mimeType') )
{
$post_data->mime_type( $ie_mime_type->findvalue('text()') );
}
foreach my $ie_text ( $ie_post_data->getChildrenByTagName('text') )
{
$post_data->text( $ie_text->findvalue('text()') );
}
$request->post_data($post_data);
}
foreach my $ie_headers_size (
$ie_request->getChildrenByTagName('headersSize') )
{
$request->headers_size( $ie_headers_size->findvalue('text()') );
}
foreach
my $ie_body_size ( $ie_request->getChildrenByTagName('bodySize') )
{
$request->body_size( $ie_body_size->findvalue('text()') );
}
$entry->request($request);
}
return;
}
sub _xml_response {
my ( $self, $ie_entry, $entry ) = @_;
foreach my $ie_response ( $ie_entry->getChildrenByTagName('response') ) {
my $response = $entry->response();
foreach my $ie_status ( $ie_response->getChildrenByTagName('status') ) {
$response->status( $ie_status->findvalue('text()') );
}
foreach my $ie_status_text (
$ie_response->getChildrenByTagName('statusText') )
{
$response->status_text( $ie_status_text->findvalue('text()') );
}
foreach
my $ie_version ( $ie_response->getChildrenByTagName('httpVersion') )
{
$response->http_version( $ie_version->findvalue('text()') );
}
$self->_xml_cookies( $ie_response, $response );
$self->_xml_headers( $ie_response, $response );
foreach my $ie_content ( $ie_response->getChildrenByTagName('content') )
{
my $content = Archive::Har::Entry::Response::Content->new();
foreach
my $ie_mime_type ( $ie_content->getChildrenByTagName('mimeType') )
{
$content->mime_type( $ie_mime_type->findvalue('text()') );
}
foreach my $ie_text ( $ie_content->getChildrenByTagName('text') ) {
$content->text( $ie_text->findvalue('text()') );
}
foreach my $ie_size ( $ie_content->getChildrenByTagName('size') ) {
$content->size( $ie_size->findvalue('text()') );
}
$response->content($content);
}
foreach my $ie_redirect_url (
$ie_response->getChildrenByTagName('redirectionURL') )
{
$response->redirect_url( $ie_redirect_url->findvalue('text()') );
}
foreach my $ie_headers_size (
$ie_response->getChildrenByTagName('headersSize') )
{
$response->headers_size( $ie_headers_size->findvalue('text()') );
}
foreach
my $ie_body_size ( $ie_response->getChildrenByTagName('bodySize') )
{
$response->body_size( $ie_body_size->findvalue('text()') );
}
$entry->response($response);
}
return;
}
sub _xml_entries {
my ( $self, $ie_log ) = @_;
foreach my $ie_entries ( $ie_log->getChildrenByTagName('entries') ) {
my @entries;
foreach my $ie_entry ( $ie_entries->getChildrenByTagName('entry') ) {
my $entry = Archive::Har::Entry->new();
foreach
my $ie_pageref ( $ie_entry->getChildrenByTagName('pageref') )
{
$entry->pageref( $ie_pageref->findvalue('text()') );
}
foreach my $ie_started (
$ie_entry->getChildrenByTagName('startedDateTime') )
{
$entry->started_date_time( $ie_started->findvalue('text()') );
}
foreach
my $ie_timings ( $ie_entry->getChildrenByTagName('timings') )
{
my $timings = Archive::Har::Entry::Timings->new();
foreach
my $ie_send ( $ie_timings->getChildrenByTagName('send') )
{
$timings->send( $ie_send->findvalue('text()') );
}
foreach
my $ie_wait ( $ie_timings->getChildrenByTagName('wait') )
{
$timings->wait( $ie_wait->findvalue('text()') );
}
foreach my $ie_receive (
$ie_timings->getChildrenByTagName('receive') )
{
$timings->receive( $ie_receive->findvalue('text()') );
}
$entry->timings($timings);
}
$self->_xml_request( $ie_entry, $entry );
$self->_xml_response( $ie_entry, $entry );
push @entries, $entry;
}
$self->entries( \@entries );
}
return;
}
sub xml {
my ( $self, $xml ) = @_;
my $parser = XML::LibXML->new();
my $ie_dom = $parser->parse_string($xml);
my $ie_log = $ie_dom->documentElement();
$self->_init();
foreach my $ie_version ( $ie_log->getChildrenByTagName('version') ) {
$self->version( $ie_version->findvalue('text()') );
}
$self->_xml_creator($ie_log);
$self->_xml_browser($ie_log);
$self->_xml_pages($ie_log);
$self->_xml_entries($ie_log);
return;
}
sub version {
my ( $self, $new ) = @_;
my $old = $self->{log}->{version};
if ( @_ > 1 ) {
$self->{log}->{version} = $new;
}
if ( defined $old ) {
return $old;
}
else {
return '1.1';
}
}
sub creator {
my ( $self, $new ) = @_;
my $old = $self->{log}->{creator};
if ( @_ > 1 ) {
$self->{log}->{creator} = $new;
}
return $old;
}
sub browser {
my ( $self, $new ) = @_;
my $old = $self->{log}->{browser};
if ( @_ > 1 ) {
$self->{log}->{browser} = $new;
}
return $old;
}
sub pages {
my ( $self, $new ) = @_;
my $old = $self->{log}->{pages};
if ( @_ > 1 ) {
if ( defined $new ) {
$self->{log}->{pages} = [];
my $page_count = 0;
foreach my $page ( @{$new} ) {
if ( !defined $page->{id} ) {
$page->{id} = 'page_' . $page_count;
}
push @{ $self->{log}->{pages} }, Archive::Har::Page->new($page);
$page_count += 1;
}
}
}
if ( defined $old ) {
return @{$old};
}
else {
return ();
}
}
sub entries {
my ( $self, $entries ) = @_;
my $old = $self->{log}->{entries} || [];
if ( @_ > 1 ) {
$self->{log}->{entries} = [];
foreach my $entry ( @{$entries} ) {
push @{ $self->{log}->{entries} }, Archive::Har::Entry->new($entry);
}
}
return @{$old};
}
sub comment {
my ( $self, $comment ) = @_;
my $old = $self->{log}->{comment};
if ( @_ > 1 ) {
$self->{log}->{comment} = $comment;
}
return $old;
}
sub TO_JSON {
my ($self) = @_;
my $json = {};
$json->{version} = $self->version();
$json->{creator} = $self->creator();
if ( defined $self->browser() ) {
$json->{browser} = $self->browser();
}
if ( defined $self->pages() ) {
$json->{pages} = [ $self->pages() ];
}
$json->{entries} = [ $self->entries() ];
if ( defined $self->comment() ) {
$json->{comment} = $self->comment();
}
return { log => $json };
}
1; # End of Archive::Har
__END__
=head1 NAME
Archive::Har - Provides an interface to HTTP Archive (HAR) files
=head1 VERSION
Version '0.21'
=for stopwords xml gzip stringified HAR gzipped gzipping JSON hashref gunzipping
=head1 SYNOPSIS
use Archive::Har();
my $http_archive_string = '"log": { "version": "1.1", .... ';
my $har = Archive::Har->new();
$har->string($http_archive_string);
print $har->creator()->name() . ' version ' . $har->creator()->version();
$har->creator()->name("new name"); # update har
print $har->browser()->name() . ' version ' . $har->browser()->version();
foreach my $page = $har->pages()) {
$page->comment("Something interesting here");
print "Page Title: " . $page->title() . "\n";
}
print $har; # print har in stringified pretty form
...
=head1 DESCRIPTION
This Module is intended to provide an interface to create/read/update
entire HTTP Archive (HAR) files.
=head1 SUBROUTINES/METHODS
=head2 new
Archive::Har->new() will return a new HAR object, ready to process HTTP archives
=head2 string
$har->string() accepts a stringified version of an L<HTTP archive|http://www.softwareishard.com/blog/har-12-spec/> and parses it. It returns the previous state of the archive in stringified form
=head2 hashref
$har->hashref() accepts a hashref of the L<HTTP archive|http://www.softwareishard.com/blog/har-12-spec/> and parses it. It returns a hashref of the previous state of the archive
=head2 gzip
$har->gzip() accepts a gzipped version of an L<HTTP archive|http://www.softwareishard.com/blog/har-12-spec/> and parses it. It returns a gzipped version of the previous state of the archive
=head2 xml
$har->xml() accepts a stringified version of Internet Explorer's Network Inspector XML export and parses it. There is no return value
=head2 version
$har->version() will return the version of the HTTP Archive ('1.1' by default)
=head2 creator
$har->creator() will return the L<creator|Archive::Har::Creator> object for the HTTP Archive
=head2 browser
$har->browser() will return the L<browser|Archive::Har::Browser> object for the HTTP Archive
=head2 pages
$har->pages() will return the list of L<page|Archive::Har::Page> objects for the HTTP Archive
=head2 entries
$har->entries() will return the list of L<entry|Archive::Har::Entry> objects for the HTTP Archive
=head2 comment
$har->comment() will return the comment for the HTTP Archive
=head1 DIAGNOSTICS
=over
=item C<< Failed to gzip HAR archive >>
An error occurred while gzipping.
=item C<< Failed to gunzip HAR archive >>
An error occurred while gunzipping.
=back
=head1 CONFIGURATION AND ENVIRONMENT
Archive::Har requires no configuration files or environment variables.
=head1 DEPENDENCIES
Archive::Har requires the following non-core Perl modules
=over
=item *
L<JSON|JSON>
=item *
L<IO::Compress::Gzip|IO::Compress:Gzip>
=item *
L<IO::Uncompress::Gunzip|IO::Uncompress:Gunzip>
=item *
L<XML::LibXML|XML::LibXML>
=back
=head1 INCOMPATIBILITIES
None reported
=head1 SEE ALSO
L<HTTP Archive 1.2 Specification|http://www.softwareishard.com/blog/har-12-spec/>
=head1 AUTHOR
David Dick, C<< <ddick at cpan.org> >>
=head1 BUGS AND LIMITATIONS
Please report any bugs or feature requests to C<bug-archive-har at rt.cpan.org>, or through
the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Archive-Har>. I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.
=head1 LICENSE AND COPYRIGHT
Copyright 2015 David Dick.
This program is free software; you can redistribute it and/or modify it
under the terms of either: the GNU General Public License as published
by the Free Software Foundation; or the Artistic License.
See http://dev.perl.org/licenses/ for more information.