The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package SimpleXlsx;

use strict;
use warnings;
use Archive::Zip qw( :ERROR_CODES );
use XML::Simple;
use File::Basename;
use Data::Dumper;

require Exporter;

our @ISA = qw(Exporter);

# Items to export into callers namespace by default. Note: do not export
# names by default without a very good reason. Use EXPORT_OK instead.
# Do not simply export all your public functions/methods/constants.

our @EXPORT_OK = ( 'parse' );

our $VERSION = '0.05';

our $zip;

# Preloaded methods go here.

sub new {
  my $package = shift;
  
  $zip = Archive::Zip->new();
  return bless({}, $package);
}

sub getValues {
  my(@zStrings) = $zip->membersMatching('^xl/sharedStrings');
  
  if ($#zStrings > 0) {
    warn "Error: Multiple shared strings are not [yet] supported\n";
  }
  
  my($xml) = new XML::Simple;
  my($sstrings) = $zStrings[0];
  $sstrings = $sstrings->contents();
  my($tstrings) = $xml->XMLin($sstrings);
  
  my(@strings);
  for my $idx (0 .. $#{$tstrings->{'si'}}) {
    push @strings, $tstrings->{'si'}->[$idx]->{'t'};
  }
  
  return @strings;
}

sub getWorksheets {
  return $zip->membersMatching('^xl/worksheets');
}

sub getStyles {
  my(@zStyles) = $zip->membersMatching('^xl/styles');
  my($data) = $zStyles[0]->contents();
  
  my($xml) = new XML::Simple;
  $data = $xml->XMLin($data);
  
  my(@cellFormats);
  my(%fonts);
  my(%borders);
  
  my($xcellFormats) = $data->{'cellXfs'}->{'xf'};
  my($xfonts) = $data->{'fonts'}->{'font'};
  my($xborders) = $data->{'borders'}->{'border'};
  
  my($idx) = 0;
  if (ref($xfonts)) {
    for my $ind (0 .. $#{$xfonts}) {
      $fonts{$idx} = {
        'Name'    => $xfonts->[$ind]->{'name'}->{'val'},
        'Size'    => $xfonts->[$ind]->{'sz'}->{'val'},
        'Bold'    => defined $xfonts->[$ind]->{'b'} ? '1' : '0'
      };
    
      $idx++;
    }
  }
  
  $idx = 0;
  for my $ind (0 .. $#{$xborders}) {
    $borders{$idx} = {
      'Left'      => {
        'Color' => defined $xborders->[$ind]->{'left'}->{'color'}->{'indexed'} ? $xborders->[$ind]->{'left'}->{'color'}->{'indexed'} : '',
        'Style' => defined $xborders->[$ind]->{'left'}->{'style'} ? $xborders->[$ind]->{'left'}->{'style'} : ''
      },
      'Right'     => {
        'Color' => defined $xborders->[$ind]->{'right'}->{'color'}->{'indexed'} ? $xborders->[$ind]->{'right'}->{'color'}->{'indexed'} : '',
        'Style' => defined $xborders->[$ind]->{'right'}->{'style'} ? $xborders->[$ind]->{'right'}->{'style'} : ''
      },
      'Top'       => {
        'Color' => defined $xborders->[$ind]->{'top'}->{'color'}->{'indexed'} ? $xborders->[$ind]->{'top'}->{'color'}->{'indexed'} : '',
        'Style' => defined $xborders->[$ind]->{'top'}->{'style'} ? $xborders->[$ind]->{'left'}->{'top'} : ''
      },
      'Bottom'    => {
        'Color' => defined $xborders->[$ind]->{'bottom'}->{'color'}->{'indexed'} ? $xborders->[$ind]->{'bottom'}->{'color'}->{'indexed'} : '',
        'Style' => defined $xborders->[$ind]->{'bottom'}->{'style'} ? $xborders->[$ind]->{'bottom'}->{'style'} : ''
      },
      'Diagonal'  => {
        'Color' => defined $xborders->[$ind]->{'diagonal'}->{'color'}->{'indexed'} ? $xborders->[$ind]->{'diagonal'}->{'color'}->{'indexed'} : '',
        'Style' => defined $xborders->[$ind]->{'diagonal'}->{'style'} ? $xborders->[$ind]->{'diagonal'}->{'style'} : ''
      }
    };
    
    $idx++;
  }
  
  $idx = 0;
  for my $ind (0 .. $#{$xcellFormats}) {
    my($bix) = $xcellFormats->[$ind]->{'borderId'};
    push @cellFormats, {
      'fillId'    => $xcellFormats->[$ind]->{'fillId'},
      'Font'      => $fonts{$xcellFormats->[$ind]->{'fontId'}},
      'xfId'      => $xcellFormats->[$ind]->{'xfId'},
      'numFmtId'  => $xcellFormats->[$ind]->{'numFmtId'},
      'Border'    => $borders{$bix}
    };
    
    $idx++;
  }
  
  return \@cellFormats;
}

sub parse {
  my($self, $file) = @_;
  
  my($ret) = $zip->read($file);
  unless ($ret == AZ_OK) {
    warn "Unable to read file \"$file\" ($!)\n";
    return undef;
  }
  
  # For now we are only interested in worksheets and the shared strings
  my(@zWorksheets) = $self->getWorksheets();
  my(@strings) = $self->getValues();
  my($styles) = $self->getStyles();
  my(%worksheets);
  my(@sheetNames);
  
  $worksheets{'Worksheets'} = [];
  
  $worksheets{'Total Worksheets'} = ($#zWorksheets + 1);
  for my $file (@zWorksheets) {
    my(%worksheet);
    my($contents) = $file->contents();
    my($name) = basename($file->fileName());
    $name =~ s/\.xml$//;
    
    my($xml) = new XML::Simple;
    my($data) = $xml->XMLin($contents);
    
    my($sData) = $data->{'sheetData'}->{'row'};
    my($sMerge) = $data->{'mergeCells'}->{'mergeCell'};
    
    my(%merge);
    for my $mc (@{$sMerge}) {
      my($from, $to) = split(':', $mc->{'ref'});
      
      $from =~ /([a-zA-Z]+)([0-9]+)/;
      my($col1, $row1) = ($1, $2);
      
      $to =~ /([a-zA-Z]+)([0-9]+)/;
      my($col2, $row2) = ($1, $2);
      
      $merge{$row1} = {
        'From' => { 'Row' => $row1, 'Column' => $col1 },
        'To' => { 'Row' => $row2, 'Column' => $col2 }
      };
    }
    
    my(@tcol);
    for my $col (0 .. $#{$sData->[0]->{'c'}}) {
      push @tcol, $sData->[0]->{'c'}->[$col]->{'r'};
    }
    $worksheet{'Columns'} = \@tcol;
    
    my(@trow);
    my(%tdata);
    for my $row (0 .. $#{$sData}) {
      my($cols) = $sData->[$row]->{'c'};
      
      my(@rdata);
      my(@sdata);
      for my $col (0 .. $#{$cols}) {
        if (!defined $cols->[$col]->{'v'}) {
          push @rdata, '';
        }
        else {
          if (defined $cols->[$col]->{'t'}) {
            push @rdata, ($cols->[$col]->{'t'} eq 's' ?  $strings[$cols->[$col]->{'v'}] : $cols->[$col]->{'v'});
          }
        }

        if (defined $styles->[$cols->[$col]->{'s'}]) {
          push @sdata, $styles->[$cols->[$col]->{'s'}];
        }
      }

      if (defined $sData->[$row]->{'r'}) {
        push @trow, $sData->[$row]->{'r'};
        $tdata{$sData->[$row]->{'r'}}{'Data'} = \@rdata;
        $tdata{$sData->[$row]->{'r'}}{'Style'} = \@sdata;
      }
    }
    
    $worksheet{'Rows'} = \@trow;
    $worksheet{'Data'} = \%tdata;
    $worksheet{'Merge'} = \%merge;
    
    $worksheets{$name} = \%worksheet;
    
    push @sheetNames, $name;
  }
  
  $worksheets{'Worksheets'} = \@sheetNames;
  
  return \%worksheets;
}

1;
__END__

=head1 NAME

SimpleXlsx - Perl extension to read data from a Microsoft Excel 2007 XLSX file

=head1 SYNOPSIS

  use SimpleXlsx;
  
  my($xlsx) = SimpleXlsx->new();
  my($worksheets) = $xlsx->parse('/path/to/workbook.xlsx');

=head1 DESCRIPTION

SimpleXlsx is a rudamentary extension to allow parsing information stored in
Microsoft Excel XLSX spreadsheets.

=head2 EXPORT

None by default.

=head1 SEE ALSO

This module is intended as a quick method of extracting the raw data from
the XLSX file format. This module uses Archive::Zip to extract the contents
of the XLSX file and XML::Simple for parsing the contents.

=head1 AUTHOR

Joe Estock, E<lt>jestock@blendernet.comE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2010 by Joe Estock

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.8 or,
at your option, any later version of Perl 5 you may have available.

=cut