/* -*- c -*- */
/* gzip.xs
*
* Copyright (C) 2001, Nicholas Clark
*
* You may distribute this work under the terms of either the GNU General
* Public License or the Artistic License, as specified in perl's README
* file.
*
*/
#define PERL_NO_GET_CONTEXT
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "perliol.h"
/* Constant associated with zip files: */
#define ZIP_ENDCENTRALDIRSIZE 22L
#define ZIP_ENDCENTRALDIRNEEDED 16
#define ZIP_CENTRALDIRENTRYSIZE 46
#define ZIP_LOCALFILEHEADERSIZE 30
#define ZIP_MAXCOMMENTLEN 65535
#define UNZIP_VERSION 20
#define ZIP_DIRATTR 0x10
#define ZIP_DEFLATED 8
#define ZIP_STORED 0
/* This associated with this implemementation: */
struct cache {
off_t size;
off_t progress;
time_t mtime;
};
#define CLASSNAME "ex::lib::zip"
#define LIBZIB_OBJ HV
#define UNZIP_SEARCH_BUFFERSIZE 512 + 3
/* returns the offset of the central directory, or 0 if it's not a valid
zip file (for any reason). */
static off_t
openzipfile (PerlIO *zipfile, off_t length)
{
off_t searchfrom;
off_t searchlen;
off_t searchoffset;
off_t toread;
off_t tosearch;
unsigned char buffer[UNZIP_SEARCH_BUFFERSIZE];
unsigned char *where;
dTHX;
if (length < ZIP_ENDCENTRALDIRSIZE) {
#ifdef DEBUG_LIBZIP
PerlIO_debug("length %ld < ZIP_ENDCENTRALDIRSIZE (%ld) so can't be zip\n",
(long)length, (long)ZIP_ENDCENTRALDIRSIZE);
#endif
return 0;
}
if (length < (ZIP_MAXCOMMENTLEN + ZIP_ENDCENTRALDIRSIZE)) {
searchfrom = 0;
searchlen = length - ZIP_ENDCENTRALDIRSIZE;
} else {
searchlen = ZIP_MAXCOMMENTLEN;
searchfrom = length - searchlen - ZIP_ENDCENTRALDIRSIZE;
}
/* First read just enough to work if there is zero comment. */
toread = 4 + ZIP_ENDCENTRALDIRNEEDED;
tosearch = 1;
searchoffset = searchfrom + searchlen;
#ifdef DEBUG_LIBZIP
PerlIO_debug("Search %ld to %ld, starting search from %ld, length %ld\n",
(long)searchfrom, (long)(searchfrom + searchlen),
(long)searchoffset, (long)toread);
#endif
while (1) {
unsigned char *buffer_end = buffer + toread;
where = buffer + tosearch;
if (PerlIO_seek (zipfile, searchoffset, SEEK_SET) == -1
|| (PerlIO_read (zipfile, buffer, toread) != toread)) {
#ifdef DEBUG_LIBZIP
PerlIO_debug("Read or seek failed\n");
#endif
return 0;
}
while (where-- > buffer) {
#if 0
#ifdef DEBUG_LIBZIP
if (isprint(where[0]))
PerlIO_debug(" %c\n", where[0]);
else
PerlIO_debug("%02X\n", where[0]);
#endif
#endif
if (where[0] == 0x50 && where[1] == 0x4b && where[2] == 0x05
&& where[3] == 0x06) {
/* Found the signature. */
int entries;
long size;
long offset;
/* Record where in the file we found the signature */
searchoffset += (where - buffer);
#ifdef DEBUG_LIBZIP
PerlIO_debug("Hit at %ld\n", (long)searchoffset);
#endif
/* Do we have enough of the end central directory to be useful?
*/
where +=4;
if ((where + ZIP_ENDCENTRALDIRNEEDED) > (buffer_end)) {
int got = buffer_end - where;
int need = ZIP_ENDCENTRALDIRNEEDED - got;
#ifdef DEBUG_LIBZIP
PerlIO_debug("Got %d, reading %d\n", got, need);
#endif
memmove (buffer, where, got);
/* File pointer will be at end of block we just buffered. */
if (PerlIO_read (zipfile, buffer + got, need) != need)
return 0;
where = buffer;
}
/* Got it. */
#ifdef DEBUG_LIBZIP
PerlIO_debug("Hit!\n");
#endif
if (where[0] || where[1] || where[2] || where[3]) {
#ifdef DEBUG_LIBZIP
PerlIO_debug("This is not disc zero, or central dir is not on disc zero.\n");
#endif
return 0;
}
entries = where[6] | (where[7] << 8);
size = where[8] | (where[9] << 8) | (where[10] << 16)
| (where[11] << 24);
offset = where[12] | (where[13] << 8) | (where[14] << 16)
| (where[15] << 24);
#ifdef DEBUG_LIBZIP
PerlIO_debug("%ld entries, offset %ld, size %ld, ends at %ld\n",
entries, (long)offset, (long)size, (long)(offset + size));
#endif
if ((offset == 0) || (size + offset != searchoffset)) {
/* Central directory is at start of zip (hence no files in it) or
calculated position of end of central directory does not match
actual location. */
#ifdef DEBUG_LIBZIP
PerlIO_debug("Bah. dir at zero, or not where expected.\n");
#endif
return 0;
}
#ifdef DEBUG_LIBZIP
PerlIO_debug("Success\n");
#endif
return offset; /* Start with first file of central dir. */
}
}
/* Run out of buffered data. */
toread = UNZIP_SEARCH_BUFFERSIZE-3;
searchoffset -= toread;
#ifdef DEBUG_LIBZIP
PerlIO_debug("reading another %ld bytes from offset %ld\n", (long)toread,
(long)searchoffset);
#endif
if (searchoffset < searchfrom) {
/* No longer a whole buffer left. */
toread -= (searchfrom - searchoffset);
if (toread == 0) {
/* Reading nothing means we are out of data. */
return 0;
}
searchoffset = searchfrom;
#ifdef DEBUG_LIBZIP
PerlIO_debug("------- another %ld bytes from offset %ld\n", (long)toread,
(long)searchoffset);
#endif
}
tosearch = toread;
/* Copy over the first 3 bytes. */
buffer[UNZIP_SEARCH_BUFFERSIZE-1] = buffer[2];
buffer[UNZIP_SEARCH_BUFFERSIZE-2] = buffer[1];
buffer[UNZIP_SEARCH_BUFFERSIZE-3] = buffer[0];
/* Loop. */
}
}
/* 0 is success. non-0 is failure. This subrouting checks the zip file
central directory (either our cache, or resume the linear search, for
the file named in the SV. */
static int
findfileinzipdir (HV *self, struct cache *aswas, PerlIO *fp, SV *file)
{
dTHX;
SV **entry;
STRLEN wantednamelen;
const char *wantedname = SvPV(file, wantednamelen);
/* Hash lookup filename, before resuming linear search. */
entry = hv_fetch (self, wantedname, wantednamelen, 0);
if (entry) {
off_t offset = SvIV (*entry);
#ifdef DEBUG_LIBZIP
PerlIO_debug("Hash success for %.*s at offset %ld\n", (int)wantednamelen,
wantedname, (long)offset);
#endif
/* Return 0 if PerlIO_seek succeeds. */
return PerlIO_seek (fp, offset, SEEK_SET) == -1;
}
#ifdef DEBUG_LIBZIP
PerlIO_debug("Hash fail for %.*s\n", (int)wantednamelen, wantedname);
#endif
/* File lookup. */
if (aswas->progress == 0 || PerlIO_seek (fp, aswas->progress, SEEK_SET) == -1)
return -1;
while (1) {
unsigned char buffer[ZIP_CENTRALDIRENTRYSIZE];
off_t offset;
int filenamelen;
int skiplen;
if (PerlIO_read (fp, buffer, sizeof (buffer)) != sizeof (buffer)
|| buffer[0] != 0x50 || buffer[1] != 0x4b || buffer[2] != 0x01
|| buffer[3] != 0x02 || buffer[34] || buffer[35]) {
/* Failed to read buffer, or incorrect signature (either end of central
directory signature or garbage), or disk number start is not zero.
Mark central directory as all read. */
#ifdef DEBUG_LIBZIP
if (buffer[0] == 0x50 && buffer[1] == 0x4b && buffer[2] == 0x05
&& buffer[3] == 0x06)
PerlIO_debug("End of the central directory\n");
else
PerlIO_debug("Failed to read buffer, or buffer bad\n");
#endif
aswas->progress;
return -1;
}
/* signature (PK\01\02) 4 bytes 0- 3 */
/* version made by 2 bytes 4, 5 */
/* version needed to extract 2 bytes 6, 7 */
/* general purpose bit flag 2 bytes 8, 9 */
/* compression method 2 bytes 10,11 */
/* last mod file time 2 bytes 12,13 */
/* last mod file date 2 bytes 14,15 */
/* crc-32 4 bytes 16-19 */
/* compressed size 4 bytes 20-23 */
/* uncompressed size 4 bytes 24-27 */
/* filename length 2 bytes 28,29 */
/* extra field length 2 bytes 30,31 */
/* file comment length 2 bytes 32,33 */
/* disk number start 2 bytes 34,35 */
/* internal file attributes 2 bytes 36,37 */
/* external file attributes 4 bytes 38-41 */
/* relative offset of local header 4 bytes 42-45 */
/* filename (variable size) */
/* extra field (variable size) */
/* file comment (variable size) */
filenamelen = buffer[28] | (buffer[29] << 8);
skiplen = (buffer[30] | (buffer[31] << 8))
+ (buffer[32] | (buffer[33] << 8)); /* Extra field plus comment. */
/* Is it a directory? */
if (buffer[38] & ZIP_DIRATTR) {
#ifdef DEBUG_LIBZIP
PerlIO_debug("Skipping directory\n");
#endif
if (PerlIO_seek (fp, filenamelen + skiplen, SEEK_CUR) == -1) {
#ifdef DEBUG_LIBZIP
PerlIO_debug("Skipping directory failed\n");
#endif
aswas->progress = 0;
return -1;
}
} else {
/* It's (probably) a file. */
char *filename;
New (103,filename,filenamelen,char);
if (!filename || PerlIO_read (fp, filename, filenamelen) != filenamelen) {
Safefree (filename);
#ifdef DEBUG_LIBZIP
PerlIO_debug("All going pear shaped :-(\n");
#endif
/* Everything is going to go pear shaped at this point. */
aswas->progress = 0;
return -1;
}
#ifdef DEBUG_LIBZIP
if (skiplen > sizeof (buffer))
PerlIO_debug("Skipping %ld byte(s) from %ld to ", skiplen,
(long) PerlIO_tell (fp));
#endif
if (skiplen && ((skiplen > sizeof (buffer))
/* Seek if we can't fread() the extra field into our (small) buffer. */
? (PerlIO_seek (fp, skiplen, SEEK_CUR) == -1)
/* assume that for 42 bytes buffered fread() is faster than fseek() */
: (PerlIO_read (fp, buffer, skiplen) != skiplen)))
{
#ifdef DEBUG_LIBZIP
PerlIO_debug("Skip failed :-(\n");
#endif
Safefree (filename);
aswas->progress = 0;
return -1;
}
#ifdef DEBUG_LIBZIP
if (skiplen)
PerlIO_debug("%ld\n", PerlIO_tell (fp));
#endif
offset = buffer[42] | (buffer[43] << 8) | (buffer[44] << 16)
| (buffer[45] << 24);
#ifdef DEBUG_LIBZIP
PerlIO_debug("file %.*s at offset %ld\n", (int)filenamelen,
filename, (long)offset);
#endif
/* Hash it */
entry = hv_fetch (self, filename, filenamelen, 1);
if (entry)
sv_setiv (*entry, offset);
#ifdef DEBUG_LIBZIP
else
PerlIO_debug("problem making hash entry\n");
#endif
if ((filenamelen == wantednamelen)
&& memEQ (filename, wantedname, wantednamelen)) {
/* Found the file. */
Safefree (filename);
aswas->progress = PerlIO_tell(fp);
if (aswas->progress == -1)
aswas->progress = 0;
#ifdef DEBUG_LIBZIP
PerlIO_debug("Got it, returning seek() return, progress now %ld\n",
aswas->progress);
#endif
if (PerlIO_seek (fp, offset, SEEK_SET) != -1)
return 0; /* Return success - fp now points to local entry. */
return -1;
}
Safefree (filename);
}
} /* loop forever */
}
/* Return true only if successful location of file in zip and successful
installation of layer discipline to deal with it (inflating or counting). */
static int
findfile (HV *self, struct cache *aswas, PerlIO *fp, SV *file)
{
dTHX;
/* Make four byte values word aligned. Hopefully optimisers on little
endian compilers that can't read unaligned can spot this. */
unsigned char buffer[ZIP_LOCALFILEHEADERSIZE + 2];
int filenamelen;
int skiplen;
char *filename;
STRLEN wantednamelen;
const char *wantedfilename = SvPV(file, wantednamelen);
int status;
if (findfileinzipdir (self, aswas, fp, file)) {
#ifdef DEBUG_LIBZIP
PerlIO_debug("findfileinzipdir failed, returning.\n");
#endif
return -1;
}
#ifdef DEBUG_LIBZIP
PerlIO_debug("You are at %ld\n", (long) PerlIO_tell (fp));
#endif
if ((PerlIO_read (fp, buffer + 2, sizeof (buffer) - 2)
!= sizeof (buffer) - 2)
|| buffer[2] != 0x50 || buffer[3] != 0x4b || buffer[4] != 0x03
|| buffer[5] != 0x04 || buffer[6] > UNZIP_VERSION || buffer[11]
|| !(buffer[10] == ZIP_DEFLATED
|| (buffer[10] == ZIP_STORED && ((buffer[8] & 8) == 0)))) {
#ifdef DEBUG_LIBZIP
PerlIO_debug("Local header failed: %x%x%x%x %d\n", buffer[2], buffer[3],
buffer[4], buffer[5], buffer[6]);
#endif
/* Failed to read buffer, or incorrect signature, or too recent for us,
or not (compression method is deflation or (compression method is
stored and sizes follow it)). */
return -1;
}
/* as it's loaded 2 off it's: */
/* signature (PK\03\04) 4 bytes 2- 5 */
/* version needed to extract 2 bytes 6, 7 */
/* general purpose bit flag 2 bytes 8, 9 */
/* compression method 2 bytes 10,11 */
/* last mod file time 2 bytes 12,13 */
/* last mod file date 2 bytes 14,15 */
/* crc-32 4 bytes 16-19 */
/* compressed size 4 bytes 20-23 */
/* uncompressed size 4 bytes 24-27 */
/* filename length 2 bytes 28,29 */
/* extra field length 2 bytes 30,31 */
/* filename (variable size) */
/* extra field (variable size) */
filenamelen = buffer[28] | (buffer[29] << 8);
skiplen = (buffer[30] | (buffer[31] << 8)); /* Extra field. */
if ((filenamelen != wantednamelen)
|| !(New(103, filename, filenamelen + skiplen, char)))
return -1;
status = (PerlIO_read (fp, filename, filenamelen + skiplen)
== (filenamelen + skiplen))
|| memNE (filename, wantedfilename, filenamelen);
Safefree (filename);
if (!status)
return -1;
#ifdef DEBUG_LIBZIP
PerlIO_debug("Compression method %d\n", buffer[10]);
#endif
#ifdef DEBUG_LIBZIP
PerlIO_debug("You are at %ld\n", (long) PerlIO_tell (fp));
#endif
{
const char *layer_name;
STRLEN layer_len;
PerlIO_funcs *layer;
SV *arg;
int result;
/* ZIP_STORED is zero, ZIP_DEFLATED is 8. */
if (buffer[10]) {
layer_name = "gzip";
layer_len = 4;
arg = newSVpvn("none",4);
} else {
/* The number of bytes we need to copy. */
UV offset = (buffer[24] | (buffer[25] << 8)
| (buffer[26] << 16) | (buffer[27] << 24));
layer_name = "subfile";
layer_len = 7;
arg = newSVuv(offset);
}
layer = PerlIO_find_layer(aTHX_ layer_name, layer_len, 0);
if (!layer)
Perl_croak(aTHX_ CLASSNAME " failed to find layer \"%s\"", layer_name);
result = PerlIO_push(aTHX_ fp, layer, NULL, arg) ? 0 : -1;
#ifdef DEBUG_LIBZIP
PerlIO_debug("Apply layer of %s gave %d\n", layer_name, result);
#endif
return result;
}
}
MODULE = ex::lib::zip PACKAGE = ex::lib::zip
PROTOTYPES: ENABLE
SV *
new (class, file)
char * class
SV * file
CODE:
{
dTHR;
STRLEN len;
HV *stash = gv_stashpv(class, 1);
HV *self;
SV *self_ref;
SV **entry;
/* A cache struct all zero. Read only. */
static const struct cache zeros;
if (!stash)
XSRETURN_UNDEF;
self = newHV();
if (!self)
XSRETURN_UNDEF;
/* It really doesn't matter what the second pointer is, as it's
length zero. We're setting $self->{''} */
entry = hv_fetch (self, (char *)self, 0, 1);
if (!entry) {
SvREFCNT_dec(self);
XSRETURN_UNDEF;
}
sv_setpvn(*entry, (const char *)&zeros, sizeof(zeros));
sv_catsv(*entry, file);
self_ref = newRV_noinc((SV *)self);
RETVAL = sv_bless(self_ref, stash);
}
OUTPUT:
RETVAL
SV *
name (self)
LIBZIB_OBJ * self
CODE:
{
SV **entry = hv_fetch (self, (char *)self, 0, 0);
const char *name;
STRLEN namelen;
if (!entry)
XSRETURN_UNDEF;
name = SvPV(*entry, namelen);
if (namelen < sizeof(struct cache))
XSRETURN_UNDEF;
RETVAL = newSVpvn(name + sizeof(struct cache),
namelen - sizeof(struct cache));
}
OUTPUT:
RETVAL
PerlIO *
INC (self, file)
LIBZIB_OBJ * self
SV * file
CODE:
{
SV **zipfile = hv_fetch (self, (char *)self, 0, 0);
const char *name;
struct cache *aswas;
STRLEN namelen;
Stat_t isnow;
PerlIO *fp;
if (!zipfile)
XSRETURN_UNDEF;
aswas = (struct cache *) SvPV(*zipfile, namelen);
if (namelen < sizeof(struct cache))
XSRETURN_UNDEF;
/* Right, now we know what the file is called, and how long/how old it
was last time we looked, let's stat it. */
name = ((const char *)aswas) + sizeof(struct cache);
if (PerlLIO_stat(name, &isnow) < 0 || !S_ISREG(isnow.st_mode))
XSRETURN_UNDEF;
fp = PerlIO_open (name, "rb");
if (!fp)
XSRETURN_UNDEF;
if (!(isnow.st_size == aswas->size && isnow.st_mtime == aswas->mtime)) {
/* It's changed size or timestamp. (Or we've never looked at it
before. */
if (aswas->size) {
/* It had non-zero size before. Therefore need to scrub the hash,
but retain the '' entry. I *think* increasing its refcount is
the way to do it. */
SV *me = SvREFCNT_inc(*zipfile);
hv_clear (self);
zipfile = hv_fetch (self, (char *)self, 0, 1);
if (!zipfile) {
SvREFCNT_dec(me);
PerlIO_close(fp);
croak (CLASSNAME "::INC failed to rebuild cache");
XSRETURN_UNDEF;
}
/* making the SV mortal decreases its refcount at end of scope.
Moreover, it should make the copy to *zipfile more efficient. */
sv_setsv(*zipfile, sv_2mortal(me));
aswas = (struct cache *) SvPV_nolen(*zipfile);
name = ((const char *)aswas) + sizeof(struct cache);
}
aswas->size = isnow.st_size;
aswas->mtime = isnow.st_mtime;
/* Failure to find a zip header returns zero. Linear search finished
is also zero. So failure only happens once, and the (now emptied)
cache is accurate. */
aswas->progress = openzipfile (fp, isnow.st_size);
/* Right. Cache now scrubbed, search position reset. */
}
if (findfile (self, aswas, fp, file) == 0)
RETVAL = fp;
else {
PerlIO_close(fp);
XSRETURN_UNDEF;
}
}
OUTPUT:
RETVAL