6 * Written by Keith Marshall <keithmarshall@users.sourceforge.net>
7 * Copyright (C) 2009, MinGW Project
10 * Implementation of the streaming data filters, which will be used
11 * for reading package archives in any supported compression format;
12 * currently supported formats are:--
21 * This is free software. Permission is granted to copy, modify and
22 * redistribute this software, under the provisions of the GNU General
23 * Public License, Version 3, (or, at your option, any later version),
24 * as published by the Free Software Foundation; see the file COPYING
25 * for licensing details.
27 * Note, in particular, that this software is provided "as is", in the
28 * hope that it may prove useful, but WITHOUT WARRANTY OF ANY KIND; not
29 * even an implied WARRANTY OF MERCHANTABILITY, nor of FITNESS FOR ANY
30 * PARTICULAR PURPOSE. Under no circumstances will the author, or the
31 * MinGW Project, accept liability for any damages, however caused,
32 * arising from the use of this software.
40 * MS-Windows nuisances...
41 * Files are expected to be either explicitly text or binary;
42 * (UNIX makes no such specific distinction). We want to force
43 * treatment of all files as binary; define a "no-op" substitute
44 * for the appropriate MS-Windows attribute, for when we compile
45 * on UNIX, so we may henceforth just use it unconditionally.
48 # define O_BINARY _O_BINARY
54 /* We need to enable PKGSTRM_H_SPECIAL awareness, when we compile this...
56 #define PKGSTRM_H_SPECIAL 1
61 * Class Implementation: pkgArchiveStream
63 * This class uses a default constructor and default virtual destructor.
64 * We never instantiate objects of this class directly; all derived classes
65 * provide their own specialised constructors and destructors, together with
66 * a mandatory specialised "Read" method.
68 * We do, however, provide one generic "GetRawData" method, which derived
69 * classes may adopt, or may override, as necessary...
72 int pkgArchiveStream::GetRawData( int fd, uint8_t *buf, size_t max )
74 /* Generic helper function for reading a compressed data stream into
75 * its decompressing filter's input buffer. The default implementation
76 * assumes a file stream, and simply invokes a read() request; however,
77 * we segregate this function, to facilitate an override to handle
78 * other input streaming capabilities.
80 return read( fd, buf, max );
85 * Class Implementation: pkgRawArchiveStream
87 * This is the simplest archive stream class, suitable for archives
88 * which have been stored WITHOUT compression...
91 pkgRawArchiveStream::pkgRawArchiveStream( const char *filename )
93 /* The constructor has little to to, but to open the archive file
94 * and associate a file descriptor with the resultant data stream.
96 fd = open( filename, O_RDONLY | O_BINARY );
99 pkgRawArchiveStream::~pkgRawArchiveStream()
101 /* The destructor needs only to close the data stream.
106 int pkgRawArchiveStream::Read( char *buf, size_t max )
108 /* While the stream reader simply transfers the requested number
109 * of bytes from the stream, to the caller's buffer.
111 return read( fd, buf, max );
116 * Class Implementation: pkgGzipArchiveStream
118 * This class creates an input streaming interface, suitable for
119 * reading archives which have been stored with gzip compression.
120 * The implementation is based on the use of libz.a, which allows
121 * for a similar implementation to that of pkgRawArchiveStream.
124 pkgGzipArchiveStream::pkgGzipArchiveStream( const char *filename )
126 /* Once more, the constructor has little to do but open the stream;
127 * in this case, the method is analogous to C's fopen().
129 stream = gzopen( filename, "rb" );
132 pkgGzipArchiveStream::~pkgGzipArchiveStream()
134 /* Another destructor, with little to do but close the stream; the
135 * gzclose() call suffices for the purpose.
140 int pkgGzipArchiveStream::Read( char *buf, size_t max )
142 /* The reader is again served by a single function call, to transfer
143 * the requested volume of decompressed data from the raw input file
144 * to the caller's buffer.
146 return gzread( stream, buf, max );
151 * Class Implementation: pkgBzipArchiveStream
153 * This class creates an input streaming interface, suitable for
154 * reading archives which have been stored with bzip2 compression.
155 * The implementation is based on the use of libbz2.a, which again
156 * allows for a fairly simple implementation, which is also quite
157 * analogous to that of pkgRawArchiveStream.
160 pkgBzipArchiveStream::pkgBzipArchiveStream( const char *filename )
162 /* The constructor carries a marginal additional overhead, in
163 * that it must first open a regular file, before associating
164 * a bzip2 control structure with it; subsequent stream access
165 * is directed exclusively through that control structure.
167 FILE *streamfile = fopen( filename, "rb" );
168 stream = BZ2_bzReadOpen( &bzerror, streamfile, 0, 0, 0, 0 );
171 pkgBzipArchiveStream::~pkgBzipArchiveStream()
173 /* For the destructor, it is again just a matter of closing
174 * the bzip2 stream; (this also takes care of closing the
175 * associated file stream).
177 BZ2_bzReadClose( &bzerror, stream );
180 int pkgBzipArchiveStream::Read( char *buf, size_t max )
182 /* Once again, reading is a simple matter of transferring
183 * the requisite number of bytes to the caller's buffer.
185 return BZ2_bzRead( &bzerror, stream, buf, max );
190 * Class Implementation: pkgLzmaArchiveStream
192 * This class creates an input streaming interface, suitable for
193 * reading archives which have been stored with lzma compression;
194 * based on the use of liblzma.a, this implements an adaptation of
195 * Lasse Collin's "xzdec" code, as configured for use as an lzma
static inline uint64_t memlimit()
{
  /* Naively cap the memory available to lzma and xz decoders, at
   * 8MiB + 2MiB = 10MiB; returns that limit, expressed in bytes.
   *
   * The parentheses are essential: "+" binds more tightly than "<<",
   * so the unparenthesised form "1ULL << 23 + 1ULL << 21" is parsed
   * as "(1ULL << 24) << 21", (i.e. 1ULL << 45), which is effectively
   * no cap at all.
   *
   * NOTE(review): any archive which decoded successfully only by
   * virtue of that inflated limit may now be refused by the decoder;
   * confirm that 10MiB suffices for the archives to be supported.
   *
   * FIXME: libarchive appears to use this; however, Lasse Collin
   * provides a more sophisticated method for xz, based on actual
   * physical memory footprint; we should adopt it.
   */
  return (1ULL << 23) + (1ULL << 21);
}
212 void lzma_stream_initialise( lzma_stream *stream )
214 /* This simple helper provides a static template, which is
215 * used to define initial state for lzma and xz decoders.
217 static const lzma_stream stream_template = LZMA_STREAM_INIT;
218 *stream = stream_template;
220 * ...mark the input buffer as initially empty.
222 stream->avail_in = 0;
225 pkgLzmaArchiveStream::pkgLzmaArchiveStream( const char *filename )
227 /* The constructor must first open a file stream...
229 if( (fd = open( filename, O_RDONLY | O_BINARY )) >= 0 )
231 /* ...then set up the lzma decoder, in appropriately
232 * initialised state...
234 lzma_stream_initialise( &stream );
235 status = lzma_alone_decoder( &stream, memlimit() );
239 pkgLzmaArchiveStream::pkgLzmaArchiveStream( int fileno ):fd( fileno )
241 /* ...then set up the lzma decoder, in appropriately
242 * initialised state...
244 lzma_stream_initialise( &stream );
245 status = lzma_alone_decoder( &stream, memlimit() );
248 pkgLzmaArchiveStream::~pkgLzmaArchiveStream()
250 /* The destructor frees memory resources allocated to the decoder,
251 * and closes the input stream file descriptor.
253 * FIXME: The lzma_alone_decoder may indicate end-of-stream, before
254 * the physical input data stream is exhausted. For now, we silently
255 * ignore any such residual data; (it is likely to be garbage anyway).
256 * Should we handle it any more explicitly?
262 int pkgLzmaArchiveStream::Read( char *buf, size_t max )
264 /* Read an lzma compressed data stream; store up to "max" bytes of
265 * decompressed data into "buf".
267 * Start by directing the decoder to use "buf", initially marking it
270 stream.next_out = (uint8_t *)(buf);
271 stream.avail_out = max;
273 while( (stream.avail_out > 0) && (status == LZMA_OK) )
275 /* "buf" hasn't been filled yet, and the decoder continues to say
276 * that more data may be available.
278 if( stream.avail_in == 0 )
280 /* We exhausted the current content of the raw input buffer;
283 stream.next_in = streambuf;
284 if( (stream.avail_in = GetRawData( fd, streambuf, BUFSIZ )) < 0 )
286 /* FIXME: an I/O error occurred here: need to handle it!!!
291 /* Run the decoder, to decompress as much as possible of the data
292 * currently in the raw input buffer, filling available space in
293 * "buf"; go round again, in case we exhausted the raw input data
294 * before we ran out of available space in "buf".
296 status = lzma_code( &stream, LZMA_RUN );
299 /* When we get to here, we either filled "buf" completely, or we
300 * completely exhausted the raw input stream; in either case, we
301 * return the actual number of bytes stored in "buf", (i.e. its
302 * total size, less any residual free space).
304 return max - stream.avail_out;
309 * Class Implementation: pkgXzArchiveStream
311 * This class creates an input streaming interface, suitable for
312 * reading archives which have been stored with xz compression;
313 * again based on the use of liblzma.a, this implements a further
314 * adaptation of Lasse Collin's "xzdec" code, as configured for
315 * use as an xz decompressor.
318 pkgXzArchiveStream::pkgXzArchiveStream( const char *filename )
320 /* The constructor must first open a file stream...
322 if( (fd = open( filename, O_RDONLY | O_BINARY )) >= 0 )
324 /* ...then set up the lzma decoder, in appropriately
325 * initialised state...
327 lzma_stream_initialise( &stream );
328 status = lzma_stream_decoder( &stream, memlimit(), LZMA_CONCATENATED );
330 /* Finally, recognising that with LZMA_CONCATENATED data,
331 * we will eventually need to switch the decoder from its
332 * initial LZMA_RUN state to LZMA_FINISH, we must provide
333 * a variable to specify the active state, (which we may
334 * initialise for the LZMA_RUN state).
340 pkgXzArchiveStream::~pkgXzArchiveStream()
342 /* This destructor frees memory resources allocated to the decoder,
343 * and closes the input stream file descriptor; unlike the preceding
344 * case of the lzma_alone_decoder, the lzma_stream_decoder guarantees
345 * that there is no trailing garbage remaining from the input stream.
351 int pkgXzArchiveStream::Read( char *buf, size_t max )
353 /* Read an xz compressed data stream; store up to "max" bytes of
354 * decompressed data into "buf".
356 * Start by directing the decoder to use "buf", initially marking it
359 stream.next_out = (uint8_t *)(buf);
360 stream.avail_out = max;
362 while( (stream.avail_out > 0) && (status == LZMA_OK) )
364 /* "buf" hasn't been filled yet, and the decoder continues to say
365 * that more data may be available.
367 if( stream.avail_in == 0 )
369 /* We exhausted the current content of the raw input buffer;
372 stream.next_in = streambuf;
373 if( (stream.avail_in = GetRawData( fd, streambuf, BUFSIZ )) < 0 )
375 /* FIXME: an I/O error occurred here: need to handle it!!!
379 else if( stream.avail_in < BUFSIZ )
381 /* A short read indicates end-of-input...
382 * Unlike the case of the lzma_alone_decoder, (as used for
383 * decompressing lzma streams), the lzma_stream_decoder, (when
384 * initialised for LZMA_CONCATENATED data, as we use here), may
385 * run lzma_code in either LZMA_RUN or LZMA_FINISH mode; the
386 * normal mode is LZMA_RUN, but we switch to LZMA_FINISH
387 * when we have exhausted the input stream.
389 opmode = LZMA_FINISH;
393 /* Run the decoder, to decompress as much as possible of the data
394 * currently in the raw input buffer, filling available space in
395 * "buf"; as noted above, "opmode" will be LZMA_RUN, until we have
396 * exhausted the input stream, when it becomes LZMA_FINISH.
398 status = lzma_code( &stream, opmode );
400 /* We need to go round again, in case we exhausted the raw input
401 * data before we ran out of available space in "buf", except...
403 if( (status == LZMA_OK) && (opmode == LZMA_FINISH) )
405 * ...when we've already achieved the LZMA_FINISH state,
406 * this becomes unnecessary, so we break the cycle.
411 /* When we get to here, we either filled "buf" completely, or we
412 * completely exhausted the raw input stream; in either case, we
413 * return the actual number of bytes stored in "buf", (i.e. its
414 * total size, less any residual free space).
416 return max - stream.avail_out;
421 * Auxiliary function: pkgOpenArchiveStream()
423 * NOTE: Keep this AFTER the class specialisations, so that their derived
424 * class declarations are visible for object instantiation here!
430 extern "C" pkgArchiveStream* pkgOpenArchiveStream( const char* filename )
432 /* Naive decompression filter selection, based on file name extension.
434 * FIXME: adopt more proactive selection method, (similar to that used
435 * by libarchive, perhaps), based on magic patterns within the file.
437 * NOTE: MS-Windows may use UNICODE file names, but distributed package
438 * archives almost certainly do not. For our purposes, use of the POSIX
439 * Portable Character Set should suffice; we offer no concessions for
440 * any usage beyond this.
442 char *ext = strrchr( filename, '.' );
445 if( strcasecmp( ext, ".gz" ) == 0 )
447 * We expect this input stream to be "gzip" compressed,
448 * so we return the appropriate decompressor.
450 return new pkgGzipArchiveStream( filename );
452 else if( strcasecmp( ext, ".bz2" ) == 0 )
454 * We expect this input stream to be "bzip2" compressed,
455 * so again, we return the appropriate decompressor.
457 return new pkgBzipArchiveStream( filename );
459 else if( strcasecmp( ext, ".lzma" ) == 0 )
461 * We expect this input stream to be "lzma" compressed,
462 * so again, we return the appropriate decompressor.
464 return new pkgLzmaArchiveStream( filename );
466 else if( strcasecmp( ext, ".xz" ) == 0 )
468 * We expect this input stream to be "xz" compressed,
469 * so again, we return the appropriate decompressor.
471 return new pkgXzArchiveStream( filename );
474 /* If we get to here, then we didn't recognise any of the standard
475 * compression indicating file name extensions; fall through, to
476 * process the stream as raw (uncompressed) data.
478 return new pkgRawArchiveStream( filename );
481 /* $RCSfile$: end of file */