/*
 * Written by Keith Marshall <keithmarshall@users.sourceforge.net>
 * Copyright (C) 2009, 2010, 2011, MinGW Project
 *
 *
 * Implementation of package archive processing methods, for reading
 * and extracting content from tar archives; provides implementations
 * for each of the pkgTarArchiveProcessor and pkgTarArchiveInstaller
 * classes.
 *
 *
 * This is free software.  Permission is granted to copy, modify and
 * redistribute this software, under the provisions of the GNU General
 * Public License, Version 3, (or, at your option, any later version),
 * as published by the Free Software Foundation; see the file COPYING
 * for licensing details.
 *
 * Note, in particular, that this software is provided "as is", in the
 * hope that it may prove useful, but WITHOUT WARRANTY OF ANY KIND; not
 * even an implied WARRANTY OF MERCHANTABILITY, nor of FITNESS FOR ANY
 * PARTICULAR PURPOSE.  Under no circumstances will the author, or the
 * MinGW Project, accept liability for any damages, however caused,
 * arising from the use of this software.
 *
 */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <utime.h>
/*******************
 *
 * Class Implementation: pkgArchiveProcessor
 *
 */
52 int pkgArchiveProcessor::CreateExtractionDirectory( const char *pathname )
54 /* Helper method for creation of the directory infrastructure
55 * into which archived file entities are to be extracted.
58 if( (status = mkdir_recursive( pathname, 0755 )) != 0 )
59 dmh_notify( DMH_ERROR, "cannot create directory `%s'\n", pathname );
63 int pkgArchiveProcessor::ExtractFile( int fd, const char *pathname, int status )
65 /* Helper method to finalise extraction of archived file entities;
66 * called by the ProcessDataStream() method of the extractor class,
67 * where "fd" is the file descriptor for the extraction data stream,
68 * "pathname" is the corresponding path wher the data is extracted,
69 * and "status" is the result of calling the ProcessEntityData()
70 * method of the extractor class on "fd".
74 /* File stream was written; close it...
79 /* The target file was not successfully and completely
80 * written; discard it, and diagnose failure.
83 dmh_notify( DMH_ERROR, "%s: extraction failed\n", pathname );
86 /* Finally, we pass the original status value back to the caller.
/*******************
 *
 * Class Implementation: pkgTarArchiveProcessor
 *
 */
96 pkgTarArchiveProcessor::pkgTarArchiveProcessor( pkgXmlNode *pkg )
98 /* Constructor to associate a package tar archive with its
99 * nominated sysroot and respective installation directory path,
100 * and prepare it for processing, using an appropriate streaming
101 * decompression filter; (choice of filter is based on archive
102 * file name extension; file names are restricted to the
103 * POSIX Portable Character Set).
105 * First, we anticipate an invalid initialisation state...
114 /* The 'pkg' XML database entry must be non-NULL, must
115 * represent a package release, and must specify a canonical
116 * tarname to identify the package...
118 if( ((origin = pkg) != NULL) && pkg->IsElementOfType( release_key )
119 && ((tarname = pkg->GetPropVal( tarname_key, NULL )) != NULL) )
121 /* When these pre-conditions are satisfied, we may proceed
122 * to identify and locate the sysroot record with which this
123 * package is to be associated...
125 pkgSpecs lookup( pkgfile = tarname );
126 if( (sysroot = pkg->GetSysRoot( lookup.GetSubSystemName() )) != NULL )
128 /* Having located the requisite sysroot record, we may
129 * retrieve its specified installation path prefix...
132 if( (prefix = sysroot->GetPropVal( pathname_key, NULL )) != NULL )
134 /* ...and incorporate it into a formatting template
135 * for use in deriving the full path names for files
136 * which are installed from this package.
138 const char *template_format = "%F%%/M/%%F";
139 char template_text[mkpath( NULL, template_format, prefix, NULL )];
140 mkpath( template_text, template_format, prefix, NULL );
141 sysroot_len = mkpath( NULL, template_text, "", NULL ) - 1;
142 sysroot_path = strdup( template_text );
145 /* Some older packages don't use the canonical tarname
146 * for the archive file name; identify the real file name
147 * associated with such packages...
149 pkgfile = pkg->ArchiveName();
151 /* Finally, initialise the data stream which we will use
152 * for reading the package content.
154 const char *archive_path_template = pkgArchivePath();
155 char archive_path_name[mkpath( NULL, archive_path_template, pkgfile, NULL )];
156 mkpath( archive_path_name, archive_path_template, pkgfile, NULL );
157 stream = pkgOpenArchiveStream( archive_path_name );
161 pkgTarArchiveProcessor::~pkgTarArchiveProcessor()
163 /* Destructor must release the heap memory allocated in
164 * the constructor, (by strdup and pkgManifest), clean up
165 * the decompression filter state, and close the archive
168 free( (void *)(sysroot_path) );
173 int pkgTarArchiveProcessor::ProcessLinkedEntity( const char *pathname )
175 /* FIXME: Win32 links need special handling; for hard links, we
176 * may be able to create them directly, with >= Win2K and NTFS;
177 * for symlinks on *all* Win32 variants, and for hard links on
178 * FAT32 or Win9x, we need to make physical copies of the source
179 * file, at the link target location.
181 * For now, we simply ignore links.
184 "FIXME:ProcessLinkedEntity<stub>:Ignoring link: %s --> %s\n",
185 pathname, header.field.linkname
uint64_t compute_octval( const char *p, size_t len )
# define octval( FIELD ) compute_octval( FIELD, sizeof( FIELD ) )
{
  /* Helper to convert the ASCII representation of octal values,
   * (as recorded within tar archive header fields), to their actual
   * numeric values, ignoring leading or trailing garbage.
   *
   * "p" addresses the first byte of the field, and "len" is the
   * field width in bytes; the field need not be NUL terminated,
   * since no more than "len" bytes are ever examined.  A field
   * containing no octal digit at all yields zero.
   *
   * The octval() macro supplies the width automatically, for any
   * field which is declared as a character array.
   */
  uint64_t value = 0LL;

  while( (len > 0) && ((*p < '0') || (*p > '7')) )
  {
    /* Step over leading garbage.
     */
    ++p; --len;
  }
  while( (len > 0) && (*p >= '0') && (*p < '8') )
  {
    /* Accumulate octal digits; (each represents exactly three
     * bits in the accumulated value), until we either exhaust
     * the width of the field, or we encounter trailing junk.
     */
    value = (value << 3) + *p++ - '0'; --len;
  }
  return value;
}
217 int pkgTarArchiveProcessor::GetArchiveEntry()
219 /* Read header for next available entry in the tar archive;
220 * check for end-of-archive mark, (all zero header); verify
221 * checksum for active entry.
223 char *buf = header.aggregate;
224 size_t count = stream->Read( buf, sizeof( header ) );
226 if( count < sizeof( header ) )
228 /* Failed to read a complete header; return error code.
235 * Outer loop checks for an all zero header...
239 /* Any non-zero byte transfers control to an inner loop,
240 * to rescan the entire header, accumulating its checksum...
243 for( buf = header.aggregate, count = sizeof( header ); count--; ++buf )
245 if( (buf < header.field.chksum) || (buf >= header.field.typeflag) )
247 * ...counting the actual binary value of each byte,
248 * in all but the checksum field itself...
252 /* ...while treating each byte within the checksum field as
253 * having an effective value equivalent to ASCII <space>.
257 /* After computing the checksum for a non-zero header,
258 * verify it against the value recorded in the checksum field;
259 * return +1 for a successful match, or -2 for failure.
261 return (sum == octval( header.field.chksum )) ? 1 : -2;
264 /* If we get to here, then the inner loop was never entered;
265 * the outer loop has completed, confirming an all zero header;
266 * return zero, to indicate end of archive.
271 int pkgTarArchiveProcessor::Process()
273 /* Generic method for reading tar archives, and extracting their
274 * content; loops over each archive entry in turn...
276 while( GetArchiveEntry() > 0 )
278 char *prefix = *header.field.prefix ? header.field.prefix : NULL;
279 char *name = header.field.name;
281 /* Handle the GNU long name header format.
282 * If the pathname overflows the name field, GNU tar creates a special
283 * entry type, where the data contains the full pathname for the
286 char *longname = NULL;
287 if( *header.field.typeflag == TAR_ENTITY_TYPE_GNU_LONGNAME )
289 /* Extract the full pathname from the data of this entry.
291 longname = EntityDataAsString();
293 dmh_notify( DMH_ERROR, "Unable to read a long name entry\n" );
295 /* Read the entry for which this long name is intended.
297 if( GetArchiveEntry() <= 0 )
298 dmh_notify( DMH_ERROR, "Expected a new entry after a long name entry\n" );
300 /* Use the previously determined long name as the pathname for this entry.
306 /* Found an archive entry; map it to an equivalent file system
307 * path name, within the designated sysroot hierarchy.
309 char pathname[mkpath( NULL, sysroot_path, name, prefix )];
310 mkpath( pathname, sysroot_path, name, prefix );
314 /* Direct further processing to the appropriate handler; (this
315 * is specific to the archive entry classification)...
317 switch( *header.field.typeflag )
321 case TAR_ENTITY_TYPE_DIRECTORY:
323 * We may need to take some action in respect of directories;
324 * e.g. we may need to create a directory, or even a sequence
325 * of directories, to establish a location within the sysroot
328 { /* Note: Microsoft's implementation of stat() appears to choke
329 * on directory path names with trailing slashes; thus, before
330 * we invoke the directory processing routine, (which may need
331 * to call stat(), to check if the specified directory already
332 * exists), we remove any such trailing slashes.
334 char *p = pathname + sizeof( pathname ) - 1;
335 while( (p > pathname) && ((*--p == '/') || (*p == '\\')) )
339 /* We are now ready to process the directory path name entry...
341 status = ProcessDirectory( pathname );
344 case TAR_ENTITY_TYPE_LINK:
345 case TAR_ENTITY_TYPE_SYMLINK:
347 * Links ultimately represent file system entities in
348 * our sysroot hierarchy, but we need special processing
349 * to handle them correctly...
352 status = ProcessLinkedEntity( pathname );
355 case TAR_ENTITY_TYPE_FILE:
356 case TAR_ENTITY_TYPE_ALTFILE:
358 * These represent regular files; the file content is
359 * embedded within the archive stream, so we need to be
360 * prepared to read or copy it, as appropriate...
363 ProcessDataStream( pathname );
367 /* FIXME: we make no provision for handling any other
368 * type of archive entry; we should provide some more
369 * robust error handling, but for now we simply emit
370 * a diagnostic, and return an error condition code...
373 dmh_notify( DMH_ERROR,
374 "unexpected archive entry classification: type %d\n",
375 (int)(*header.field.typeflag)
380 /* If we didn't bail out before getting to here, then the archive
381 * was processed successfully; return the success code.
386 int pkgTarArchiveProcessor::ProcessEntityData( int fd )
388 /* Generic method for reading past the data associated with
389 * a specific header within a tar archive; if given a negative
390 * value for `fd', it will simply skip over the data, otherwise
391 * `fd' is assumed to represent a descriptor for an opened file
392 * stream, to which the data will be copied (extracted).
396 /* Initialise a counter for the length of the data content, and
397 * specify the default size for the transfer buffer in which to
398 * process it; make the initial size of the transfer buffer 16
399 * times the header size.
401 uint64_t bytes_to_copy = octval( header.field.size );
402 size_t block_size = sizeof( header ) << 4;
404 /* While we still have unread data, and no processing error...
406 while( (bytes_to_copy > 0) && (status == 0) )
408 /* Adjust the requested size for the transfer buffer, shrinking
409 * it by 50% at each step, until it is smaller than the remaining
410 * data length, but never smaller than the header record length.
412 while( (bytes_to_copy < block_size) && (block_size > sizeof( header )) )
415 /* Allocate a transfer buffer of the requested size, and populate
416 * it, by reading data from the archive; (since the transfer buffer
417 * is never smaller than the header length, this will also capture
418 * any additional padding bytes, which may be required to keep the
419 * data length equal to an exact multiple of the header length).
421 char buffer[block_size];
422 if( stream->Read( buffer, block_size ) < (int)(block_size) )
424 * Failure to fully populate the transfer buffer, (i.e. a short
425 * read), indicates a corrupt archive; bail out immediately.
429 /* When the number of actual data bytes expected is fewer than the
430 * total number of bytes in the transfer buffer...
432 if( bytes_to_copy < block_size )
434 * ...then we have reached the end of the data for the current
435 * archived entity; adjust the block size to reflect the number
436 * of actual data bytes present in the transfer buffer...
438 block_size = bytes_to_copy;
440 /* With the number of actual data bytes present now accurately
441 * reflected by the block size, we save that data to the stream
442 * specified for archive extraction, (if any).
444 if( (fd >= 0) && (write( fd, buffer, block_size ) != (int)(block_size)) )
446 * An extraction error occurred; set the status code to
451 /* Adjust the count of remaining unprocessed data bytes, and begin
452 * a new processing cycle, to capture any which may be present.
454 bytes_to_copy -= block_size;
457 /* Finally, when all data for the current archive entry has been
458 * processed, we return to the caller with an appropriate completion
464 char *pkgTarArchiveProcessor::EntityDataAsString()
466 /* Read the data associated with a specific header within a tar archive
467 * and return it as a string. The return value is stored in memory which
468 * is allocated by malloc; it should be freed when no longer required.
470 * It is assumed that the return data can be accommodated within available
471 * heap memory. Since the length isn't returned, we assume that the string
472 * is NUL-terminated, and that it contains no embedded NULs.
474 * In the event of any error, NULL is returned.
477 uint64_t bytes_to_copy = octval( header.field.size );
479 /* Round the buffer size to the smallest multiple of the record size.
481 bytes_to_copy += sizeof( header ) - 1;
482 bytes_to_copy -= bytes_to_copy % sizeof( header );
484 /* Allocate the data buffer.
486 data = (char*)(malloc( bytes_to_copy ));
490 /* Read the data into the buffer.
492 size_t count = stream->Read( data, bytes_to_copy );
493 if( count < bytes_to_copy )
495 /* Failure to fully populate the transfer buffer, (i.e. a short
496 * read), indicates a corrupt archive.
/*******************
 *
 * Class Implementation: pkgTarArchiveExtractor
 *
 */
static int commit_saved_entity( const char *pathname, time_t mtime )
{
  /* Helper to set the access and modification times for a file,
   * after extraction from an archive, to match the specified "mtime";
   * (typically "mtime" is as recorded within the archive).
   *
   * Returns the result of utime(): zero on success, or -1 when
   * the time stamps of "pathname" could not be updated.
   */
  struct utimbuf timestamp;

  timestamp.actime = timestamp.modtime = mtime;
  return utime( pathname, &timestamp );
}
523 pkgTarArchiveExtractor::pkgTarArchiveExtractor( const char *fn, const char *dir )
525 /* A simplified variation on the installer theme; this extracts
526 * the tar archive named by "fn" into any arbitrarily chosen path,
527 * specified by "dir", without creating an installation record.
529 * The extractor uses a specialised constructor; however, we
530 * begin by initialising as for the general case.
539 /* When an explicit extraction path name is specified...
543 /* ...then set up the template which the extractor will use
544 * to generate path names for each extracted file entity...
546 const char *template_format = "%F%%/M/%%F";
547 char template_text[mkpath( NULL, template_format, dir, NULL )];
548 mkpath( template_text, template_format, dir, NULL );
550 /* ...suborning the sysroot_len and sysroot_path properties
551 * to pass it to the extraction methods.
553 sysroot_len = mkpath( NULL, template_text, "", NULL ) - 1;
554 sysroot_path = strdup( template_text );
557 /* Finally, open the specified archive using the appropriate
558 * stream type, and invoke the extraction Process() method.
560 stream = pkgOpenArchiveStream( fn );
564 int pkgTarArchiveExtractor::ProcessDirectory( const char *pathname )
566 /* We are obliged to provide an implementation for this method,
567 * since the base class declares it as abstract; in this instance,
568 * delegation to a real base class method suffices.
570 return CreateExtractionDirectory( pathname );
573 int pkgTarArchiveExtractor::ProcessDataStream( const char *pathname )
575 /* Also declared as abstract in the base class, in this case
576 * we must set up the output stream, and initiate entity data
577 * processing on behalf of the base class ExtractFile() method..
580 int fd = set_output_stream( pathname, octval( header.field.mode ) );
581 if( (status = ExtractFile( fd, pathname, ProcessEntityData( fd ))) == 0 )
583 * ...and commit the file after successful extraction...
585 commit_saved_entity( pathname, octval( header.field.mtime ) );
587 /* ...ultimately returning the extraction status code.
/*******************
 *
 * Class Implementation: pkgTarArchiveInstaller
 *
 */
597 pkgTarArchiveInstaller::
598 pkgTarArchiveInstaller( pkgXmlNode *pkg ):pkgTarArchiveProcessor( pkg )
600 /* Constructor: having successfully set up the pkgTarArchiveProcessor
601 * base class, we attach a pkgManifest to track the installation.
603 if( (tarname != NULL) && (sysroot != NULL) && stream->IsReady() )
604 installed = new pkgManifest( package_key, tarname );
607 int pkgTarArchiveInstaller::Process()
609 /* Specialisation of the base class Process() method.
612 /* First, process the archive as for the base class...
614 if( (status = pkgTarArchiveProcessor::Process()) == 0 )
616 /* ...then, on successful completion...
618 * Update the package installation manifest, to record
619 * the installation in the current sysroot...
621 installed->BindSysRoot( sysroot, package_key );
622 pkgRegister( sysroot, origin, tarname, pkgfile );
627 int pkgTarArchiveInstaller::ProcessDirectory( const char *pathname )
629 /* Create the directory infrastructure required to support
630 * a specific package installation.
633 if( DEBUG_REQUEST( DEBUG_SUPPRESS_INSTALLATION ) )
636 * FIXME:maybe adapt for 'dry-run' or 'verbose' use.
639 "FIXME:ProcessDirectory<stub>:not executing: mkdir -p %s\n",
642 if( DEBUG_REQUEST( DEBUG_UPDATE_INVENTORY ) )
644 * Although no installation directory has actually been created,
645 * update the inventory to simulate the effect of doing so.
647 installed->AddEntry( dirname_key, pathname + sysroot_len );
651 if( (status = CreateExtractionDirectory( pathname )) == 0 )
653 * Either the specified directory already exists,
654 * or we just successfully created it; attach a reference
655 * in the installation manifest for the current package.
657 installed->AddEntry( dirname_key, pathname + sysroot_len );
662 int pkgTarArchiveInstaller::ProcessDataStream( const char *pathname )
664 /* Extract file data from the archive, and copy it to the
665 * associated target file stream, if any.
667 if( DEBUG_REQUEST( DEBUG_SUPPRESS_INSTALLATION ) )
670 * FIXME:maybe adapt for 'dry-run' or 'verbose' use.
673 "FIXME:ProcessDataStream<stub>:not extracting: %s\n",
676 if( DEBUG_REQUEST( DEBUG_UPDATE_INVENTORY ) )
678 * Although no file has actually been installed, update
679 * the inventory to simulate the effect of doing so.
681 installed->AddEntry( filename_key, pathname + sysroot_len );
683 return ProcessEntityData( -1 );
689 /* Establish an output file stream, extract the entity data,
690 * writing it to this stream...
692 int fd = set_output_stream( pathname, octval( header.field.mode ) );
693 if( (status = ExtractFile( fd, pathname, ProcessEntityData( fd ))) == 0 )
695 /* ...and on successful completion, commit the file
696 * and record it in the installation database.
698 commit_saved_entity( pathname, octval( header.field.mtime ) );
699 installed->AddEntry( filename_key, pathname + sysroot_len );
/* $RCSfile$: end of file */