/* Written by Keith Marshall <keithmarshall@users.sourceforge.net>
 * Copyright (C) 2009, 2010, 2011, 2012, 2013, MinGW.org Project
 */
/* Implementation of package archive processing methods, for reading
 * and extracting content from tar archives; provides implementations
 * for each of the pkgTarArchiveProcessor and pkgTarArchiveInstaller
 * classes.
 */
/* This is free software.  Permission is granted to copy, modify and
 * redistribute this software, under the provisions of the GNU General
 * Public License, Version 3, (or, at your option, any later version),
 * as published by the Free Software Foundation; see the file COPYING
 * for licensing details.
 */
/* Note, in particular, that this software is provided "as is", in the
 * hope that it may prove useful, but WITHOUT WARRANTY OF ANY KIND; not
 * even an implied WARRANTY OF MERCHANTABILITY, nor of FITNESS FOR ANY
 * PARTICULAR PURPOSE.  Under no circumstances will the author, or the
 * MinGW Project, accept liability for any damages, however caused,
 * arising from the use of this software.
 */
35 #include <sys/types.h>
41 #if IMPLEMENTATION_LEVEL == PACKAGE_BASE_COMPONENT
51 #endif /* PACKAGE_BASE_COMPONENT */
/* Class Implementation: pkgArchiveProcessor
 */
60 int pkgArchiveProcessor::CreateExtractionDirectory( const char *pathname )
62 /* Helper method for creation of the directory infrastructure
63 * into which archived file entities are to be extracted.
66 if( save_on_extract && ((status = mkdir_recursive( pathname, 0755 )) != 0) )
67 dmh_notify( DMH_ERROR, "cannot create directory `%s'\n", pathname );
71 static inline int dmh_notify_extraction_failed( const char *name )
73 /* Helper function to emit archive "extraction failed" diagnostics.
75 return dmh_notify( DMH_ERROR, "%s: extraction failed\n", name );
78 static inline int dmh_notify_archive_data_exhausted( const char *context )
80 /* Helper function to emit "premature end of archive" diagnostics.
82 return dmh_notify( DMH_ERROR,
83 "unexpected end of archive reading %s record\n", context
87 static int create_output_stream( const char *name, int mode )
89 /* Wrapper encapsulating the set_output_stream() function, while
90 * protecting against inadvertently overwriting any unexpectedly
93 int fd = set_output_stream( name, mode );
94 if( (fd == -1) && ((errno == EEXIST) || (errno == EACCES)) )
96 /* Overwrite prevention was triggered; diagnose.
98 dmh_notify_extraction_failed( name );
101 /* The exception was triggered by an already existing file;
102 * this likely indicates a conflict between two packages.
104 dmh_notify( DMH_ERROR,
105 "%s: probable package conflict; existing file not overwritten\n",
110 { /* Otherwise, the user isn't allowed to write the extracted
111 * file, in the location designated for installation.
113 dmh_notify( DMH_ERROR,
114 "%s: permission denied; cannot store file\n", name
121 inline int pkgArchiveProcessor::SetOutputStream( const char *name, int mode )
123 /* Wrapper method to facilitate the set up of output streams
124 * for writing extracted content to disk, except in the special
125 * case where saving of files has been disabled.
127 return save_on_extract ? create_output_stream( name, mode ) : -2;
130 int pkgArchiveProcessor::ExtractFile( int fd, const char *pathname, int status )
132 /* Helper method to finalise extraction of archived file entities;
133 * called by the ProcessDataStream() method of the extractor class,
134 * where "fd" is the file descriptor for the extraction data stream,
135 * "pathname" is the corresponding path where the data is extracted,
136 * and "status" is the result of calling the ProcessEntityData()
137 * method of the extractor class on "fd".
141 /* File stream was written; close it...
146 /* The target file was not successfully and completely
147 * written; discard it, and diagnose failure.
150 dmh_notify_extraction_failed( pathname );
153 case TAR_ARCHIVE_DATA_READ_ERROR:
154 dmh_notify_archive_data_exhausted( "content" );
157 case TAR_ARCHIVE_DATA_WRITE_ERROR:
158 dmh_notify( DMH_ERROR, "write error extracting file content\n" );
162 dmh_notify( DMH_ERROR, "unexpected fault; status = %d\n", status );
166 /* Finally, we pass either the original status value, or the
167 * failing file descriptor as an effective status, if no file
168 * could be extracted, back to the caller.
170 return (fd == -1) ? fd : status;
/* Class Implementation: pkgTarArchiveProcessor
 */
178 #if IMPLEMENTATION_LEVEL == PACKAGE_BASE_COMPONENT
/* The GUI setup tool will provide a simplified substitute for
 * the constructor which follows.
 */
183 pkgTarArchiveProcessor::pkgTarArchiveProcessor( pkgXmlNode *pkg )
185 /* Constructor to associate a package tar archive with its
186 * nominated sysroot and respective installation directory path,
187 * and prepare it for processing, using an appropriate streaming
188 * decompression filter; (choice of filter is based on archive
189 * file name extension; file names are restricted to the
190 * POSIX Portable Character Set).
192 * First, we anticipate an invalid initialisation state...
201 /* The 'pkg' XML database entry must be non-NULL, must
202 * represent a package release, and must specify a canonical
203 * tarname to identify the package...
205 if( ((origin = pkg) != NULL) && pkg->IsElementOfType( release_key )
206 && ((tarname = pkg->GetPropVal( tarname_key, NULL )) != NULL) )
208 /* When these pre-conditions are satisfied, we may proceed
209 * to identify and locate the sysroot record with which this
210 * package is to be associated...
212 pkgSpecs lookup( pkgfile = tarname );
213 if( (sysroot = pkg->GetSysRoot( lookup.GetSubSystemName() )) != NULL )
215 /* Having located the requisite sysroot record, we may
216 * retrieve its specified installation path prefix...
219 if( (prefix = sysroot->GetPropVal( pathname_key, NULL )) != NULL )
221 /* ...and incorporate it into a formatting template
222 * for use in deriving the full path names for files
223 * which are installed from this package.
225 const char *template_format = "%F%%/M/%%F";
226 char template_text[mkpath( NULL, template_format, prefix, NULL )];
227 mkpath( template_text, template_format, prefix, NULL );
228 sysroot_len = mkpath( NULL, template_text, "", NULL ) - 1;
229 sysroot_path = strdup( template_text );
232 /* Some older packages don't use the canonical tarname
233 * for the archive file name; identify the real file name
234 * associated with such packages...
236 pkgfile = pkg->ArchiveName();
238 /* Finally, initialise the data stream which we will use
239 * for reading the package content.
241 const char *archive_path_template = pkgArchivePath();
242 char archive_path_name[mkpath( NULL, archive_path_template, pkgfile, NULL )];
243 mkpath( archive_path_name, archive_path_template, pkgfile, NULL );
244 stream = pkgOpenArchiveStream( archive_path_name );
248 pkgTarArchiveProcessor::~pkgTarArchiveProcessor()
250 /* Destructor must release the heap memory allocated in
251 * the constructor, (by strdup and pkgManifest), clean up
252 * the decompression filter state, and close the archive
255 free( (void *)(sysroot_path) );
260 #endif /* PACKAGE_BASE_COMPONENT */
262 int pkgTarArchiveProcessor::ProcessLinkedEntity( const char *pathname )
264 /* FIXME: Win32 links need special handling; for hard links, we
265 * may be able to create them directly, with >= Win2K and NTFS;
266 * for symlinks on *all* Win32 variants, and for hard links on
267 * FAT32 or Win9x, we need to make physical copies of the source
268 * file, at the link target location.
270 * For now, we simply ignore links.
273 "FIXME:ProcessLinkedEntity<stub>:Ignoring link: %s --> %s\n",
274 pathname, header.field.linkname
uint64_t compute_octval( const char *p, size_t len )
# define octval( FIELD )  compute_octval( FIELD, sizeof( FIELD ) )
{
  /* Helper to convert the ASCII representation of octal values,
   * (as recorded within tar archive header fields), to their actual
   * numeric values, ignoring leading or trailing garbage.
   *
   * "p" addresses the first byte of the field, and "len" is the
   * field width in bytes; no byte beyond "len" is ever examined,
   * so the field need not be NUL terminated.  Returns zero when
   * the field contains no octal digit at all.
   *
   * The octval() macro applies this to a header field which is
   * declared as a character array, deriving "len" from sizeof.
   */
  uint64_t value = 0LL;

  while( (len > 0) && ((*p < '0') || (*p > '7')) )
  {
    /* Step over leading garbage.
     */
    ++p; --len;
  }
  while( (len > 0) && (*p >= '0') && (*p < '8') )
  {
    /* Accumulate octal digits; (each represents exactly three
     * bits in the accumulated value), until we either exhaust
     * the width of the field, or we encounter trailing junk.
     */
    value = (value << 3) + *p++ - '0'; --len;
  }
  return value;
}
306 int pkgTarArchiveProcessor::GetArchiveEntry()
308 /* Read header for next available entry in the tar archive;
309 * check for end-of-archive mark, (all zero header); verify
310 * checksum for active entry.
312 char *buf = header.aggregate;
313 size_t count = stream->Read( buf, sizeof( header ) );
315 if( count < sizeof( header ) )
317 /* Failed to read a complete header; diagnose and return error code.
319 dmh_notify_archive_data_exhausted( "header" );
320 return TAR_ARCHIVE_DATA_READ_ERROR;
325 * Outer loop checks for an all zero header...
329 /* Any non-zero byte transfers control to an inner loop,
330 * to rescan the entire header, accumulating its checksum...
333 for( buf = header.aggregate, count = sizeof( header ); count--; ++buf )
335 if( (buf < header.field.chksum) || (buf >= header.field.typeflag) )
337 * ...counting the actual binary value of each byte,
338 * in all but the checksum field itself...
342 /* ...while treating each byte within the checksum field as
343 * having an effective value equivalent to ASCII <space>.
347 /* After computing the checksum for a non-zero header,
348 * verify it against the value recorded in the checksum field;
349 * return +1 for a successful match...
351 if( sum == octval( header.field.chksum ) )
354 /* ...otherwise diagnose checksum validation failure, and
355 * return the fault status.
357 dmh_notify( DMH_ERROR, "checksum validation failed\n" );
358 return TAR_ARCHIVE_FORMAT_ERROR;
361 /* If we get to here, then the inner loop was never entered;
362 * the outer loop has completed, confirming an all zero header;
363 * return zero, to indicate end of archive.
368 int pkgTarArchiveProcessor::Process()
370 /* Generic method for reading tar archives, and extracting their
371 * content; loops over each archive entry in turn...
374 while( (status = GetArchiveEntry()) > 0 )
376 char *prefix = *header.field.prefix ? header.field.prefix : NULL;
377 char *name = header.field.name;
379 /* Handle the GNU long name header format.
380 * If the pathname overflows the name field, GNU tar creates a special
381 * entry type, where the data contains the full pathname for the
384 char *longname = NULL;
385 if( *header.field.typeflag == TAR_ENTITY_TYPE_GNU_LONGNAME )
387 /* Extract the full pathname from the data of this entry.
389 if( (longname = EntityDataAsString()) == NULL )
391 dmh_notify( DMH_ERROR, "Unable to read a long name entry\n" );
392 return TAR_ARCHIVE_FORMAT_ERROR;
395 /* Read the entry for which this long name is intended.
397 if( GetArchiveEntry() <= 0 )
399 dmh_notify( DMH_ERROR, "Expected a new entry after a long name entry\n" );
400 return TAR_ARCHIVE_FORMAT_ERROR;
403 /* Use the previously determined long name as the pathname for this entry.
409 /* Found an archive entry; map it to an equivalent file system
410 * path name, within the designated sysroot hierarchy.
412 char pathname[mkpath( NULL, sysroot_path, name, prefix )];
413 mkpath( pathname, sysroot_path, name, prefix );
417 /* Direct further processing to the appropriate handler; (this
418 * is specific to the archive entry classification)...
420 switch( *header.field.typeflag )
424 case TAR_ENTITY_TYPE_DIRECTORY:
426 * We may need to take some action in respect of directories;
427 * e.g. we may need to create a directory, or even a sequence
428 * of directories, to establish a location within the sysroot
431 { /* Note: Microsoft's implementation of stat() appears to choke
432 * on directory path names with trailing slashes; thus, before
433 * we invoke the directory processing routine, (which may need
434 * to call stat(), to check if the specified directory already
435 * exists), we remove any such trailing slashes.
437 char *p = pathname + sizeof( pathname ) - 1;
438 while( (p > pathname) && ((*--p == '/') || (*p == '\\')) )
442 /* We are now ready to process the directory path name entry...
444 status = ProcessDirectory( pathname );
447 case TAR_ENTITY_TYPE_LINK:
448 case TAR_ENTITY_TYPE_SYMLINK:
450 * Links ultimately represent file system entities in
451 * our sysroot hierarchy, but we need special processing
452 * to handle them correctly...
455 status = ProcessLinkedEntity( pathname );
458 case TAR_ENTITY_TYPE_FILE:
459 case TAR_ENTITY_TYPE_ALTFILE:
461 * These represent regular files; the file content is
462 * embedded within the archive stream, so we need to be
463 * prepared to read or copy it, as appropriate...
466 ProcessDataStream( pathname );
470 /* FIXME: we make no provision for handling any other
471 * type of archive entry; we should provide some more
472 * robust error handling, but for now we simply emit
473 * a diagnostic, and return an error condition code...
476 dmh_notify( DMH_ERROR,
477 "unexpected archive entry classification: type %d\n",
478 (int)(*header.field.typeflag)
483 /* If we didn't bail out before getting to here, then the archive
484 * was processed successfully; return the success code.
489 int pkgTarArchiveProcessor::ProcessEntityData( int fd )
491 /* Generic method for reading past the data associated with
492 * a specific header within a tar archive; if given a negative
493 * value for `fd', it will simply skip over the data, otherwise
494 * `fd' is assumed to represent a descriptor for an opened file
495 * stream, to which the data will be copied (extracted).
499 /* Initialise a counter for the length of the data content, and
500 * specify the default size for the transfer buffer in which to
501 * process it; make the initial size of the transfer buffer 16
502 * times the header size.
504 uint64_t bytes_to_copy = octval( header.field.size );
505 size_t block_size = sizeof( header ) << 4;
507 /* While we still have unread data, and no processing error...
509 while( (bytes_to_copy > 0) && (status == 0) )
511 /* Adjust the requested size for the transfer buffer, shrinking
512 * it by 50% at each step, until it is smaller than the remaining
513 * data length, but never smaller than the header record length.
515 while( (bytes_to_copy < block_size) && (block_size > sizeof( header )) )
518 /* Allocate a transfer buffer of the requested size, and populate
519 * it, by reading data from the archive; (since the transfer buffer
520 * is never smaller than the header length, this will also capture
521 * any additional padding bytes, which may be required to keep the
522 * data length equal to an exact multiple of the header length).
524 char buffer[block_size];
525 if( stream->Read( buffer, block_size ) < (int)(block_size) )
527 * Failure to fully populate the transfer buffer, (i.e. a short
528 * read), indicates a corrupt archive; bail out immediately.
530 return TAR_ARCHIVE_DATA_READ_ERROR;
532 /* When the number of actual data bytes expected is fewer than the
533 * total number of bytes in the transfer buffer...
535 if( bytes_to_copy < block_size )
537 * ...then we have reached the end of the data for the current
538 * archived entity; adjust the block size to reflect the number
539 * of actual data bytes present in the transfer buffer...
541 block_size = bytes_to_copy;
543 /* With the number of actual data bytes present now accurately
544 * reflected by the block size, we save that data to the stream
545 * specified for archive extraction, (if any).
547 if( (fd >= 0) && (write( fd, buffer, block_size ) != (int)(block_size)) )
549 * An extraction error occurred; set the status code to
552 status = TAR_ARCHIVE_DATA_WRITE_ERROR;
554 /* Adjust the count of remaining unprocessed data bytes, and begin
555 * a new processing cycle, to capture any which may be present.
557 bytes_to_copy -= block_size;
560 /* Finally, when all data for the current archive entry has been
561 * processed, we return to the caller with an appropriate completion
567 char *pkgTarArchiveProcessor::EntityDataAsString()
569 /* Read the data associated with a specific header within a tar archive
570 * and return it as a string. The return value is stored in memory which
571 * is allocated by malloc; it should be freed when no longer required.
573 * It is assumed that the return data can be accommodated within available
574 * heap memory. Since the length isn't returned, we assume that the string
575 * is NUL-terminated, and that it contains no embedded NULs.
577 * In the event of any error, NULL is returned.
580 uint64_t bytes_to_copy = octval( header.field.size );
582 /* Round the buffer size to the smallest multiple of the record size.
584 bytes_to_copy += sizeof( header ) - 1;
585 bytes_to_copy -= bytes_to_copy % sizeof( header );
587 /* Allocate the data buffer.
589 data = (char*)(malloc( bytes_to_copy ));
593 /* Read the data into the buffer.
595 size_t count = stream->Read( data, bytes_to_copy );
596 if( count < bytes_to_copy )
598 /* Failure to fully populate the transfer buffer, (i.e. a short
599 * read), indicates a corrupt archive.
/* Class Implementation: pkgTarArchiveExtractor
 */
614 EXTERN_C int have_api( const char *, const char * = NULL );
616 static inline int have_utime64_api( void )
618 /* Local helper function to check and record the availability of
619 * the _utime64() API function, within the particular version of
620 * MSVCRT.DLL which is installed on the host platform.
622 enum { API_UNSUPPORTED = 0, API_SUPPORTED, API_UNTESTED };
624 /* On first call, we don't know; initialise accordingly.
626 static int status = (int)(API_UNTESTED);
628 return (status == (int)(API_UNTESTED))
630 * Must be first time of calling; check, record, and return
631 * the appropriate availability status.
633 ? status = have_api( "_utime64" )
635 * On second and subsequent calls, we've already checked, so
636 * we know the availability status; simply return it.
641 static int commit_saved_entity( const char *pathname, __time64_t mtime )
643 /* Helper to set the access and modification times for a file,
644 * after extraction from an archive, to match the specified "mtime";
645 * (typically "mtime" is as recorded within the archive).
647 if( have_utime64_api() )
649 /* When the _utime64() API function is available...
651 struct __utimbuf64 timestamp;
653 /* ...we prefer to use it...
655 timestamp.actime = timestamp.modtime = mtime;
656 return _utime64( pathname, ×tamp );
660 /* ...otherwise, we assume that this is a legacy system,
661 * and the utime() function is based on 32-bit time_t...
663 struct __utimbuf32 timestamp;
665 /* ...so fall back to using that.
667 timestamp.actime = timestamp.modtime = mtime;
668 return utime( pathname, (utimbuf *)(×tamp) );
672 pkgTarArchiveExtractor::pkgTarArchiveExtractor( const char *fn, const char *dir )
674 /* A simplified variation on the installer theme; this extracts
675 * the tar archive named by "fn" into any arbitrarily chosen path,
676 * specified by "dir", without creating an installation record.
678 * The extractor uses a specialised constructor; however, we
679 * begin by initialising as for the general case.
688 /* When an explicit extraction path name is specified...
692 /* ...then set up the template which the extractor will use
693 * to generate path names for each extracted file entity...
695 const char *template_format = "%F%%/M/%%F";
696 char template_text[mkpath( NULL, template_format, dir, NULL )];
697 mkpath( template_text, template_format, dir, NULL );
699 /* ...suborning the sysroot_len and sysroot_path properties
700 * to pass it to the extraction methods.
702 sysroot_len = mkpath( NULL, template_text, "", NULL ) - 1;
703 sysroot_path = strdup( template_text );
705 /* Finally, open the specified archive using the appropriate
706 * stream type, and invoke the extraction Process() method.
708 stream = pkgOpenArchiveStream( fn );
712 int pkgTarArchiveExtractor::ProcessDirectory( const char *pathname )
714 /* We are obliged to provide an implementation for this method,
715 * since the base class declares it as abstract; in this instance,
716 * delegation to a real base class method suffices.
718 return CreateExtractionDirectory( pathname );
721 int pkgTarArchiveExtractor::ProcessDataStream( const char *pathname )
723 /* Also declared as abstract in the base class, in this case
724 * we must set up the output stream, and initiate entity data
725 * processing on behalf of the base class ExtractFile() method..
728 int fd = SetOutputStream( pathname, octval( header.field.mode ) );
729 if( (status = ExtractFile( fd, pathname, ProcessEntityData( fd ))) == 0 )
730 if( save_on_extract )
732 * ...and commit the file after successful extraction...
734 commit_saved_entity( pathname, octval( header.field.mtime ) );
736 /* ...ultimately returning the extraction status code.
741 #if IMPLEMENTATION_LEVEL == PACKAGE_BASE_COMPONENT
/* Class Implementation: pkgTarArchiveInstaller
 */
748 pkgTarArchiveInstaller::
749 pkgTarArchiveInstaller( pkgXmlNode *pkg ):pkgTarArchiveProcessor( pkg )
751 /* Constructor: having successfully set up the pkgTarArchiveProcessor
752 * base class, we attach a pkgManifest to track the installation.
754 if( (tarname != NULL) && (sysroot != NULL) && stream->IsReady() )
755 installed = new pkgManifest( package_key, tarname );
758 int pkgTarArchiveInstaller::Process()
760 /* Specialisation of the base class Process() method.
763 /* First, process the archive as for the base class...
765 if( (status = pkgTarArchiveProcessor::Process()) == 0 )
767 /* ...then, on successful completion...
769 * Update the package installation manifest, to record
770 * the installation in the current sysroot...
772 installed->BindSysRoot( sysroot, package_key );
773 pkgRegister( sysroot, origin, tarname, pkgfile );
778 int pkgTarArchiveInstaller::ProcessDirectory( const char *pathname )
780 /* Create the directory infrastructure required to support
781 * a specific package installation.
784 if( DEBUG_REQUEST( DEBUG_SUPPRESS_INSTALLATION ) )
787 * FIXME:maybe adapt for 'dry-run' or 'verbose' use.
790 "FIXME:ProcessDirectory<stub>:not executing: mkdir -p %s\n",
793 if( DEBUG_REQUEST( DEBUG_UPDATE_INVENTORY ) )
795 * Although no installation directory has actually been created,
796 * update the inventory to simulate the effect of doing so.
798 installed->AddEntry( dirname_key, pathname + sysroot_len );
802 if( (status = CreateExtractionDirectory( pathname )) == 0 )
804 * Either the specified directory already exists,
805 * or we just successfully created it; attach a reference
806 * in the installation manifest for the current package.
808 installed->AddEntry( dirname_key, pathname + sysroot_len );
813 int pkgTarArchiveInstaller::ProcessDataStream( const char *pathname )
815 /* Extract file data from the archive, and copy it to the
816 * associated target file stream, if any.
818 pkgSpinWait::Report( "Extracting %s", pathname + sysroot_len );
819 if( DEBUG_REQUEST( DEBUG_SUPPRESS_INSTALLATION ) )
822 * FIXME:maybe adapt for 'dry-run' or 'verbose' use.
825 "FIXME:ProcessDataStream<stub>:not extracting: %s\n",
828 if( DEBUG_REQUEST( DEBUG_UPDATE_INVENTORY ) )
830 * Although no file has actually been installed, update
831 * the inventory to simulate the effect of doing so.
833 installed->AddEntry( filename_key, pathname + sysroot_len );
835 return ProcessEntityData( -1 );
841 /* Establish an output file stream, extract the entity data,
842 * writing it to this stream...
844 int fd = SetOutputStream( pathname, octval( header.field.mode ) );
845 if( (status = ExtractFile( fd, pathname, ProcessEntityData( fd ))) == 0 )
847 /* ...and on successful completion, commit the file
848 * and record it in the installation database.
850 if( save_on_extract )
851 commit_saved_entity( pathname, octval( header.field.mtime ) );
852 installed->AddEntry( filename_key, pathname + sysroot_len );
854 /* Additionally, when the appropriate level of debug
855 * tracing has been enabled, report the installation of
856 * this file to the diagnostic log.
858 * FIXME: this would be a good place to add reporting
859 * of installation, in verbose execution mode.
861 DEBUG_INVOKE_IF( DEBUG_REQUEST( DEBUG_TRACE_TRANSACTIONS ),
862 dmh_printf( " %s\n", pathname )
869 #endif /* PACKAGE_BASE_COMPONENT */
871 /* $RCSfile$: end of file */