src/tarproc.cpp

   1 /*
   2  * tarproc.cpp
   3  *
   4  * $Id$
   5  *
   6  * Written by Keith Marshall <keithmarshall@users.sourceforge.net>
   7  * Copyright (C) 2009, 2010, 2011, MinGW Project
   8  *
   9  *
  10  * Implementation of package archive processing methods, for reading
  11  * and extracting content from tar archives; provides implementations
  12  * for each of the pkgTarArchiveProcessor and pkgTarArchiveInstaller
  13  * classes.
  14  *
  15  *
  16  * This is free software.  Permission is granted to copy, modify and
  17  * redistribute this software, under the provisions of the GNU General
  18  * Public License, Version 3, (or, at your option, any later version),
  19  * as published by the Free Software Foundation; see the file COPYING
  20  * for licensing details.
  21  *
  22  * Note, in particular, that this software is provided "as is", in the
  23  * hope that it may prove useful, but WITHOUT WARRANTY OF ANY KIND; not
  24  * even an implied WARRANTY OF MERCHANTABILITY, nor of FITNESS FOR ANY
  25  * PARTICULAR PURPOSE.  Under no circumstances will the author, or the
  26  * MinGW Project, accept liability for any damages, however caused,
  27  * arising from the use of this software.
  28  *
  29  */
  30 #include <stdio.h>
  31 #include <stdlib.h>
  32 #include <string.h>
  33 #include <sys/types.h>
  34 #include <sys/stat.h>
  35 #include <unistd.h>
  36 #include <fcntl.h>
  37 #include <errno.h>
  38
  39 #include "dmh.h"
  40 #include "debug.h"
  41 #include "mkpath.h"
  42
  43 #include "pkginfo.h"
  44 #include "pkgkeys.h"
  45 #include "pkgproc.h"
  46
  47 /*******************
  48  *
  49  * Class Implementation: pkgTarArchiveProcessor
  50  *
  51  */
  52
  53 pkgTarArchiveProcessor::pkgTarArchiveProcessor( pkgXmlNode *pkg )
  54 {
  55   /* Constructor to associate a package tar archive with its
  56    * nominated sysroot and respective installation directory path,
  57    * and prepare it for processing, using an appropriate streaming
  58    * decompression filter; (choice of filter is based on archive
  59    * file name extension; file names are restricted to the
  60    * POSIX Portable Character Set).
  61    *
  62    * First, we anticipate an invalid initialisation state...
  63    */
  64   sysroot_len = 0;
  65
  66   sysroot = NULL;
  67   sysroot_path = NULL;
  68   installed = NULL;
  69   stream = NULL;
  70
  71   /* The 'pkg' XML database entry must be non-NULL, must
  72    * represent a package release, and must specify a canonical
  73    * tarname to identify the package...
  74    */
  75   if( ((origin = pkg) != NULL) && pkg->IsElementOfType( release_key )
  76   &&  ((tarname = pkg->GetPropVal( tarname_key, NULL )) != NULL)       )
  77   {
  78     /* When these pre-conditions are satisfied, we may proceed
  79      * to identify and locate the sysroot record with which this
  80      * package is to be associated...
  81      */
  82     pkgSpecs lookup( pkgfile = tarname );
  83     if( (sysroot = pkg->GetSysRoot( lookup.GetSubSystemName() )) != NULL )
  84     {
  85       /* Having located the requisite sysroot record, we may
  86        * retrieve its specified installation path prefix...
  87        */
  88       const char *prefix;
  89       if( (prefix = sysroot->GetPropVal( pathname_key, NULL )) != NULL )
  90       {
  91         /* ...and incorporate it into a formatting template
  92          * for use in deriving the full path names for files
  93          * which are installed from this package.
  94          */
  95         const char *template_format = "%F%%/M/%%F";
  96         char template_text[mkpath( NULL, template_format, prefix, NULL )];
  97         mkpath( template_text, template_format, prefix, NULL );
  98         sysroot_len = mkpath( NULL, template_text, "", NULL ) - 1;
  99         sysroot_path = strdup( template_text );
 100       }
 101     }
 102     /* Some older packages don't use the canonical tarname
 103      * for the archive file name; identify the real file name
 104      * associated with such packages...
 105      */
 106     pkgfile = pkg->ArchiveName();
 107
 108     /* Finally, initialise the data stream which we will use
 109      * for reading the package content.
 110      */
 111     const char *archive_path_template = pkgArchivePath();
 112     char archive_path_name[mkpath( NULL, archive_path_template, pkgfile, NULL )];
 113     mkpath( archive_path_name, archive_path_template, pkgfile, NULL );
 114     stream = pkgOpenArchiveStream( archive_path_name );
 115   }
 116 }
 117
 118 pkgTarArchiveProcessor::~pkgTarArchiveProcessor()
 119 {
 120   /* Destructor must release the heap memory allocated in
 121    * the constructor, (by strdup and pkgManifest), clean up
 122    * the decompression filter state, and close the archive
 123    * data stream.
 124    */
 125   free( (void *)(sysroot_path) );
 126   delete installed;
 127   delete stream;
 128 }
 129
 130 int pkgTarArchiveProcessor::ProcessLinkedEntity( const char *pathname )
 131 {
 132   /* FIXME: Win32 links need special handling; for hard links, we
 133    * may be able to create them directly, with >= Win2K and NTFS;
 134    * for symlinks on *all* Win32 variants, and for hard links on
 135    * FAT32 or Win9x, we need to make physical copies of the source
 136    * file, at the link target location.
 137    *
 138    * For now, we simply ignore links.
 139    */
 140   dmh_printf(
 141       "FIXME:ProcessLinkedEntity<stub>:Ignoring link: %s --> %s\n",
 142        pathname, header.field.linkname
 143     );
 144   return 0;
 145 }
 146
 147 static
 148 uint64_t compute_octval( const char *p, size_t len )
 149 # define octval( FIELD ) compute_octval( FIELD, sizeof( FIELD ) )
 150 {
 151   /* Helper to convert the ASCII representation of octal values,
 152    * (as recorded within tar archive header fields), to their actual
 153    * numeric values, ignoring leading or trailing garbage.
 154    */
 155   uint64_t value = 0LL;
 156
 157   while( (len > 0) && ((*p < '0') || (*p > '7')) )
 158   {
 159     /* Step over leading garbage.
 160      */
 161     ++p; --len;
 162   }
 163   while( (len > 0) && (*p >= '0') && (*p < '8') )
 164   {
 165     /* Accumulate octal digits; (each represents exactly three
 166      * bits in the accumulated value), until we either exhaust
 167      * the width of the field, or we encounter trailing junk.
 168      */
 169     value = (value << 3) + *p++ - '0'; --len;
 170   }
 171   return value;
 172 }
 173
 174 int pkgTarArchiveProcessor::GetArchiveEntry()
 175 {
 176   /* Read header for next available entry in the tar archive;
 177    * check for end-of-archive mark, (all zero header); verify
 178    * checksum for active entry.
 179    */
 180   char *buf = header.aggregate;
 181   size_t count = stream->Read( buf, sizeof( header ) );
 182
 183   if( count < sizeof( header ) )
 184   {
 185     /* Failed to read a complete header; return error code.
 186      */
 187     return -1;
 188   }
 189
 190   while( count-- )
 191     /*
 192      * Outer loop checks for an all zero header...
 193      */
 194     if( *buf++ != '\0' )
 195     {
 196       /* Any non-zero byte transfers control to an inner loop,
 197        * to rescan the entire header, accumulating its checksum...
 198        */
 199       uint64_t sum = 0;
 200       for( buf = header.aggregate, count = sizeof( header ); count--; ++buf )
 201       {
 202         if( (buf < header.field.chksum) || (buf >= header.field.typeflag) )
 203           /*
 204            * ...counting the actual binary value of each byte,
 205            * in all but the checksum field itself...
 206            */
 207           sum += *buf;
 208         else
 209           /* ...while treating each byte within the checksum field as
 210            * having an effective value equivalent to ASCII <space>.
 211            */
 212           sum += 0x20;
 213       }
 214       /* After computing the checksum for a non-zero header,
 215        * verify it against the value recorded in the checksum field;
 216        * return +1 for a successful match, or -2 for failure.
 217        */
 218       return (sum == octval( header.field.chksum )) ? 1 : -2;
 219     }
 220
 221   /* If we get to here, then the inner loop was never entered;
 222    * the outer loop has completed, confirming an all zero header;
 223    * return zero, to indicate end of archive.
 224    */
 225   return 0;
 226 }
 227
 228 int pkgTarArchiveProcessor::Process()
 229 {
 230   /* Generic method for reading tar archives, and extracting their
 231    * content; loops over each archive entry in turn...
 232    */
 233   while( GetArchiveEntry() > 0 )
 234   {
 235     char *prefix = *header.field.prefix ? header.field.prefix : NULL;
 236     char *name = header.field.name;
 237
 238     /* Handle the GNU long name header format.
 239      * If the pathname overflows the name field, GNU tar creates a special
 240      * entry type, where the data contains the full pathname for the
 241      * following entry.
 242      */
 243     char *longname = NULL;
 244     if( *header.field.typeflag == TAR_ENTITY_TYPE_GNU_LONGNAME )
 245     {
 246       /* Extract the full pathname from the data of this entry.
 247        */
 248       longname = EntityDataAsString();
 249       if( !longname )
 250         dmh_notify( DMH_ERROR, "Unable to read a long name entry\n" );
 251
 252       /* Read the entry for which this long name is intended.
 253        */
 254       if( GetArchiveEntry() <= 0 )
 255         dmh_notify( DMH_ERROR, "Expected a new entry after a long name entry\n" );
 256
 257       /* Use the previously determined long name as the pathname for this entry.
 258        */
 259       prefix = NULL;
 260       name = longname;
 261     }
 262
 263     /* Found an archive entry; map it to an equivalent file system
 264      * path name, within the designated sysroot hierarchy.
 265      */
 266     char pathname[mkpath( NULL, sysroot_path, name, prefix )];
 267     mkpath( pathname, sysroot_path, name, prefix );
 268
 269     free( longname );
 270
 271     /* Direct further processing to the appropriate handler; (this
 272      * is specific to the archive entry classification)...
 273      */
 274     switch( *header.field.typeflag )
 275     {
 276       int status;
 277
 278       case TAR_ENTITY_TYPE_DIRECTORY:
 279         /*
 280          * We may need to take some action in respect of directories;
 281          * e.g. we may need to create a directory, or even a sequence
 282          * of directories, to establish a location within the sysroot
 283          * hierarchy...
 284          */
 285          { /* Note: Microsoft's implementation of stat() appears to choke
 286             * on directory path names with trailing slashes; thus, before
 287             * we invoke the directory processing routine, (which may need
 288             * to call stat(), to check if the specified directory already
 289             * exists), we remove any such trailing slashes.
 290             */
 291            char *p = pathname + sizeof( pathname ) - 1;
 292            while( (p > pathname) && ((*--p == '/') || (*p == '\\')) )
 293              *p = '\0';
 294          }
 295
 296         /* We are now ready to process the directory path name entry...
 297          */
 298         status = ProcessDirectory( pathname );
 299         break;
 300
 301       case TAR_ENTITY_TYPE_LINK:
 302       case TAR_ENTITY_TYPE_SYMLINK:
 303         /*
 304          * Links ultimately represent file system entities in
 305          * our sysroot hierarchy, but we need special processing
 306          * to handle them correctly...
 307          *
 308          */
 309         status = ProcessLinkedEntity( pathname );
 310         break;
 311
 312       case TAR_ENTITY_TYPE_FILE:
 313       case TAR_ENTITY_TYPE_ALTFILE:
 314         /*
 315          * These represent regular files; the file content is
 316          * embedded within the archive stream, so we need to be
 317          * prepared to read or copy it, as appropriate...
 318          *
 319          */
 320         ProcessDataStream( pathname );
 321         break;
 322
 323       default:
 324         /* FIXME: we make no provision for handling any other
 325          * type of archive entry; we should provide some more
 326          * robust error handling, but for now we simply emit
 327          * a diagnostic, and return an error condition code...
 328          *
 329          */
 330         dmh_notify( DMH_ERROR,
 331             "unexpected archive entry classification: type %d\n",
 332             (int)(*header.field.typeflag)
 333           );
 334         return -1;
 335     }
 336   }
 337   /* If we didn't bail out before getting to here, then the archive
 338    * was processed successfully; return the success code.
 339    */
 340   return 0;
 341 }
 342
 343 int pkgTarArchiveProcessor::ProcessEntityData( int fd )
 344 {
 345   /* Generic method for reading past the data associated with
 346    * a specific header within a tar archive; if given a negative
 347    * value for `fd', it will simply skip over the data, otherwise
 348    * `fd' is assumed to represent a descriptor for an opened file
 349    * stream, to which the data will be copied (extracted).
 350    */
 351    int status = 0;
 352
 353   /* Initialise a counter for the length of the data content, and
 354    * specify the default size for the transfer buffer in which to
 355    * process it; make the initial size of the transfer buffer 16
 356    * times the header size.
 357    */
 358   uint64_t bytes_to_copy = octval( header.field.size );
 359   size_t block_size = sizeof( header ) << 4;
 360
 361   /* While we still have unread data, and no processing error...
 362    */
 363   while( (bytes_to_copy > 0) && (status == 0) )
 364   {
 365     /* Adjust the requested size for the transfer buffer, shrinking
 366      * it by 50% at each step, until it is smaller than the remaining
 367      * data length, but never smaller than the header record length.
 368      */
 369     while( (bytes_to_copy < block_size) && (block_size > sizeof( header )) )
 370       block_size >>= 1;
 371
 372     /* Allocate a transfer buffer of the requested size, and populate
 373      * it, by reading data from the archive; (since the transfer buffer
 374      * is never smaller than the header length, this will also capture
 375      * any additional padding bytes, which may be required to keep the
 376      * data length equal to an exact multiple of the header length).
 377      */
 378     char buffer[block_size];
 379     if( stream->Read( buffer, block_size ) < (int)(block_size) )
 380       /*
 381        * Failure to fully populate the transfer buffer, (i.e. a short
 382        * read), indicates a corrupt archive; bail out immediately.
 383        */
 384       return -1;
 385
 386     /* When the number of actual data bytes expected is fewer than the
 387      * total number of bytes in the transfer buffer...
 388      */
 389     if( bytes_to_copy < block_size )
 390       /*
 391        * ...then we have reached the end of the data for the current
 392        * archived entity; adjust the block size to reflect the number
 393        * of actual data bytes present in the transfer buffer...
 394        */
 395       block_size = bytes_to_copy;
 396
 397     /* With the number of actual data bytes present now accurately
 398      * reflected by the block size, we save that data to the stream
 399      * specified for archive extraction, (if any).
 400      */
 401     if( (fd >= 0) && (write( fd, buffer, block_size ) != (int)(block_size)) )
 402       /*
 403        * An extraction error occurred; set the status code to
 404        * indicate failure.
 405        */
 406       status = -2;
 407
 408     /* Adjust the count of remaining unprocessed data bytes, and begin
 409      * a new processing cycle, to capture any which may be present.
 410      */
 411     bytes_to_copy -= block_size;
 412   }
 413
 414   /* Finally, when all data for the current archive entry has been
 415    * processed, we return to the caller with an appropriate completion
 416    * status code.
 417    */
 418   return status;
 419 }
 420
 421 char *pkgTarArchiveProcessor::EntityDataAsString()
 422 {
 423   /* Read the data associated with a specific header within a tar archive
 424    * and return it as a string.  The return value is stored in memory which
 425    * is allocated by malloc; it should be freed when no longer required.
 426    *
 427    * It is assumed that the return data can be accommodated within available
 428    * heap memory.  Since the length isn't returned, we assume that the string
 429    * is NUL-terminated, and that it contains no embedded NULs.
 430    *
 431    * In the event of any error, NULL is returned.
 432    */
 433   char *data;
 434   uint64_t bytes_to_copy = octval( header.field.size );
 435
 436   /* Round the buffer size to the smallest multiple of the record size.
 437    */
 438   bytes_to_copy += sizeof( header ) - 1;
 439   bytes_to_copy -= bytes_to_copy % sizeof( header );
 440
 441   /* Allocate the data buffer.
 442    */
 443   data = (char*)(malloc( bytes_to_copy ));
 444   if( !data )
 445     return NULL;
 446
 447   /* Read the data into the buffer.
 448    */
 449   size_t count = stream->Read( data, bytes_to_copy );
 450   if( count < bytes_to_copy )
 451   {
 452     /* Failure to fully populate the transfer buffer, (i.e. a short
 453      * read), indicates a corrupt archive.
 454      */
 455     free( data );
 456     return NULL;
 457   }
 458   return data;
 459 }
 460
 461 /*******************
 462  *
 463  * Class Implementation: pkgTarArchiveInstaller
 464  *
 465  */
 466 #include <utime.h>
 467
 468 static int commit_saved_entity( const char *pathname, time_t mtime )
 469 {
 470   /* Helper to set the access and modification times for a file,
 471    * after extraction from an archive, to match the specified "mtime";
 472    * (typically "mtime" is as recorded within the archive).
 473    */
 474   struct utimbuf timestamp;
 475
 476   timestamp.actime = timestamp.modtime = mtime;
 477   return utime( pathname, &timestamp );
 478 }
 479
 480 pkgTarArchiveInstaller::
 481 pkgTarArchiveInstaller( pkgXmlNode *pkg ):pkgTarArchiveProcessor( pkg )
 482 {
 483   /* Constructor: having successfully set up the pkgTarArchiveProcessor
 484    * base class, we attach a pkgManifest to track the installation.
 485    */
 486   if( (tarname != NULL) && (sysroot != NULL) && stream->IsReady() )
 487     installed = new pkgManifest( package_key, tarname );
 488 }
 489
 490 int pkgTarArchiveInstaller::Process()
 491 {
 492   /* Specialisation of the base class Process() method.
 493    */
 494   int status;
 495   /* First, process the archive as for the base class...
 496    */
 497   if( (status = pkgTarArchiveProcessor::Process()) == 0 )
 498   {
 499     /* ...then, on successful completion...
 500      *
 501      * Update the package installation manifest, to record
 502      * the installation in the current sysroot...
 503      */
 504     installed->BindSysRoot( sysroot, package_key );
 505     pkgRegister( sysroot, origin, tarname, pkgfile );
 506   }
 507   return status;
 508 }
 509
 510 int pkgTarArchiveInstaller::ProcessDirectory( const char *pathname )
 511 {
 512   /* Create the directory infrastructure required to support
 513    * a specific package installation.
 514    */
 515 #if DEBUGLEVEL & DEBUG_SUPPRESS_INSTALLATION
 516   /*
 517    * Debugging stub...
 518    * FIXME:maybe adapt for 'dry-run' or 'verbose' use.
 519    */
 520   int status = 0;
 521   dmh_printf(
 522       "FIXME:ProcessDirectory<stub>:not executing: mkdir -p %s\n",
 523        pathname
 524     );
 525 # if DEBUGLEVEL & DEBUG_UPDATE_INVENTORY
 526   /*
 527    * Although no installation directory has actually been created,
 528    * update the inventory to simulate the effect of doing so.
 529    */
 530   installed->AddEntry( dirname_key, pathname + sysroot_len );
 531 # endif
 532
 533 #else
 534   int status;
 535   if( (status = mkdir_recursive( pathname, 0755 )) == 0 )
 536     /*
 537      * Either the specified directory already exists,
 538      * or we just successfully created it; attach a reference
 539      * in the installation manifest for the current package.
 540      */
 541     installed->AddEntry( dirname_key, pathname + sysroot_len );
 542
 543   else
 544     /* A required subdirectory could not be created;
 545      * diagnose this failure.
 546      */
 547     dmh_notify( DMH_ERROR, "cannot create directory `%s'\n", pathname );
 548 #endif
 549
 550   return status;
 551 }
 552
 553 int pkgTarArchiveInstaller::ProcessDataStream( const char *pathname )
 554 {
 555   /* Extract file data from the archive, and copy it to the
 556    * associated target file stream, if any.
 557    */
 558 #if DEBUGLEVEL & DEBUG_SUPPRESS_INSTALLATION
 559   /*
 560    * Debugging stub...
 561    * FIXME:maybe adapt for 'dry-run' or 'verbose' use.
 562    */
 563   dmh_printf(
 564       "FIXME:ProcessDataStream<stub>:not extracting: %s\n",
 565       pathname
 566     );
 567 # if DEBUGLEVEL & DEBUG_UPDATE_INVENTORY
 568   /*
 569    * Although no file has actually been installed, update
 570    * the inventory to simulate the effect of doing so.
 571    */
 572   installed->AddEntry( filename_key, pathname + sysroot_len );
 573 # endif
 574   return ProcessEntityData( -1 );
 575
 576 #else
 577   int fd = set_output_stream( pathname, octval( header.field.mode ) );
 578   int status = ProcessEntityData( fd );
 579   if( fd >= 0 )
 580   {
 581     /* File stream was written; close it...
 582      */
 583     close( fd );
 584     if( status == 0 )
 585     {
 586       /* ...and on successful completion, commit it and
 587        * record it in the installation database.
 588        */
 589       commit_saved_entity( pathname, octval( header.field.mtime ) );
 590       installed->AddEntry( filename_key, pathname + sysroot_len );
 591     }
 592
 593     else
 594     { /* The target file was not successfully and completely
 595        * written; discard it, and diagnose failure.
 596        */
 597       unlink( pathname );
 598       dmh_notify( DMH_ERROR, "%s: extraction failed\n", pathname );
 599     }
 600   }
 601   return status;
 602 #endif
 603 }
 604
 605 /* $RCSfile$: end of file */