6 * Written by Keith Marshall <keithmarshall@users.sourceforge.net>
7 * Copyright (C) 2009, 2010, MinGW Project
10 * Implementation of package archive processing methods, for reading
11 * and extracting content from tar archives; provides implementations
12 * for each of the pkgTarArchiveProcessor and pkgTarArchiveInstaller
16 * This is free software. Permission is granted to copy, modify and
17 * redistribute this software, under the provisions of the GNU General
18 * Public License, Version 3, (or, at your option, any later version),
19 * as published by the Free Software Foundation; see the file COPYING
20 * for licensing details.
22 * Note, in particular, that this software is provided "as is", in the
23 * hope that it may prove useful, but WITHOUT WARRANTY OF ANY KIND; not
24 * even an implied WARRANTY OF MERCHANTABILITY, nor of FITNESS FOR ANY
25 * PARTICULAR PURPOSE. Under no circumstances will the author, or the
26 * MinGW Project, accept liability for any damages, however caused,
27 * arising from the use of this software.
33 #include <sys/types.h>
48 * Class Implementation: pkgTarArchiveProcessor
/* pkgTarArchiveProcessor::pkgTarArchiveProcessor( pkg )
 *
 * Binds a package's XML release record to its sysroot and opens the
 * archive stream from which the package content will be read.
 *
 * Steps shown by the statements below:
 *
 *  - `pkg' is saved in `origin'; it must be non-NULL, must be an element
 *    of type `release_key', and must provide a `tarname_key' property,
 *    which is saved in `tarname', (and initially also in `pkgfile').
 *
 *  - The sysroot record is obtained from pkg->GetSysRoot(), keyed on the
 *    subsystem name which pkgSpecs derives from the tarname.
 *
 *  - If the sysroot has a `pathname_key' property, it is expanded by
 *    mkpath() using the "%F%%/M/%%F" template; a strdup() copy of the
 *    result is kept in `sysroot_path', and `sysroot_len' is set to one
 *    less than the size mkpath() reports for expanding that result with
 *    an empty file name.
 *
 *  - `pkgfile' is replaced by pkg->ArchiveName(), expanded against the
 *    pkgArchivePath() template, and `stream' is opened on the resulting
 *    path by pkgOpenArchiveStream().
 *
 * NOTE(review): how these steps nest inside one another, and what state
 * is left behind when a pre-condition fails, should be confirmed against
 * the enclosing braces and initialisers.
 *
 * NOTE(review): both mkpath() buffers are variable length arrays, which
 * are a compiler extension in C++; the strdup() result is stored without
 * a check on the lines below -- TODO confirm both are acceptable here.
 */
52 pkgTarArchiveProcessor::pkgTarArchiveProcessor( pkgXmlNode *pkg )
54 /* Constructor to associate a package tar archive with its
55 * nominated sysroot and respective installation directory path,
56 * and prepare it for processing, using an appropriate streaming
57 * decompression filter; (choice of filter is based on archive
58 * file name extension; file names are restricted to the
59 * POSIX Portable Character Set).
61 * First, we anticipate an invalid initialisation state...
70 /* The 'pkg' XML database entry must be non-NULL, must
71 * represent a package release, and must specify a canonical
72 * tarname to identify the package...
74 if( ((origin = pkg) != NULL) && pkg->IsElementOfType( release_key )
75 && ((tarname = pkg->GetPropVal( tarname_key, NULL )) != NULL) )
77 /* When these pre-conditions are satisfied, we may proceed
78 * to identify and locate the sysroot record with which this
79 * package is to be associated...
81 pkgSpecs lookup( pkgfile = tarname );
82 if( (sysroot = pkg->GetSysRoot( lookup.GetSubSystemName() )) != NULL )
84 /* Having located the requisite sysroot record, we may
85 * retrieve its specified installation path prefix...
88 if( (prefix = sysroot->GetPropVal( pathname_key, NULL )) != NULL )
90 /* ...and incorporate it into a formatting template
91 * for use in deriving the full path names for files
92 * which are installed from this package.
94 const char *template_format = "%F%%/M/%%F";
95 char template_text[mkpath( NULL, template_format, prefix, NULL )];
96 mkpath( template_text, template_format, prefix, NULL );
97 sysroot_len = mkpath( NULL, template_text, "", NULL ) - 1;
98 sysroot_path = strdup( template_text );
101 /* Some older packages don't use the canonical tarname
102 * for the archive file name; identify the real file name
103 * associated with such packages...
105 pkgfile = pkg->ArchiveName();
107 /* Finally, initialise the data stream which we will use
108 * for reading the package content.
110 const char *archive_path_template = pkgArchivePath();
111 char archive_path_name[mkpath( NULL, archive_path_template, pkgfile, NULL )];
112 mkpath( archive_path_name, archive_path_template, pkgfile, NULL );
113 stream = pkgOpenArchiveStream( archive_path_name );
/* pkgTarArchiveProcessor::~pkgTarArchiveProcessor()
 *
 * Releases the resources acquired by the constructor.  The statement
 * shown below passes `sysroot_path' to free(), via a cast to void*,
 * (presumably because the member is const-qualified -- TODO confirm).
 *
 * NOTE(review): the description also promises release of the manifest
 * and shutdown of the archive stream; confirm that the corresponding
 * statements accompany the free() call.
 */
117 pkgTarArchiveProcessor::~pkgTarArchiveProcessor()
119 /* Destructor must release the heap memory allocated in
120 * the constructor, (by strdup and pkgManifest), clean up
121 * the decompression filter state, and close the archive
124 free( (void *)(sysroot_path) );
/* pkgTarArchiveProcessor::ProcessLinkedEntity( pathname )
 *
 * Handler for hard link and symbolic link archive entries.
 *
 *   pathname   path of the link itself, as mapped into the sysroot
 *
 * This is a stub: no link is created.  The arguments shown below belong
 * to a diagnostic which reports `pathname', together with the link
 * target taken from header.field.linkname.
 *
 * NOTE(review): the value returned to the caller, (which is assigned to
 * `status' by Process()), should be confirmed.
 */
129 int pkgTarArchiveProcessor::ProcessLinkedEntity( const char *pathname )
131 /* FIXME: Win32 links need special handling; for hard links, we
132 * may be able to create them directly, with >= Win2K and NTFS;
133 * for symlinks on *all* Win32 variants, and for hard links on
134 * FAT32 or Win9x, we need to make physical copies of the source
135 * file, at the link target location.
137 * For now, we simply ignore links.
140 "FIXME:ProcessLinkedEntity<stub>:Ignoring link: %s --> %s\n",
141 pathname, header.field.linkname
/* compute_octval( p, len )
 *
 * Helper to convert the ASCII representation of an octal value, (as
 * recorded within a tar archive header field), to its numeric value.
 *
 *   p     start of the field; it need not be NUL terminated
 *   len   width of the field, in bytes; no more than `len' bytes are read
 *
 * Any leading bytes which are not octal digits, (padding spaces, NULs,
 * or other garbage), are skipped; conversion then continues until the
 * field is exhausted, or the first non-digit is found.  Returns zero for
 * a field which contains no octal digits at all.
 *
 * Use the octval() macro, defined below, to convert a header field which
 * is declared as a char array; it supplies the field width automatically.
 */
uint64_t compute_octval( const char *p, size_t len )
{
  uint64_t value = 0;

  while( (len > 0) && ((*p < '0') || (*p > '7')) )
  {
    /* Step over leading garbage; the pointer and the residual
     * field width must advance together, so that we can never
     * read beyond the end of the field.
     */
    ++p; --len;
  }
  while( (len > 0) && (*p >= '0') && (*p <= '7') )
  {
    /* Accumulate octal digits; (each represents exactly three
     * bits in the accumulated value), until we either exhaust
     * the width of the field, or we encounter trailing junk.
     */
    value = (value << 3) + (uint64_t)(*p++ - '0'); --len;
  }
  return value;
}
# define octval( FIELD ) compute_octval( FIELD, sizeof( FIELD ) )
/* pkgTarArchiveProcessor::GetArchiveEntry()
 *
 * Reads the next header record from `stream' into header.aggregate, and
 * validates it.  Return values established by the statements and
 * comments below:
 *
 *    +1   the computed checksum matches header.field.chksum
 *    -2   the computed checksum does not match
 *     0   every byte of the header is zero, i.e. end of archive
 *
 * A read which delivers fewer than sizeof( header ) bytes is reported as
 * an error; (TODO confirm the code returned for that case).  Callers in
 * this file treat any result which is not greater than zero as "no
 * further entry".
 *
 * The checksum is accumulated over the whole header, with each byte from
 * header.field.chksum up to, but excluding, header.field.typeflag counted
 * as an ASCII space; this relies on typeflag being the field which
 * immediately follows chksum in the header layout.
 *
 * NOTE(review): the byte count from stream->Read() is stored in a size_t;
 * if Read() can report failure with a negative value, that value would
 * wrap, and pass the short read test -- TODO confirm Read()'s return type.
 */
173 int pkgTarArchiveProcessor::GetArchiveEntry()
175 /* Read header for next available entry in the tar archive;
176 * check for end-of-archive mark, (all zero header); verify
177 * checksum for active entry.
179 char *buf = header.aggregate;
180 size_t count = stream->Read( buf, sizeof( header ) );
182 if( count < sizeof( header ) )
184 /* Failed to read a complete header; return error code.
191 * Outer loop checks for an all zero header...
195 /* Any non-zero byte transfers control to an inner loop,
196 * to rescan the entire header, accumulating its checksum...
199 for( buf = header.aggregate, count = sizeof( header ); count--; ++buf )
201 if( (buf < header.field.chksum) || (buf >= header.field.typeflag) )
203 * ...counting the actual binary value of each byte,
204 * in all but the checksum field itself...
208 /* ...while treating each byte within the checksum field as
209 * having an effective value equivalent to ASCII <space>.
213 /* After computing the checksum for a non-zero header,
214 * verify it against the value recorded in the checksum field;
215 * return +1 for a successful match, or -2 for failure.
217 return (sum == octval( header.field.chksum )) ? 1 : -2;
220 /* If we get to here, then the inner loop was never entered;
221 * the outer loop has completed, confirming an all zero header;
222 * return zero, to indicate end of archive.
/* pkgTarArchiveProcessor::Process()
 *
 * Walks the archive, one entry at a time, for as long as
 * GetArchiveEntry() returns a positive value.  For each entry:
 *
 *  - `prefix' is taken from header.field.prefix, (NULL when that field
 *    is empty), and `name' from header.field.name.
 *
 *  - An entry of type TAR_ENTITY_TYPE_GNU_LONGNAME has its data read by
 *    EntityDataAsString(); the header which follows it is then fetched,
 *    and the string which was read serves as the path name for that
 *    following entry.
 *
 *  - mkpath() combines `sysroot_path', `name' and `prefix' into the
 *    local `pathname' buffer, (a variable length array).
 *
 *  - The entry is dispatched on header.field.typeflag: directories to
 *    ProcessDirectory(), (after scanning back over trailing '/' and '\\'
 *    characters); links and symlinks to ProcessLinkedEntity(); regular
 *    files to ProcessDataStream(); any other type raises a DMH_ERROR
 *    diagnostic.
 *
 * Returns the success code when every entry has been processed; (TODO
 * confirm its value, and the conditions under which the loop is
 * abandoned early).
 *
 * NOTE(review): both long name failure paths emit a diagnostic; confirm
 * that processing does not continue with a NULL or stale name after
 * either of them, and that the string from EntityDataAsString(), (which
 * is allocated by malloc), is freed once the entry has been handled.
 */
227 int pkgTarArchiveProcessor::Process()
229 /* Generic method for reading tar archives, and extracting their
230 * content; loops over each archive entry in turn...
232 while( GetArchiveEntry() > 0 )
234 char *prefix = *header.field.prefix ? header.field.prefix : NULL;
235 char *name = header.field.name;
237 /* Handle the GNU long name header format.
238 * If the pathname overflows the name field, GNU tar creates a special
239 * entry type, where the data contains the full pathname for the
242 char *longname = NULL;
243 if( *header.field.typeflag == TAR_ENTITY_TYPE_GNU_LONGNAME )
245 /* Extract the full pathname from the data of this entry.
247 longname = EntityDataAsString();
249 dmh_notify( DMH_ERROR, "Unable to read a long name entry\n" );
251 /* Read the entry for which this long name is intended.
253 if( GetArchiveEntry() <= 0 )
254 dmh_notify( DMH_ERROR, "Expected a new entry after a long name entry\n" );
256 /* Use the previously determined long name as the pathname for this entry.
262 /* Found an archive entry; map it to an equivalent file system
263 * path name, within the designated sysroot hierarchy.
265 char pathname[mkpath( NULL, sysroot_path, name, prefix )];
266 mkpath( pathname, sysroot_path, name, prefix );
270 /* Direct further processing to the appropriate handler; (this
271 * is specific to the archive entry classification)...
273 switch( *header.field.typeflag )
277 case TAR_ENTITY_TYPE_DIRECTORY:
279 * We may need to take some action in respect of directories;
280 * e.g. we may need to create a directory, or even a sequence
281 * of directories, to establish a location within the sysroot
284 { /* Note: Microsoft's implementation of stat() appears to choke
285 * on directory path names with trailing slashes; thus, before
286 * we invoke the directory processing routine, (which may need
287 * to call stat(), to check if the specified directory already
288 * exists), we remove any such trailing slashes.
290 char *p = pathname + sizeof( pathname ) - 1;
291 while( (p > pathname) && ((*--p == '/') || (*p == '\\')) )
295 /* We are now ready to process the directory path name entry...
297 status = ProcessDirectory( pathname );
300 case TAR_ENTITY_TYPE_LINK:
301 case TAR_ENTITY_TYPE_SYMLINK:
303 * Links ultimately represent file system entities in
304 * our sysroot hierarchy, but we need special processing
305 * to handle them correctly...
308 status = ProcessLinkedEntity( pathname );
311 case TAR_ENTITY_TYPE_FILE:
312 case TAR_ENTITY_TYPE_ALTFILE:
314 * These represent regular files; the file content is
315 * embedded within the archive stream, so we need to be
316 * prepared to read or copy it, as appropriate...
319 ProcessDataStream( pathname );
323 /* FIXME: we make no provision for handling any other
324 * type of archive entry; we should provide some more
325 * robust error handling, but for now we simply emit
326 * a diagnostic, and return an error condition code...
329 dmh_notify( DMH_ERROR,
330 "unexpected archive entry classification: type %d\n",
331 (int)(*header.field.typeflag)
336 /* If we didn't bail out before getting to here, then the archive
337 * was processed successfully; return the success code.
/* pkgTarArchiveProcessor::ProcessEntityData( fd )
 *
 * Consumes the data which follows the current header in the archive.
 *
 *   fd   descriptor to which the data is written; any negative value
 *        causes the data to be read and discarded
 *
 * The data length is taken from header.field.size.  Data is read in
 * blocks which start at sixteen times sizeof( header ), and are halved
 * while they exceed the residual length, but never below
 * sizeof( header ); the final block is therefore read complete with its
 * padding, and only the residual data bytes are written to `fd'.
 *
 * The loop ends when the residual length reaches zero, or when `status'
 * becomes non-zero; a short read, or a write() which does not transfer
 * the full block, is treated as a failure.  (TODO confirm the status
 * values which are reported for each of those failures.)
 *
 * NOTE(review): the transfer buffer is a variable length array, which is
 * a compiler extension in C++.  Both result checks cast `block_size' to
 * int before comparing; this is safe only while the block size cannot
 * exceed sixteen header records, as is the case below.
 */
342 int pkgTarArchiveProcessor::ProcessEntityData( int fd )
344 /* Generic method for reading past the data associated with
345 * a specific header within a tar archive; if given a negative
346 * value for `fd', it will simply skip over the data, otherwise
347 * `fd' is assumed to represent a descriptor for an opened file
348 * stream, to which the data will be copied (extracted).
352 /* Initialise a counter for the length of the data content, and
353 * specify the default size for the transfer buffer in which to
354 * process it; make the initial size of the transfer buffer 16
355 * times the header size.
357 uint64_t bytes_to_copy = octval( header.field.size );
358 size_t block_size = sizeof( header ) << 4;
360 /* While we still have unread data, and no processing error...
362 while( (bytes_to_copy > 0) && (status == 0) )
364 /* Adjust the requested size for the transfer buffer, shrinking
365 * it by 50% at each step, until it is smaller than the remaining
366 * data length, but never smaller than the header record length.
368 while( (bytes_to_copy < block_size) && (block_size > sizeof( header )) )
371 /* Allocate a transfer buffer of the requested size, and populate
372 * it, by reading data from the archive; (since the transfer buffer
373 * is never smaller than the header length, this will also capture
374 * any additional padding bytes, which may be required to keep the
375 * data length equal to an exact multiple of the header length).
377 char buffer[block_size];
378 if( stream->Read( buffer, block_size ) < (int)(block_size) )
380 * Failure to fully populate the transfer buffer, (i.e. a short
381 * read), indicates a corrupt archive; bail out immediately.
385 /* When the number of actual data bytes expected is fewer than the
386 * total number of bytes in the transfer buffer...
388 if( bytes_to_copy < block_size )
390 * ...then we have reached the end of the data for the current
391 * archived entity; adjust the block size to reflect the number
392 * of actual data bytes present in the transfer buffer...
394 block_size = bytes_to_copy;
396 /* With the number of actual data bytes present now accurately
397 * reflected by the block size, we save that data to the stream
398 * specified for archive extraction, (if any).
400 if( (fd >= 0) && (write( fd, buffer, block_size ) != (int)(block_size)) )
402 * An extraction error occurred; set the status code to
407 /* Adjust the count of remaining unprocessed data bytes, and begin
408 * a new processing cycle, to capture any which may be present.
410 bytes_to_copy -= block_size;
413 /* Finally, when all data for the current archive entry has been
414 * processed, we return to the caller with an appropriate completion
420 char *pkgTarArchiveProcessor::EntityDataAsString()
422 /* Read the data associated with a specific header within a tar archive
423 * and return it as a string. The return value is stored in memory which
424 * is allocated by malloc; it should be freed when no longer required.
426 * It is assumed that the return data can be accommodated within available
427 * heap memory. Since the length isn't returned, we assume that the string
428 * is NUL-terminated, and that it contains no embedded NULs.
430 * In the event of any error, NULL is returned.
433 uint64_t bytes_to_copy = octval( header.field.size );
435 /* Round the buffer size to the smallest multiple of the record size.
437 bytes_to_copy += sizeof( header ) - 1;
438 bytes_to_copy -= bytes_to_copy % sizeof( header );
440 /* Allocate the data buffer.
442 data = (char*)(malloc( bytes_to_copy ));
446 /* Read the data into the buffer.
448 size_t count = stream->Read( data, bytes_to_copy );
449 if( count < bytes_to_copy )
451 /* Failure to fully populate the transfer buffer, (i.e. a short
452 * read), indicates a corrupt archive.
462 * Class Implementation: pkgTarArchiveInstaller
static int commit_saved_entity( const char *pathname, time_t mtime )
{
  /* Helper to set the access and modification times for a file,
   * after extraction from an archive, to match the specified "mtime";
   * (typically "mtime" is as recorded within the archive).
   *
   *   pathname   path of the file which has just been extracted
   *   mtime      time stamp to apply, as both access and modification time
   *
   * Returns the result of utime(): zero on success, or -1 on failure.
   */
  struct utimbuf timestamp;

  timestamp.actime = timestamp.modtime = mtime;
  return utime( pathname, &timestamp );
}
/* pkgTarArchiveInstaller::pkgTarArchiveInstaller( pkg )
 *
 * Delegates all set-up to the pkgTarArchiveProcessor constructor; then,
 * provided that `tarname' and `sysroot' are both non-NULL, and that
 * stream->IsReady() reports true, allocates a pkgManifest for
 * ( package_key, tarname ), and keeps it in `installed'.
 *
 * NOTE(review): `stream' is dereferenced without a NULL check here, and
 * the value of `installed' when the condition fails is not set below --
 * TODO confirm that pkgOpenArchiveStream() never yields NULL, and that
 * `installed' is initialised elsewhere.
 */
479 pkgTarArchiveInstaller::
480 pkgTarArchiveInstaller( pkgXmlNode *pkg ):pkgTarArchiveProcessor( pkg )
482 /* Constructor: having successfully set up the pkgTarArchiveProcessor
483 * base class, we attach a pkgManifest to track the installation.
485 if( (tarname != NULL) && (sysroot != NULL) && stream->IsReady() )
486 installed = new pkgManifest( package_key, tarname );
/* pkgTarArchiveInstaller::Process()
 *
 * Runs pkgTarArchiveProcessor::Process() over the archive; when that
 * returns zero, the installation is recorded by binding the manifest to
 * the sysroot, (installed->BindSysRoot), and by calling pkgRegister()
 * with the sysroot, the originating XML record, the tarname, and the
 * archive file name.
 *
 * NOTE(review): presumably returns the `status' obtained from the base
 * class method -- TODO confirm.  `installed' is dereferenced without a
 * check, so this relies on the constructor having created the manifest.
 */
489 int pkgTarArchiveInstaller::Process()
491 /* Specialisation of the base class Process() method.
494 /* First, process the archive as for the base class...
496 if( (status = pkgTarArchiveProcessor::Process()) == 0 )
498 /* ...then, on successful completion...
500 * Update the package installation manifest, to record
501 * the installation in the current sysroot...
503 installed->BindSysRoot( sysroot, package_key );
504 pkgRegister( sysroot, origin, tarname, pkgfile );
/* pkgTarArchiveInstaller::ProcessDirectory( pathname )
 *
 * Handler for directory entries.
 *
 *   pathname   full path of the directory, including the sysroot prefix
 *
 * Two variants appear below:
 *
 *  - the working variant calls mkdir_recursive( pathname, 0755 ); a zero
 *    result adds a `dirname_key' entry to the manifest, while any other
 *    result raises a DMH_ERROR diagnostic;
 *
 *  - the stub variant only reports that the directory is not being
 *    created, but still adds the manifest entry.
 *
 * In both, the manifest entry is `pathname' with its first `sysroot_len'
 * characters skipped, i.e. the path relative to the sysroot.
 *
 * NOTE(review): presumably the variant is selected at build time, and
 * `status' is returned to the caller -- TODO confirm both.
 */
509 int pkgTarArchiveInstaller::ProcessDirectory( const char *pathname )
511 /* Create the directory infrastructure required to support
512 * a specific package installation.
517 if( (status = mkdir_recursive( pathname, 0755 )) == 0 )
519 * Either the specified directory already exists,
520 * or we just successfully created it; attach a reference
521 * in the installation manifest for the current package.
523 installed->AddEntry( dirname_key, pathname + sysroot_len );
526 /* A required subdirectory could not be created;
527 * diagnose this failure.
529 dmh_notify( DMH_ERROR, "cannot create directory `%s'\n", pathname );
534 * FIXME:maybe adapt for 'dry-run' or 'verbose' use.
539 "FIXME:ProcessDirectory<stub>:not executing: mkdir -p %s\n",
543 installed->AddEntry( dirname_key, pathname + sysroot_len );
/* pkgTarArchiveInstaller::ProcessDataStream( pathname )
 *
 * Handler for regular file entries.
 *
 *   pathname   full path of the file, including the sysroot prefix
 *
 * Two variants appear below:
 *
 *  - the working variant opens the target with set_output_stream(),
 *    passing the permissions decoded from header.field.mode, and copies
 *    the entry data to it with ProcessEntityData( fd ).  On success, the
 *    file's time stamps are set from header.field.mtime, and a
 *    `filename_key' entry is added to the manifest; on failure, a
 *    DMH_ERROR diagnostic is raised;
 *
 *  - the stub variant reports that the file is not being extracted, adds
 *    the manifest entry regardless, and skips over the entry data by
 *    returning ProcessEntityData( -1 ).
 *
 * In both, the manifest entry is `pathname' with its first `sysroot_len'
 * characters skipped, i.e. the path relative to the sysroot.
 *
 * NOTE(review): the result of commit_saved_entity() is ignored, so a
 * failure to set the time stamps passes unreported.  Presumably the
 * variant is selected at build time, and the working variant returns
 * `status' -- TODO confirm both.
 */
549 int pkgTarArchiveInstaller::ProcessDataStream( const char *pathname )
551 /* Extract file data from the archive, and copy it to the
552 * associated target file stream, if any.
555 int fd = set_output_stream( pathname, octval( header.field.mode ) );
556 int status = ProcessEntityData( fd );
559 /* File stream was written; close it...
564 /* ...and on successful completion, commit it and
565 * record it in the installation database.
567 commit_saved_entity( pathname, octval( header.field.mtime ) );
568 installed->AddEntry( filename_key, pathname + sysroot_len );
573 /* The target file was not successfully and completely
574 * written; discard it, and diagnose failure.
577 dmh_notify( DMH_ERROR, "%s: extraction failed\n", pathname );
585 * FIXME:maybe adapt for 'dry-run' or 'verbose' use.
588 "FIXME:ProcessDataStream<stub>:not extracting: %s\n",
592 installed->AddEntry( filename_key, pathname + sysroot_len );
594 return ProcessEntityData( -1 );
598 /* $RCSfile$: end of file */