2 # Copyright (C) 2012-2016 The Python Software Foundation.
3 # See LICENSE.txt and CONTRIBUTORS.txt.
6 from collections import deque
9 from glob import iglob as std_iglob
20 except ImportError: # pragma: no cover
30 except ImportError: # pragma: no cover
31 import dummy_threading as threading
34 from . import DistlibException
35 from .compat import (string_types, text_type, shutil, raw_input, StringIO,
36 cache_from_source, urlopen, urljoin, httplib, xmlrpclib,
37 splittype, HTTPHandler, BaseConfigurator, valid_ident,
38 Container, configparser, URLError, ZipFile, fsdecode,
41 logger = logging.getLogger(__name__)
44 # Requirement parsing code for name + optional constraints + optional extras
46 # e.g. 'foo >= 1.2, < 2.0 [bar, baz]'
48 # The regex can seem a bit hairy, so we build it up out of smaller pieces
49 # which are manageable.
# Building blocks for the requirement-parsing regex, handling strings like
# 'foo >= 1.2, < 2.0 [bar, baz]'. All backslash-bearing pieces use raw
# strings so '\s' reaches the re module intact (a non-raw '\s' is an
# invalid string escape and a SyntaxWarning on modern Pythons).
#
# NOTE(review): COMMA and IDENT are referenced below but their definitions
# were missing from this chunk; restored to match upstream distlib -
# confirm against the full file.
COMMA = r'\s*,\s*'
COMMA_RE = re.compile(COMMA)

IDENT = r'(\w|[.-])+'
EXTRA_IDENT = r'(\*|:(\*|\w+):|' + IDENT + ')'
VERSPEC = IDENT + r'\*?'

RELOP = '([<>=!~]=)|[<>]'

# The first relop is optional - if absent, will be taken as '~='
BARE_CONSTRAINTS = (r'(' + RELOP + r')?\s*(' + VERSPEC + r')(' + COMMA + '(' +
                    RELOP + r')\s*(' + VERSPEC + r'))*')

DIRECT_REF = r'(from\s+(?P<diref>.*))'

# Either the bare constraints or the bare constraints in parentheses
CONSTRAINTS = (r'\(\s*(?P<c1>' + BARE_CONSTRAINTS + '|' + DIRECT_REF +
               r')\s*\)|(?P<c2>' + BARE_CONSTRAINTS + r'\s*)')

EXTRA_LIST = EXTRA_IDENT + '(' + COMMA + EXTRA_IDENT + ')*'
EXTRAS = r'\[\s*(?P<ex>' + EXTRA_LIST + r')?\s*\]'
77 REQUIREMENT = ('(?P<dn>' + IDENT + r')\s*(' + EXTRAS + r'\s*)?(\s*' +
79 REQUIREMENT_RE = re.compile(REQUIREMENT)
82 # Used to scan through the constraints
84 RELOP_IDENT = '(?P<op>' + RELOP + r')\s*(?P<vn>' + VERSPEC + ')'
85 RELOP_IDENT_RE = re.compile(RELOP_IDENT)
87 def parse_requirement(s):
89 def get_constraint(m):
91 return d['op'], d['vn']
94 m = REQUIREMENT_RE.match(s)
98 cons = d['c1'] or d['c2']
104 url = d['diref'].strip()
110 if cons[0] not in '<>!=':
112 iterator = RELOP_IDENT_RE.finditer(cons)
113 cons = [get_constraint(m) for m in iterator]
114 rs = '%s (%s)' % (name, ', '.join(['%s %s' % con for con in cons]))
118 extras = COMMA_RE.split(d['ex'])
119 result = Container(name=name, constraints=cons, extras=extras,
120 requirement=rs, source=s, url=url)
124 def get_resources_dests(resources_root, rules):
125 """Find destinations for resources files"""
def get_rel_path(base, path):
    """Return *path* relative to *base* as a '/'-separated string with
    no leading '/'. Asserts that *path* actually lies under *base*."""
    to_posix = lambda s: s.replace(os.path.sep, '/')
    base, path = to_posix(base), to_posix(path)
    assert path.startswith(base)
    return path[len(base):].lstrip('/')
136 for base, suffix, dest in rules:
137 prefix = os.path.join(resources_root, base)
138 for abs_base in iglob(prefix):
139 abs_glob = os.path.join(abs_base, suffix)
140 for abs_path in iglob(abs_glob):
141 resource_file = get_rel_path(resources_root, abs_path)
142 if dest is None: # remove the entry if it was here
143 destinations.pop(resource_file, None)
145 rel_path = get_rel_path(abs_base, abs_path)
146 rel_dest = dest.replace(os.path.sep, '/').rstrip('/')
147 destinations[resource_file] = rel_dest + '/' + rel_path
152 if hasattr(sys, 'real_prefix'):
157 result = sys.prefix != getattr(sys, 'base_prefix', sys.prefix)
161 def get_executable():
162 # The __PYVENV_LAUNCHER__ dance is apparently no longer needed, as
163 # changes to the stub launcher mean that sys.executable always points
164 # to the stub on macOS
165 # if sys.platform == 'darwin' and ('__PYVENV_LAUNCHER__'
167 # result = os.environ['__PYVENV_LAUNCHER__']
169 # result = sys.executable
171 result = os.path.normcase(sys.executable)
172 if not isinstance(result, text_type):
173 result = fsdecode(result)
177 def proceed(prompt, allowed_chars, error_prompt=None, default=None):
182 if not s and default:
186 if c in allowed_chars:
189 p = '%c: %s\n%s' % (c, error_prompt, prompt)
193 def extract_by_key(d, keys):
194 if isinstance(keys, string_types):
202 def read_exports(stream):
203 if sys.version_info[0] >= 3:
204 # needs to be a text stream
205 stream = codecs.getreader('utf-8')(stream)
206 # Try to load as JSON, falling back on legacy format
208 stream = StringIO(data)
210 jdata = json.load(stream)
211 result = jdata['extensions']['python.exports']['exports']
212 for group, entries in result.items():
213 for k, v in entries.items():
214 s = '%s = %s' % (k, v)
215 entry = get_export_entry(s)
216 assert entry is not None
222 def read_stream(cp, stream):
223 if hasattr(cp, 'read_file'):
228 cp = configparser.ConfigParser()
230 read_stream(cp, stream)
231 except configparser.MissingSectionHeaderError:
233 data = textwrap.dedent(data)
234 stream = StringIO(data)
235 read_stream(cp, stream)
238 for key in cp.sections():
239 result[key] = entries = {}
240 for name, value in cp.items(key):
241 s = '%s = %s' % (name, value)
242 entry = get_export_entry(s)
243 assert entry is not None
245 entries[name] = entry
249 def write_exports(exports, stream):
250 if sys.version_info[0] >= 3:
251 # needs to be a text stream
252 stream = codecs.getwriter('utf-8')(stream)
253 cp = configparser.ConfigParser()
254 for k, v in exports.items():
255 # TODO check k, v for valid values
257 for entry in v.values():
258 if entry.suffix is None:
261 s = '%s:%s' % (entry.prefix, entry.suffix)
263 s = '%s [%s]' % (s, ', '.join(entry.flags))
264 cp.set(k, entry.name, s)
268 @contextlib.contextmanager
270 td = tempfile.mkdtemp()
276 @contextlib.contextmanager
286 @contextlib.contextmanager
287 def socket_timeout(seconds=15):
288 cto = socket.getdefaulttimeout()
290 socket.setdefaulttimeout(seconds)
293 socket.setdefaulttimeout(cto)
296 class cached_property(object):
297 def __init__(self, func):
299 #for attr in ('__name__', '__module__', '__doc__'):
300 # setattr(self, attr, getattr(func, attr, None))
302 def __get__(self, obj, cls=None):
305 value = self.func(obj)
306 object.__setattr__(obj, self.func.__name__, value)
307 #obj.__dict__[self.func.__name__] = value = self.func(obj)
def convert_path(pathname):
    """Return 'pathname' as a name that will work on the native filesystem.

    The path is split on '/' and put back together again using the current
    directory separator. Needed because filenames in the setup script are
    always supplied in Unix style, and have to be converted to the local
    convention before we can actually use them in the filesystem. Raises
    ValueError on non-Unix-ish systems if 'pathname' either starts or
    ends with a slash.
    """
    # NOTE(review): the early-return guards below were missing from this
    # chunk; restored per upstream distutils/distlib. Without them the
    # visible code raises IndexError on '' and TypeError when the path
    # is entirely '.' components (os.path.join with no args).
    if os.sep == '/':
        return pathname
    if not pathname:
        return pathname
    if pathname[0] == '/':
        raise ValueError("path '%s' cannot be absolute" % pathname)
    if pathname[-1] == '/':
        raise ValueError("path '%s' cannot end with '/'" % pathname)

    paths = pathname.split('/')
    while os.curdir in paths:
        paths.remove(os.curdir)
    if not paths:
        return os.curdir
    return os.path.join(*paths)
337 class FileOperator(object):
338 def __init__(self, dry_run=False):
339 self.dry_run = dry_run
343 def _init_record(self):
345 self.files_written = set()
346 self.dirs_created = set()
348 def record_as_written(self, path):
350 self.files_written.add(path)
352 def newer(self, source, target):
353 """Tell if the target is newer than the source.
355 Returns true if 'source' exists and is more recently modified than
356 'target', or if 'source' exists and 'target' doesn't.
358 Returns false if both exist and 'target' is the same age or younger
359 than 'source'. Raise PackagingFileError if 'source' does not exist.
361 Note that this test is not very accurate: files created in the same
362 second will have the same "age".
364 if not os.path.exists(source):
365 raise DistlibException("file '%r' does not exist" %
366 os.path.abspath(source))
367 if not os.path.exists(target):
370 return os.stat(source).st_mtime > os.stat(target).st_mtime
372 def copy_file(self, infile, outfile, check=True):
373 """Copy a file respecting dry-run and force flags.
375 self.ensure_dir(os.path.dirname(outfile))
376 logger.info('Copying %s to %s', infile, outfile)
380 if os.path.islink(outfile):
381 msg = '%s is a symlink' % outfile
382 elif os.path.exists(outfile) and not os.path.isfile(outfile):
383 msg = '%s is a non-regular file' % outfile
385 raise ValueError(msg + ' which would be overwritten')
386 shutil.copyfile(infile, outfile)
387 self.record_as_written(outfile)
389 def copy_stream(self, instream, outfile, encoding=None):
390 assert not os.path.isdir(outfile)
391 self.ensure_dir(os.path.dirname(outfile))
392 logger.info('Copying stream %s to %s', instream, outfile)
395 outstream = open(outfile, 'wb')
397 outstream = codecs.open(outfile, 'w', encoding=encoding)
399 shutil.copyfileobj(instream, outstream)
402 self.record_as_written(outfile)
404 def write_binary_file(self, path, data):
405 self.ensure_dir(os.path.dirname(path))
407 with open(path, 'wb') as f:
409 self.record_as_written(path)
411 def write_text_file(self, path, data, encoding):
412 self.ensure_dir(os.path.dirname(path))
414 with open(path, 'wb') as f:
415 f.write(data.encode(encoding))
416 self.record_as_written(path)
418 def set_mode(self, bits, mask, files):
419 if os.name == 'posix' or (os.name == 'java' and os._name == 'posix'):
420 # Set the executable bits (owner, group, and world) on
421 # all the files specified.
424 logger.info("changing mode of %s", f)
426 mode = (os.stat(f).st_mode | bits) & mask
427 logger.info("changing mode of %s to %o", f, mode)
430 set_executable_mode = lambda s, f: s.set_mode(0o555, 0o7777, f)
432 def ensure_dir(self, path):
433 path = os.path.abspath(path)
434 if path not in self.ensured and not os.path.exists(path):
435 self.ensured.add(path)
436 d, f = os.path.split(path)
438 logger.info('Creating %s' % path)
442 self.dirs_created.add(path)
444 def byte_compile(self, path, optimize=False, force=False, prefix=None):
445 dpath = cache_from_source(path, not optimize)
446 logger.info('Byte-compiling %s to %s', path, dpath)
448 if force or self.newer(path, dpath):
452 assert path.startswith(prefix)
453 diagpath = path[len(prefix):]
454 py_compile.compile(path, dpath, diagpath, True) # raise error
455 self.record_as_written(dpath)
458 def ensure_removed(self, path):
459 if os.path.exists(path):
460 if os.path.isdir(path) and not os.path.islink(path):
461 logger.debug('Removing directory tree at %s', path)
465 if path in self.dirs_created:
466 self.dirs_created.remove(path)
468 if os.path.islink(path):
472 logger.debug('Removing %s %s', s, path)
476 if path in self.files_written:
477 self.files_written.remove(path)
479 def is_writable(self, path):
482 if os.path.exists(path):
483 result = os.access(path, os.W_OK)
485 parent = os.path.dirname(path)
493 Commit recorded changes, turn off recording, return
497 result = self.files_written, self.dirs_created
503 for f in list(self.files_written):
504 if os.path.exists(f):
506 # dirs should all be empty now, except perhaps for
507 # __pycache__ subdirs
508 # reverse so that subdirs appear before their parents
509 dirs = sorted(self.dirs_created, reverse=True)
511 flist = os.listdir(d)
513 assert flist == ['__pycache__']
514 sd = os.path.join(d, flist[0])
516 os.rmdir(d) # should fail if non-empty
519 def resolve(module_name, dotted_path):
520 if module_name in sys.modules:
521 mod = sys.modules[module_name]
523 mod = __import__(module_name)
524 if dotted_path is None:
527 parts = dotted_path.split('.')
528 result = getattr(mod, parts.pop(0))
530 result = getattr(result, p)
534 class ExportEntry(object):
535 def __init__(self, name, prefix, suffix, flags):
543 return resolve(self.prefix, self.suffix)
545 def __repr__(self): # pragma: no cover
546 return '<ExportEntry %s = %s:%s %s>' % (self.name, self.prefix,
547 self.suffix, self.flags)
549 def __eq__(self, other):
550 if not isinstance(other, ExportEntry):
553 result = (self.name == other.name and
554 self.prefix == other.prefix and
555 self.suffix == other.suffix and
556 self.flags == other.flags)
559 __hash__ = object.__hash__
562 ENTRY_RE = re.compile(r'''(?P<name>(\w|[-.+])+)
563 \s*=\s*(?P<callable>(\w+)([:\.]\w+)*)
564 \s*(\[\s*(?P<flags>\w+(=\w+)?(,\s*\w+(=\w+)?)*)\s*\])?
567 def get_export_entry(specification):
568 m = ENTRY_RE.search(specification)
571 if '[' in specification or ']' in specification:
572 raise DistlibException("Invalid specification "
573 "'%s'" % specification)
578 colons = path.count(':')
580 prefix, suffix = path, None
583 raise DistlibException("Invalid specification "
584 "'%s'" % specification)
585 prefix, suffix = path.split(':')
588 if '[' in specification or ']' in specification:
589 raise DistlibException("Invalid specification "
590 "'%s'" % specification)
593 flags = [f.strip() for f in flags.split(',')]
594 result = ExportEntry(name, prefix, suffix, flags)
598 def get_cache_base(suffix=None):
600 Return the default base location for distlib caches. If the directory does
601 not exist, it is created. Use the suffix provided for the base directory,
602 and default to '.distlib' if it isn't provided.
604 On Windows, if LOCALAPPDATA is defined in the environment, then it is
605 assumed to be a directory, and will be the parent directory of the result.
606 On POSIX, and on Windows if LOCALAPPDATA is not defined, the user's home
607 directory - using os.expanduser('~') - will be the parent directory of
610 The result is just the directory '.distlib' in the parent directory as
611 determined above, or with the name specified with ``suffix``.
615 if os.name == 'nt' and 'LOCALAPPDATA' in os.environ:
616 result = os.path.expandvars('$localappdata')
618 # Assume posix, or old Windows
619 result = os.path.expanduser('~')
620 # we use 'isdir' instead of 'exists', because we want to
621 # fail if there's a file with that name
622 if os.path.isdir(result):
623 usable = os.access(result, os.W_OK)
625 logger.warning('Directory exists but is not writable: %s', result)
631 logger.warning('Unable to create %s', result, exc_info=True)
634 result = tempfile.mkdtemp()
635 logger.warning('Default location unusable, using %s', result)
636 return os.path.join(result, suffix)
def path_to_cache_dir(path):
    """
    Convert an absolute path to a directory name for use in a cache.

    The algorithm used is:

    #. On Windows, any ``':'`` in the drive is replaced with ``'---'``.
    #. Any occurrence of ``os.sep`` is replaced with ``'--'``.
    #. ``'.cache'`` is appended.
    """
    d, p = os.path.splitdrive(os.path.abspath(path))
    # NOTE(review): the 'if d:' guard was missing from this chunk;
    # restored per upstream distlib (d is '' on POSIX).
    if d:
        d = d.replace(':', '---')
    p = p.replace(os.sep, '--')
    return d + p + '.cache'
657 if not s.endswith('/'):
def parse_credentials(netloc):
    """Split a URL netloc into ``(username, password, host-and-port)``.

    Missing components are returned as None.
    """
    username = password = None
    # NOTE(review): the "'@' in netloc" guard and the no-password branch
    # were missing from this chunk; restored per upstream distlib.
    if '@' in netloc:
        # Everything before the first '@' is the credentials part.
        prefix, netloc = netloc.split('@', 1)
        if ':' not in prefix:
            username = prefix
        else:
            username, password = prefix.split(':', 1)
    return username, password, netloc
def get_process_umask():
    """Return the current process umask without permanently changing it.

    os.umask both sets and returns the mask, so set a dummy value to read
    the current one, then immediately restore it.
    """
    result = os.umask(0o22)
    # NOTE(review): the restore + return lines were missing from this
    # chunk; without them the function leaks the 0o22 umask and returns
    # None. Restored per upstream distlib.
    os.umask(result)
    return result
678 def is_string_sequence(seq):
681 for i, s in enumerate(seq):
682 if not isinstance(s, string_types):
# Matches '<name>-<version>' at the start of a filename (case-insensitive).
PROJECT_NAME_AND_VERSION = re.compile(
    '([a-z0-9_]+([.-][a-z_][a-z0-9_]*)*)-'
    '([a-z0-9_.+-]+)',
    re.I)
# Matches a '-pyX' or '-pyX.Y' Python-version tag in a filename.
PYTHON_VERSION = re.compile(r'-py(\d\.?\d?)')
693 def split_filename(filename, project_name=None):
695 Extract name, version, python version from a filename (no extension)
697 Return name, version, pyver or None
701 filename = unquote(filename).replace(' ', '-')
702 m = PYTHON_VERSION.search(filename)
705 filename = filename[:m.start()]
706 if project_name and len(filename) > len(project_name) + 1:
707 m = re.match(re.escape(project_name) + r'\b', filename)
710 result = filename[:n], filename[n + 1:], pyver
712 m = PROJECT_NAME_AND_VERSION.match(filename)
714 result = m.group(1), m.group(3), pyver
# Allow spaces in name because of legacy dists like "Twisted Core"
NAME_VERSION_RE = re.compile(r'(?P<name>[\w .-]+)\s*\(\s*(?P<ver>[^\s)]+)\)$')
def parse_name_and_version(p):
    """
    A utility method used to get name and version from a string.

    From e.g. a Provides-Dist value.

    :param p: A value in a form 'foo (1.0)'
    :return: The name and version as a tuple.
    :raises DistlibException: if the string is ill-formed.
    """
    m = NAME_VERSION_RE.match(p)
    # NOTE(review): the 'if not m' guard and the groupdict assignment were
    # missing from this chunk (the visible code raised unconditionally and
    # used an undefined 'd'); restored per upstream distlib.
    if not m:
        raise DistlibException('Ill-formed name/version string: \'%s\'' % p)
    d = m.groupdict()
    return d['name'].strip().lower(), d['ver']
736 def get_extras(requested, available):
738 requested = set(requested or [])
739 available = set(available or [])
741 requested.remove('*')
746 elif r.startswith('-'):
748 if unwanted not in available:
749 logger.warning('undeclared extra: %s' % unwanted)
750 if unwanted in result:
751 result.remove(unwanted)
753 if r not in available:
754 logger.warning('undeclared extra: %s' % r)
758 # Extended metadata functionality
761 def _get_external_data(url):
764 # urlopen might fail if it runs into redirections,
765 # because of Python issue #13696. Fixed in locators
766 # using a custom redirect handler.
768 headers = resp.info()
769 ct = headers.get('Content-Type')
770 if not ct.startswith('application/json'):
771 logger.debug('Unexpected response for JSON request: %s', ct)
773 reader = codecs.getreader('utf-8')(resp)
774 #data = reader.read().decode('utf-8')
775 #result = json.loads(data)
776 result = json.load(reader)
777 except Exception as e:
778 logger.exception('Failed to get external data for %s: %s', url, e)
781 _external_data_base_url = 'https://www.red-dove.com/pypi/projects/'
783 def get_project_data(name):
784 url = '%s/%s/project.json' % (name[0].upper(), name)
785 url = urljoin(_external_data_base_url, url)
786 result = _get_external_data(url)
789 def get_package_data(name, version):
790 url = '%s/%s/package-%s.json' % (name[0].upper(), name, version)
791 url = urljoin(_external_data_base_url, url)
792 return _get_external_data(url)
797 A class implementing a cache for resources that need to live in the file system
798 e.g. shared libraries. This class was moved from resources to here because it
799 could be used by other modules, e.g. the wheel module.
802 def __init__(self, base):
804 Initialise an instance.
806 :param base: The base directory where the cache should be located.
808 # we use 'isdir' instead of 'exists', because we want to
809 # fail if there's a file with that name
810 if not os.path.isdir(base): # pragma: no cover
812 if (os.stat(base).st_mode & 0o77) != 0:
813 logger.warning('Directory \'%s\' is not private', base)
814 self.base = os.path.abspath(os.path.normpath(base))
816 def prefix_to_dir(self, prefix):
818 Converts a resource prefix to a directory name in the cache.
820 return path_to_cache_dir(prefix)
827 for fn in os.listdir(self.base):
828 fn = os.path.join(self.base, fn)
830 if os.path.islink(fn) or os.path.isfile(fn):
832 elif os.path.isdir(fn):
835 not_removed.append(fn)
839 class EventMixin(object):
841 A very simple publish/subscribe system.
844 self._subscribers = {}
846 def add(self, event, subscriber, append=True):
848 Add a subscriber for an event.
850 :param event: The name of an event.
851 :param subscriber: The subscriber to be added (and called when the
853 :param append: Whether to append or prepend the subscriber to an
854 existing subscriber list for the event.
856 subs = self._subscribers
857 if event not in subs:
858 subs[event] = deque([subscriber])
862 sq.append(subscriber)
864 sq.appendleft(subscriber)
866 def remove(self, event, subscriber):
868 Remove a subscriber for an event.
870 :param event: The name of an event.
871 :param subscriber: The subscriber to be removed.
873 subs = self._subscribers
874 if event not in subs:
875 raise ValueError('No subscribers: %r' % event)
876 subs[event].remove(subscriber)
878 def get_subscribers(self, event):
880 Return an iterator for the subscribers for an event.
881 :param event: The event to return subscribers for.
883 return iter(self._subscribers.get(event, ()))
885 def publish(self, event, *args, **kwargs):
887 Publish a event and return a list of values returned by its
890 :param event: The event to publish.
891 :param args: The positional arguments to pass to the event's
893 :param kwargs: The keyword arguments to pass to the event's
897 for subscriber in self.get_subscribers(event):
899 value = subscriber(event, *args, **kwargs)
901 logger.exception('Exception during event publication')
904 logger.debug('publish %s: args = %s, kwargs = %s, result = %s',
905 event, args, kwargs, result)
911 class Sequencer(object):
915 self._nodes = set() # nodes with no preds/succs
917 def add_node(self, node):
918 self._nodes.add(node)
920 def remove_node(self, node, edges=False):
921 if node in self._nodes:
922 self._nodes.remove(node)
924 for p in set(self._preds.get(node, ())):
926 for s in set(self._succs.get(node, ())):
929 for k, v in list(self._preds.items()):
932 for k, v in list(self._succs.items()):
936 def add(self, pred, succ):
938 self._preds.setdefault(succ, set()).add(pred)
939 self._succs.setdefault(pred, set()).add(succ)
941 def remove(self, pred, succ):
944 preds = self._preds[succ]
945 succs = self._succs[pred]
946 except KeyError: # pragma: no cover
947 raise ValueError('%r not a successor of anything' % succ)
951 except KeyError: # pragma: no cover
952 raise ValueError('%r not a successor of %r' % (succ, pred))
954 def is_step(self, step):
955 return (step in self._preds or step in self._succs or
958 def get_steps(self, final):
959 if not self.is_step(final):
960 raise ValueError('Unknown: %r' % final)
968 # if a step was already seen,
969 # move it to the end (so it will appear earlier
970 # when reversed on return) ... but not for the
971 # final step, as that would be confusing for
979 preds = self._preds.get(step, ())
981 return reversed(result)
984 def strong_connections(self):
985 #http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
994 def strongconnect(node):
995 # set the depth index for this node to the smallest unused index
996 index[node] = index_counter[0]
997 lowlinks[node] = index_counter[0]
998 index_counter[0] += 1
1001 # Consider successors
1003 successors = graph[node]
1006 for successor in successors:
1007 if successor not in lowlinks:
1008 # Successor has not yet been visited
1009 strongconnect(successor)
1010 lowlinks[node] = min(lowlinks[node],lowlinks[successor])
1011 elif successor in stack:
1012 # the successor is in the stack and hence in the current
1013 # strongly connected component (SCC)
1014 lowlinks[node] = min(lowlinks[node],index[successor])
1016 # If `node` is a root node, pop the stack and generate an SCC
1017 if lowlinks[node] == index[node]:
1018 connected_component = []
1021 successor = stack.pop()
1022 connected_component.append(successor)
1023 if successor == node: break
1024 component = tuple(connected_component)
1025 # storing the result
1026 result.append(component)
1029 if node not in lowlinks:
1036 result = ['digraph G {']
1037 for succ in self._preds:
1038 preds = self._preds[succ]
1040 result.append(' %s -> %s;' % (pred, succ))
1041 for node in self._nodes:
1042 result.append(' %s;' % node)
1044 return '\n'.join(result)
1047 # Unarchiving functionality for zip, tar, tgz, tbz, whl
# Archive formats understood by unarchive().
ARCHIVE_EXTENSIONS = (
    '.tar.gz',
    '.tar.bz2',
    '.tar',
    '.zip',
    '.tgz',
    '.tbz',
    '.whl',
)
1053 def unarchive(archive_filename, dest_dir, format=None, check=True):
1055 def check_path(path):
1056 if not isinstance(path, text_type):
1057 path = path.decode('utf-8')
1058 p = os.path.abspath(os.path.join(dest_dir, path))
1059 if not p.startswith(dest_dir) or p[plen] != os.sep:
1060 raise ValueError('path outside destination: %r' % p)
1062 dest_dir = os.path.abspath(dest_dir)
1063 plen = len(dest_dir)
1066 if archive_filename.endswith(('.zip', '.whl')):
1068 elif archive_filename.endswith(('.tar.gz', '.tgz')):
1071 elif archive_filename.endswith(('.tar.bz2', '.tbz')):
1074 elif archive_filename.endswith('.tar'):
1077 else: # pragma: no cover
1078 raise ValueError('Unknown format for %r' % archive_filename)
1081 archive = ZipFile(archive_filename, 'r')
1083 names = archive.namelist()
1087 archive = tarfile.open(archive_filename, mode)
1089 names = archive.getnames()
1092 if format != 'zip' and sys.version_info[0] < 3:
1093 # See Python issue 17153. If the dest path contains Unicode,
1094 # tarfile extraction fails on Python 2.x if a member path name
1095 # contains non-ASCII characters - it leads to an implicit
1096 # bytes -> unicode conversion using ASCII to decode.
1097 for tarinfo in archive.getmembers():
1098 if not isinstance(tarinfo.name, text_type):
1099 tarinfo.name = tarinfo.name.decode('utf-8')
1100 archive.extractall(dest_dir)
def zip_dir(directory):
    """zip a directory tree into a BytesIO object"""
    result = io.BytesIO()
    dlen = len(directory)
    with ZipFile(result, "w") as zf:
        for root, dirs, files in os.walk(directory):
            # NOTE(review): the inner 'for name in files' loop header, the
            # 'rel' assignment and the final return were missing from this
            # chunk (the visible code used undefined 'name'/'rel');
            # restored per upstream distlib.
            for name in files:
                full = os.path.join(root, name)
                # Archive names are relative to the given directory root.
                rel = root[dlen:]
                dest = os.path.join(rel, name)
                zf.write(full, dest)
    return result
1121 # Simple progress bar
1124 UNITS = ('', 'K', 'M', 'G','T','P')
1127 class Progress(object):
1130 def __init__(self, minval=0, maxval=100):
1131 assert maxval is None or maxval >= minval
1132 self.min = self.cur = minval
1138 def update(self, curval):
1139 assert self.min <= curval
1140 assert self.max is None or curval <= self.max
1143 if self.started is None:
1146 self.elapsed = now - self.started
1148 def increment(self, incr):
1150 self.update(self.cur + incr)
1153 self.update(self.min)
1157 if self.max is not None:
1158 self.update(self.max)
1163 return self.unknown if self.max is None else self.max
1166 def percentage(self):
1169 elif self.max is None:
1172 v = 100.0 * (self.cur - self.min) / (self.max - self.min)
1173 result = '%3d %%' % v
1176 def format_duration(self, duration):
1177 if (duration <= 0) and self.max is None or self.cur == self.min:
1180 # result = '--:--:--'
1182 result = time.strftime('%H:%M:%S', time.gmtime(duration))
1190 #import pdb; pdb.set_trace()
1193 if self.max is None:
1195 elif self.elapsed == 0 or (self.cur == self.min):
1198 #import pdb; pdb.set_trace()
1199 t = float(self.max - self.min)
1200 t /= self.cur - self.min
1201 t = (t - 1) * self.elapsed
1202 return '%s: %s' % (prefix, self.format_duration(t))
1206 if self.elapsed == 0:
1209 result = (self.cur - self.min) / self.elapsed
1214 return '%d %sB/s' % (result, unit)
1217 # Glob functionality
# Captures a '{opt1,opt2,...}' alternation set in a glob pattern.
RICH_GLOB = re.compile(r'\{([^}]*)\}')
# Detects '**' adjacent to anything other than a separator, comma or brace.
_CHECK_RECURSIVE_GLOB = re.compile(r'[^/\\,{]\*\*|\*\*[^/\\,}]')
# Detects a '}' with no '{' before it, or a '{' that is never closed.
_CHECK_MISMATCH_SET = re.compile(r'^[^{]*\}|\{[^}]*$')


def iglob(path_glob):
    """Extended globbing function that supports ** and {opt1,opt2,opt3}."""
    if _CHECK_RECURSIVE_GLOB.search(path_glob):
        raise ValueError(
            'invalid glob %r: recursive glob "**" must be used alone'
            % path_glob)
    if _CHECK_MISMATCH_SET.search(path_glob):
        raise ValueError(
            "invalid glob %r: mismatching set marker '{' or '}'"
            % path_glob)
    return _iglob(path_glob)
1236 def _iglob(path_glob):
1237 rich_path_glob = RICH_GLOB.split(path_glob, 1)
1238 if len(rich_path_glob) > 1:
1239 assert len(rich_path_glob) == 3, rich_path_glob
1240 prefix, set, suffix = rich_path_glob
1241 for item in set.split(','):
1242 for path in _iglob(''.join((prefix, item, suffix))):
1245 if '**' not in path_glob:
1246 for item in std_iglob(path_glob):
1249 prefix, radical = path_glob.split('**', 1)
1256 radical = radical.lstrip('/')
1257 radical = radical.lstrip('\\')
1258 for path, dir, files in os.walk(prefix):
1259 path = os.path.normpath(path)
1260 for fn in _iglob(os.path.join(path, radical)):
1264 from .compat import (HTTPSHandler as BaseHTTPSHandler, match_hostname,
1269 # HTTPSConnection which verifies certificates/matches domains
1272 class HTTPSConnection(httplib.HTTPSConnection):
1273 ca_certs = None # set this to the path to the certs file (.pem)
1274 check_domain = True # only used if ca_certs is not None
1276 # noinspection PyPropertyAccess
1278 sock = socket.create_connection((self.host, self.port), self.timeout)
1279 if getattr(self, '_tunnel_host', False):
1283 if not hasattr(ssl, 'SSLContext'):
1286 cert_reqs = ssl.CERT_REQUIRED
1288 cert_reqs = ssl.CERT_NONE
1289 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file,
1290 cert_reqs=cert_reqs,
1291 ssl_version=ssl.PROTOCOL_SSLv23,
1292 ca_certs=self.ca_certs)
1293 else: # pragma: no cover
1294 context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
1295 context.options |= ssl.OP_NO_SSLv2
1297 context.load_cert_chain(self.cert_file, self.key_file)
1300 context.verify_mode = ssl.CERT_REQUIRED
1301 context.load_verify_locations(cafile=self.ca_certs)
1302 if getattr(ssl, 'HAS_SNI', False):
1303 kwargs['server_hostname'] = self.host
1304 self.sock = context.wrap_socket(sock, **kwargs)
1305 if self.ca_certs and self.check_domain:
1307 match_hostname(self.sock.getpeercert(), self.host)
1308 logger.debug('Host verified: %s', self.host)
1309 except CertificateError: # pragma: no cover
1310 self.sock.shutdown(socket.SHUT_RDWR)
1314 class HTTPSHandler(BaseHTTPSHandler):
1315 def __init__(self, ca_certs, check_domain=True):
1316 BaseHTTPSHandler.__init__(self)
1317 self.ca_certs = ca_certs
1318 self.check_domain = check_domain
1320 def _conn_maker(self, *args, **kwargs):
1322 This is called to create a connection instance. Normally you'd
1323 pass a connection class to do_open, but it doesn't actually check for
1324 a class, and just expects a callable. As long as we behave just as a
1325 constructor would have, we should be OK. If it ever changes so that
1326 we *must* pass a class, we'll create an UnsafeHTTPSConnection class
1327 which just sets check_domain to False in the class definition, and
1328 choose which one to pass to do_open.
1330 result = HTTPSConnection(*args, **kwargs)
1332 result.ca_certs = self.ca_certs
1333 result.check_domain = self.check_domain
1336 def https_open(self, req):
1338 return self.do_open(self._conn_maker, req)
1339 except URLError as e:
1340 if 'certificate verify failed' in str(e.reason):
1341 raise CertificateError('Unable to verify server certificate '
1342 'for %s' % req.host)
1347 # To prevent against mixing HTTP traffic with HTTPS (examples: A Man-In-The-
1348 # Middle proxy using HTTP listens on port 443, or an index mistakenly serves
1349 # HTML containing a http://xyz link when it should be https://xyz),
1350 # you can use the following handler class, which does not allow HTTP traffic.
1352 # It works by inheriting from HTTPHandler - so build_opener won't add a
1353 # handler for HTTP itself.
class HTTPSOnlyHandler(HTTPSHandler, HTTPHandler):
    """Handler that refuses plain-HTTP requests outright.

    It inherits from HTTPHandler so that build_opener will not install a
    working handler for the 'http' scheme alongside it.
    """

    def http_open(self, req):
        # Always fail: an HTTP request on a supposedly secure channel is
        # an error by definition for this handler.
        raise URLError('Unexpected HTTP request on what should be a secure '
                       'connection: %s' % req)
1361 # XML-RPC with timeouts
1364 _ver_info = sys.version_info[:2]
1366 if _ver_info == (2, 6):
1367 class HTTP(httplib.HTTP):
1368 def __init__(self, host='', port=None, **kwargs):
1369 if port == 0: # 0 means use port 0, not the default port
1371 self._setup(self._connection_class(host, port, **kwargs))
1375 class HTTPS(httplib.HTTPS):
1376 def __init__(self, host='', port=None, **kwargs):
1377 if port == 0: # 0 means use port 0, not the default port
1379 self._setup(self._connection_class(host, port, **kwargs))
1382 class Transport(xmlrpclib.Transport):
1383 def __init__(self, timeout, use_datetime=0):
1384 self.timeout = timeout
1385 xmlrpclib.Transport.__init__(self, use_datetime)
1387 def make_connection(self, host):
1388 h, eh, x509 = self.get_host_info(host)
1389 if _ver_info == (2, 6):
1390 result = HTTP(h, timeout=self.timeout)
1392 if not self._connection or host != self._connection[0]:
1393 self._extra_headers = eh
1394 self._connection = host, httplib.HTTPConnection(h)
1395 result = self._connection[1]
class SafeTransport(xmlrpclib.SafeTransport):
    """
    An XML-RPC HTTPS transport which applies a timeout to its connections.
    """
    def __init__(self, timeout, use_datetime=0):
        self.timeout = timeout
        xmlrpclib.SafeTransport.__init__(self, use_datetime)

    def make_connection(self, host):
        h, eh, kwargs = self.get_host_info(host)
        if not kwargs:
            kwargs = {}  # get_host_info may return None for the x509 info
        kwargs['timeout'] = self.timeout
        if _ver_info == (2, 6):
            # Fix: use the cleaned host 'h' (auth info stripped), consistent
            # with Transport.make_connection; original passed raw 'host'.
            result = HTTPS(h, None, **kwargs)
        else:
            if not self._connection or host != self._connection[0]:
                self._extra_headers = eh
                self._connection = host, httplib.HTTPSConnection(h, None,
                                                                 **kwargs)
            result = self._connection[1]
        return result
class ServerProxy(xmlrpclib.ServerProxy):
    """
    A ServerProxy which accepts an optional 'timeout' keyword argument and
    wires it through a timeout-aware transport.
    """
    def __init__(self, uri, **kwargs):
        self.timeout = timeout = kwargs.pop('timeout', None)
        # The above classes only come into play if a timeout
        # is specified
        if timeout is not None:
            scheme, _ = splittype(uri)
            use_datetime = kwargs.get('use_datetime', 0)
            if scheme == 'https':
                tcls = SafeTransport
            else:
                tcls = Transport
            kwargs['transport'] = t = tcls(timeout,
                                           use_datetime=use_datetime)
            # Keep a reference so callers can inspect/close the transport.
            self.transport = t
        xmlrpclib.ServerProxy.__init__(self, uri, **kwargs)
#
# CSV functionality. This is provided because on 2.x, the csv module can't
# handle Unicode. However, we need to deal with Unicode in e.g. RECORD files.
#
1441 def _csv_open(fn, mode, **kwargs):
1442 if sys.version_info[0] < 3:
1445 kwargs['newline'] = ''
1446 return open(fn, mode, **kwargs)
class CSVBase(object):
    """
    Common dialect settings and context-manager plumbing for the CSV
    reader/writer wrappers below. Subclasses set self.stream.
    """
    defaults = {
        'delimiter': str(','),      # The strs are used because we need native
        'quotechar': str('"'),      # str in the csv API (2.x won't take
        'lineterminator': str('\n') # Unicode)
    }

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        self.stream.close()
class CSVReader(CSVBase):
    """
    CSV reader accepting either a binary 'stream' or a 'path' keyword,
    yielding rows of text (decoding from UTF-8 on 2.x).
    """
    def __init__(self, **kwargs):
        if 'stream' in kwargs:
            stream = kwargs['stream']
            if sys.version_info[0] >= 3:
                # needs to be a text stream
                stream = codecs.getreader('utf-8')(stream)
            self.stream = stream
        else:
            self.stream = _csv_open(kwargs['path'], 'r')
        self.reader = csv.reader(self.stream, **self.defaults)

    def __iter__(self):
        return self

    def next(self):
        result = next(self.reader)
        if sys.version_info[0] < 3:
            # 2.x csv returns bytes; decode each cell to Unicode.
            for i, item in enumerate(result):
                if not isinstance(item, text_type):
                    result[i] = item.decode('utf-8')
        return result

    # Same method serves both 2.x (next) and 3.x (__next__) protocols.
    __next__ = next
class CSVWriter(CSVBase):
    """
    CSV writer for path *fn* which encodes Unicode cells to UTF-8 on 2.x
    before handing rows to the underlying csv.writer.
    """
    def __init__(self, fn, **kwargs):
        self.stream = _csv_open(fn, 'w')
        self.writer = csv.writer(self.stream, **self.defaults)

    def writerow(self, row):
        if sys.version_info[0] < 3:
            # 2.x csv can't take Unicode; encode text cells to UTF-8 bytes.
            r = []
            for item in row:
                if isinstance(item, text_type):
                    item = item.encode('utf-8')
                r.append(item)
            row = r
        self.writer.writerow(row)
#
# Configurator functionality
#
class Configurator(BaseConfigurator):
    """
    A configurator which, in addition to the base value converters,
    supports an 'inc://path' protocol for including JSON from other files.
    """

    value_converters = dict(BaseConfigurator.value_converters)
    value_converters['inc'] = 'inc_convert'

    def __init__(self, config, base=None):
        super(Configurator, self).__init__(config)
        # Directory against which relative inc:// paths are resolved.
        self.base = base or os.getcwd()

    def configure_custom(self, config):
        """
        Instantiate the object described by *config*: '()' names the
        callable (resolved if given as a dotted string), '[]' supplies
        positional args, '.' supplies attributes to set on the result,
        and remaining valid-identifier keys become keyword args.
        """
        def convert(o):
            # Recursively convert containers; scalars and protocol strings
            # are delegated to the base class's convert().
            if isinstance(o, (list, tuple)):
                result = type(o)([convert(i) for i in o])
            elif isinstance(o, dict):
                if '()' in o:
                    result = self.configure_custom(o)
                else:
                    result = {}
                    for k in o:
                        result[k] = convert(o[k])
            else:
                result = self.convert(o)
            return result

        c = config.pop('()')
        if not callable(c):
            c = self.resolve(c)
        props = config.pop('.', None)
        # Check for valid identifiers
        args = config.pop('[]', ())
        if args:
            args = tuple([convert(o) for o in args])
        items = [(k, convert(config[k])) for k in config if valid_ident(k)]
        kwargs = dict(items)
        result = c(*args, **kwargs)
        if props:
            for n, v in props.items():
                setattr(result, n, convert(v))
        return result

    def __getitem__(self, key):
        result = self.config[key]
        if isinstance(result, dict) and '()' in result:
            # Instantiate lazily and cache, so repeated access returns the
            # same object.
            self.config[key] = result = self.configure_custom(result)
        return result

    def inc_convert(self, value):
        """Default converter for the inc:// protocol."""
        if not os.path.isabs(value):
            value = os.path.join(self.base, value)
        with codecs.open(value, 'r', encoding='utf-8') as f:
            result = json.load(f)
        return result
#
# Mixin for running subprocesses and capturing their output
#
class SubprocessMixin(object):
    """
    Mixin for running subprocesses and capturing their output.
    """
    def __init__(self, verbose=False, progress=None):
        self.verbose = verbose
        self.progress = progress

    def reader(self, stream, context):
        """
        Read lines from a subprocess' output stream and either pass to a
        progress callable (if specified) or write progress information to
        sys.stderr.
        """
        progress = self.progress
        verbose = self.verbose
        while True:
            s = stream.readline()
            if not s:
                break  # EOF: subprocess closed this stream
            if progress is not None:
                progress(s, context)
            else:
                if not verbose:
                    sys.stderr.write('.')
                else:
                    sys.stderr.write(s.decode('utf-8'))
                sys.stderr.flush()
        stream.close()

    def run_command(self, cmd, **kwargs):
        """
        Run *cmd*, draining stdout/stderr on background threads so the
        child can't block on a full pipe; returns the finished Popen.
        """
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE, **kwargs)
        t1 = threading.Thread(target=self.reader, args=(p.stdout, 'stdout'))
        t1.start()
        t2 = threading.Thread(target=self.reader, args=(p.stderr, 'stderr'))
        t2.start()
        p.wait()
        t1.join()
        t2.join()
        if self.progress is not None:
            self.progress('done.', 'main')
        elif self.verbose:
            sys.stderr.write('done.\n')
        return p
def normalize_name(name):
    """Normalize a python package name a la PEP 503"""
    # https://www.python.org/dev/peps/pep-0503/#normalized-names
    # Runs of '-', '_' and '.' collapse to a single '-', then lowercase.
    return re.sub(r'[-_.]+', '-', name).lower()