2 # Copyright (C) 2012-2016 The Python Software Foundation.
3 # See LICENSE.txt and CONTRIBUTORS.txt.
6 from collections import deque
9 from glob import iglob as std_iglob
20 except ImportError: # pragma: no cover
30 except ImportError: # pragma: no cover
31 import dummy_threading as threading
34 from . import DistlibException
35 from .compat import (string_types, text_type, shutil, raw_input, StringIO,
36 cache_from_source, urlopen, urljoin, httplib, xmlrpclib,
37 splittype, HTTPHandler, BaseConfigurator, valid_ident,
38 Container, configparser, URLError, ZipFile, fsdecode,
41 logger = logging.getLogger(__name__)
44 # Requirement parsing code for name + optional constraints + optional extras
46 # e.g. 'foo >= 1.2, < 2.0 [bar, baz]'
48 # The regex can seem a bit hairy, so we build it up out of smaller pieces
49 # which are manageable.
# Building blocks for the requirement-parsing regex, handling strings like
# 'foo >= 1.2, < 2.0 [bar, baz]'. All backslash-bearing pieces use raw
# strings so '\s' reaches the re module intact (a non-raw '\s' is an
# invalid string escape and a SyntaxWarning on modern Pythons).
#
# NOTE(review): COMMA and IDENT are referenced below but their definitions
# were missing from this chunk; restored to match upstream distlib -
# confirm against the full file.
COMMA = r'\s*,\s*'
COMMA_RE = re.compile(COMMA)

IDENT = r'(\w|[.-])+'
EXTRA_IDENT = r'(\*|:(\*|\w+):|' + IDENT + ')'
VERSPEC = IDENT + r'\*?'

RELOP = '([<>=!~]=)|[<>]'

# The first relop is optional - if absent, will be taken as '~='
BARE_CONSTRAINTS = (r'(' + RELOP + r')?\s*(' + VERSPEC + r')(' + COMMA + '(' +
                    RELOP + r')\s*(' + VERSPEC + r'))*')

DIRECT_REF = r'(from\s+(?P<diref>.*))'

# Either the bare constraints or the bare constraints in parentheses
CONSTRAINTS = (r'\(\s*(?P<c1>' + BARE_CONSTRAINTS + '|' + DIRECT_REF +
               r')\s*\)|(?P<c2>' + BARE_CONSTRAINTS + r'\s*)')

EXTRA_LIST = EXTRA_IDENT + '(' + COMMA + EXTRA_IDENT + ')*'
EXTRAS = r'\[\s*(?P<ex>' + EXTRA_LIST + r')?\s*\]'
77 REQUIREMENT = ('(?P<dn>' + IDENT + r')\s*(' + EXTRAS + r'\s*)?(\s*' +
79 REQUIREMENT_RE = re.compile(REQUIREMENT)
82 # Used to scan through the constraints
84 RELOP_IDENT = '(?P<op>' + RELOP + r')\s*(?P<vn>' + VERSPEC + ')'
85 RELOP_IDENT_RE = re.compile(RELOP_IDENT)
87 def parse_requirement(s):
89 def get_constraint(m):
91 return d['op'], d['vn']
94 m = REQUIREMENT_RE.match(s)
98 cons = d['c1'] or d['c2']
104 url = d['diref'].strip()
110 if cons[0] not in '<>!=':
112 iterator = RELOP_IDENT_RE.finditer(cons)
113 cons = [get_constraint(m) for m in iterator]
114 rs = '%s (%s)' % (name, ', '.join(['%s %s' % con for con in cons]))
118 extras = COMMA_RE.split(d['ex'])
119 result = Container(name=name, constraints=cons, extras=extras,
120 requirement=rs, source=s, url=url)
124 def get_resources_dests(resources_root, rules):
125 """Find destinations for resources files"""
def get_rel_path(base, path):
    """Return *path* relative to *base* as a '/'-separated string with
    no leading '/'. Asserts that *path* actually lies under *base*."""
    to_posix = lambda s: s.replace(os.path.sep, '/')
    base, path = to_posix(base), to_posix(path)
    assert path.startswith(base)
    return path[len(base):].lstrip('/')
136 for base, suffix, dest in rules:
137 prefix = os.path.join(resources_root, base)
138 for abs_base in iglob(prefix):
139 abs_glob = os.path.join(abs_base, suffix)
140 for abs_path in iglob(abs_glob):
141 resource_file = get_rel_path(resources_root, abs_path)
142 if dest is None: # remove the entry if it was here
143 destinations.pop(resource_file, None)
145 rel_path = get_rel_path(abs_base, abs_path)
146 rel_dest = dest.replace(os.path.sep, '/').rstrip('/')
147 destinations[resource_file] = rel_dest + '/' + rel_path
152 if hasattr(sys, 'real_prefix'):
157 result = sys.prefix != getattr(sys, 'base_prefix', sys.prefix)
161 def get_executable():
162 # The __PYVENV_LAUNCHER__ dance is apparently no longer needed, as
163 # changes to the stub launcher mean that sys.executable always points
164 # to the stub on macOS
165 # if sys.platform == 'darwin' and ('__PYVENV_LAUNCHER__'
167 # result = os.environ['__PYVENV_LAUNCHER__']
169 # result = sys.executable
171 result = os.path.normcase(sys.executable)
172 if not isinstance(result, text_type):
173 result = fsdecode(result)
177 def proceed(prompt, allowed_chars, error_prompt=None, default=None):
182 if not s and default:
186 if c in allowed_chars:
189 p = '%c: %s\n%s' % (c, error_prompt, prompt)
193 def extract_by_key(d, keys):
194 if isinstance(keys, string_types):
202 def read_exports(stream):
203 if sys.version_info[0] >= 3:
204 # needs to be a text stream
205 stream = codecs.getreader('utf-8')(stream)
206 # Try to load as JSON, falling back on legacy format
208 stream = StringIO(data)
210 jdata = json.load(stream)
211 result = jdata['extensions']['python.exports']['exports']
212 for group, entries in result.items():
213 for k, v in entries.items():
214 s = '%s = %s' % (k, v)
215 entry = get_export_entry(s)
216 assert entry is not None
222 def read_stream(cp, stream):
223 if hasattr(cp, 'read_file'):
228 cp = configparser.ConfigParser()
230 read_stream(cp, stream)
231 except configparser.MissingSectionHeaderError:
233 data = textwrap.dedent(data)
234 stream = StringIO(data)
235 read_stream(cp, stream)
238 for key in cp.sections():
239 result[key] = entries = {}
240 for name, value in cp.items(key):
241 s = '%s = %s' % (name, value)
242 entry = get_export_entry(s)
243 assert entry is not None
245 entries[name] = entry
249 def write_exports(exports, stream):
250 if sys.version_info[0] >= 3:
251 # needs to be a text stream
252 stream = codecs.getwriter('utf-8')(stream)
253 cp = configparser.ConfigParser()
254 for k, v in exports.items():
255 # TODO check k, v for valid values
257 for entry in v.values():
258 if entry.suffix is None:
261 s = '%s:%s' % (entry.prefix, entry.suffix)
263 s = '%s [%s]' % (s, ', '.join(entry.flags))
264 cp.set(k, entry.name, s)
268 @contextlib.contextmanager
270 td = tempfile.mkdtemp()
276 @contextlib.contextmanager
286 @contextlib.contextmanager
287 def socket_timeout(seconds=15):
288 cto = socket.getdefaulttimeout()
290 socket.setdefaulttimeout(seconds)
293 socket.setdefaulttimeout(cto)
296 class cached_property(object):
297 def __init__(self, func):
299 #for attr in ('__name__', '__module__', '__doc__'):
300 # setattr(self, attr, getattr(func, attr, None))
302 def __get__(self, obj, cls=None):
305 value = self.func(obj)
306 object.__setattr__(obj, self.func.__name__, value)
307 #obj.__dict__[self.func.__name__] = value = self.func(obj)
def convert_path(pathname):
    """Return 'pathname' as a name that will work on the native filesystem.

    The path is split on '/' and put back together again using the current
    directory separator. Needed because filenames in the setup script are
    always supplied in Unix style, and have to be converted to the local
    convention before we can actually use them in the filesystem. Raises
    ValueError on non-Unix-ish systems if 'pathname' either starts or
    ends with a slash.
    """
    # NOTE(review): the early-return guards below were missing from this
    # chunk; restored per upstream distutils/distlib. Without them the
    # visible code raises IndexError on '' and TypeError when the path
    # is entirely '.' components (os.path.join with no args).
    if os.sep == '/':
        return pathname
    if not pathname:
        return pathname
    if pathname[0] == '/':
        raise ValueError("path '%s' cannot be absolute" % pathname)
    if pathname[-1] == '/':
        raise ValueError("path '%s' cannot end with '/'" % pathname)

    paths = pathname.split('/')
    while os.curdir in paths:
        paths.remove(os.curdir)
    if not paths:
        return os.curdir
    return os.path.join(*paths)
337 class FileOperator(object):
338 def __init__(self, dry_run=False):
339 self.dry_run = dry_run
343 def _init_record(self):
345 self.files_written = set()
346 self.dirs_created = set()
348 def record_as_written(self, path):
350 self.files_written.add(path)
352 def newer(self, source, target):
353 """Tell if the target is newer than the source.
355 Returns true if 'source' exists and is more recently modified than
356 'target', or if 'source' exists and 'target' doesn't.
358 Returns false if both exist and 'target' is the same age or younger
359 than 'source'. Raise PackagingFileError if 'source' does not exist.
361 Note that this test is not very accurate: files created in the same
362 second will have the same "age".
364 if not os.path.exists(source):
365 raise DistlibException("file '%r' does not exist" %
366 os.path.abspath(source))
367 if not os.path.exists(target):
370 return os.stat(source).st_mtime > os.stat(target).st_mtime
372 def copy_file(self, infile, outfile, check=True):
373 """Copy a file respecting dry-run and force flags.
375 self.ensure_dir(os.path.dirname(outfile))
376 logger.info('Copying %s to %s', infile, outfile)
380 if os.path.islink(outfile):
381 msg = '%s is a symlink' % outfile
382 elif os.path.exists(outfile) and not os.path.isfile(outfile):
383 msg = '%s is a non-regular file' % outfile
385 raise ValueError(msg + ' which would be overwritten')
386 shutil.copyfile(infile, outfile)
387 self.record_as_written(outfile)
389 def copy_stream(self, instream, outfile, encoding=None):
390 assert not os.path.isdir(outfile)
391 self.ensure_dir(os.path.dirname(outfile))
392 logger.info('Copying stream %s to %s', instream, outfile)
395 outstream = open(outfile, 'wb')
397 outstream = codecs.open(outfile, 'w', encoding=encoding)
399 shutil.copyfileobj(instream, outstream)
402 self.record_as_written(outfile)
404 def write_binary_file(self, path, data):
405 self.ensure_dir(os.path.dirname(path))
407 with open(path, 'wb') as f:
409 self.record_as_written(path)
411 def write_text_file(self, path, data, encoding):
412 self.ensure_dir(os.path.dirname(path))
414 with open(path, 'wb') as f:
415 f.write(data.encode(encoding))
416 self.record_as_written(path)
418 def set_mode(self, bits, mask, files):
419 if os.name == 'posix' or (os.name == 'java' and os._name == 'posix'):
420 # Set the executable bits (owner, group, and world) on
421 # all the files specified.
424 logger.info("changing mode of %s", f)
426 mode = (os.stat(f).st_mode | bits) & mask
427 logger.info("changing mode of %s to %o", f, mode)
430 set_executable_mode = lambda s, f: s.set_mode(0o555, 0o7777, f)
432 def ensure_dir(self, path):
433 path = os.path.abspath(path)
434 if path not in self.ensured and not os.path.exists(path):
435 self.ensured.add(path)
436 d, f = os.path.split(path)
438 logger.info('Creating %s' % path)
442 self.dirs_created.add(path)
444 def byte_compile(self, path, optimize=False, force=False, prefix=None):
445 dpath = cache_from_source(path, not optimize)
446 logger.info('Byte-compiling %s to %s', path, dpath)
448 if force or self.newer(path, dpath):
452 assert path.startswith(prefix)
453 diagpath = path[len(prefix):]
454 py_compile.compile(path, dpath, diagpath, True) # raise error
455 self.record_as_written(dpath)
458 def ensure_removed(self, path):
459 if os.path.exists(path):
460 if os.path.isdir(path) and not os.path.islink(path):
461 logger.debug('Removing directory tree at %s', path)
465 if path in self.dirs_created:
466 self.dirs_created.remove(path)
468 if os.path.islink(path):
472 logger.debug('Removing %s %s', s, path)
476 if path in self.files_written:
477 self.files_written.remove(path)
479 def is_writable(self, path):
482 if os.path.exists(path):
483 result = os.access(path, os.W_OK)
485 parent = os.path.dirname(path)
493 Commit recorded changes, turn off recording, return
497 result = self.files_written, self.dirs_created
503 for f in list(self.files_written):
504 if os.path.exists(f):
506 # dirs should all be empty now, except perhaps for
507 # __pycache__ subdirs
508 # reverse so that subdirs appear before their parents
509 dirs = sorted(self.dirs_created, reverse=True)
511 flist = os.listdir(d)
513 assert flist == ['__pycache__']
514 sd = os.path.join(d, flist[0])
516 os.rmdir(d) # should fail if non-empty
519 def resolve(module_name, dotted_path):
520 if module_name in sys.modules:
521 mod = sys.modules[module_name]
523 mod = __import__(module_name)
524 if dotted_path is None:
527 parts = dotted_path.split('.')
528 result = getattr(mod, parts.pop(0))
530 result = getattr(result, p)
534 class ExportEntry(object):
535 def __init__(self, name, prefix, suffix, flags):
543 return resolve(self.prefix, self.suffix)
545 def __repr__(self): # pragma: no cover
546 return '<ExportEntry %s = %s:%s %s>' % (self.name, self.prefix,
547 self.suffix, self.flags)
549 def __eq__(self, other):
550 if not isinstance(other, ExportEntry):
553 result = (self.name == other.name and
554 self.prefix == other.prefix and
555 self.suffix == other.suffix and
556 self.flags == other.flags)
559 __hash__ = object.__hash__
562 ENTRY_RE = re.compile(r'''(?P<name>(\w|[-.+])+)
563 \s*=\s*(?P<callable>(\w+)([:\.]\w+)*)
564 \s*(\[\s*(?P<flags>\w+(=\w+)?(,\s*\w+(=\w+)?)*)\s*\])?
567 def get_export_entry(specification):
568 m = ENTRY_RE.search(specification)
571 if '[' in specification or ']' in specification:
572 raise DistlibException("Invalid specification "
573 "'%s'" % specification)
578 colons = path.count(':')
580 prefix, suffix = path, None
583 raise DistlibException("Invalid specification "
584 "'%s'" % specification)
585 prefix, suffix = path.split(':')
588 if '[' in specification or ']' in specification:
589 raise DistlibException("Invalid specification "
590 "'%s'" % specification)
593 flags = [f.strip() for f in flags.split(',')]
594 result = ExportEntry(name, prefix, suffix, flags)
598 def get_cache_base(suffix=None):
600 Return the default base location for distlib caches. If the directory does
601 not exist, it is created. Use the suffix provided for the base directory,
602 and default to '.distlib' if it isn't provided.
604 On Windows, if LOCALAPPDATA is defined in the environment, then it is
605 assumed to be a directory, and will be the parent directory of the result.
606 On POSIX, and on Windows if LOCALAPPDATA is not defined, the user's home
607 directory - using os.expanduser('~') - will be the parent directory of
610 The result is just the directory '.distlib' in the parent directory as
611 determined above, or with the name specified with ``suffix``.
615 if os.name == 'nt' and 'LOCALAPPDATA' in os.environ:
616 result = os.path.expandvars('$localappdata')
618 # Assume posix, or old Windows
619 result = os.path.expanduser('~')
620 # we use 'isdir' instead of 'exists', because we want to
621 # fail if there's a file with that name
622 if os.path.isdir(result):
623 usable = os.access(result, os.W_OK)
625 logger.warning('Directory exists but is not writable: %s', result)
631 logger.warning('Unable to create %s', result, exc_info=True)
634 result = tempfile.mkdtemp()
635 logger.warning('Default location unusable, using %s', result)
636 return os.path.join(result, suffix)
def path_to_cache_dir(path):
    """
    Convert an absolute path to a directory name for use in a cache.

    The algorithm used is:

    #. On Windows, any ``':'`` in the drive is replaced with ``'---'``.
    #. Any occurrence of ``os.sep`` is replaced with ``'--'``.
    #. ``'.cache'`` is appended.
    """
    d, p = os.path.splitdrive(os.path.abspath(path))
    # NOTE(review): the 'if d:' guard was missing from this chunk;
    # restored per upstream distlib (d is '' on POSIX).
    if d:
        d = d.replace(':', '---')
    p = p.replace(os.sep, '--')
    return d + p + '.cache'
657 if not s.endswith('/'):
def parse_credentials(netloc):
    """Split a URL netloc into ``(username, password, host-and-port)``.

    Missing components are returned as None.
    """
    username = password = None
    # NOTE(review): the "'@' in netloc" guard and the no-password branch
    # were missing from this chunk; restored per upstream distlib.
    if '@' in netloc:
        # Everything before the first '@' is the credentials part.
        prefix, netloc = netloc.split('@', 1)
        if ':' not in prefix:
            username = prefix
        else:
            username, password = prefix.split(':', 1)
    return username, password, netloc
def get_process_umask():
    """Return the current process umask without permanently changing it.

    os.umask both sets and returns the mask, so set a dummy value to read
    the current one, then immediately restore it.
    """
    result = os.umask(0o22)
    # NOTE(review): the restore + return lines were missing from this
    # chunk; without them the function leaks the 0o22 umask and returns
    # None. Restored per upstream distlib.
    os.umask(result)
    return result
678 def is_string_sequence(seq):
681 for i, s in enumerate(seq):
682 if not isinstance(s, string_types):
# Matches '<name>-<version>' at the start of a filename (case-insensitive).
PROJECT_NAME_AND_VERSION = re.compile(
    '([a-z0-9_]+([.-][a-z_][a-z0-9_]*)*)-'
    '([a-z0-9_.+-]+)',
    re.I)
# Matches a '-pyX' or '-pyX.Y' Python-version tag in a filename.
PYTHON_VERSION = re.compile(r'-py(\d\.?\d?)')
693 def split_filename(filename, project_name=None):
695 Extract name, version, python version from a filename (no extension)
697 Return name, version, pyver or None
701 filename = unquote(filename).replace(' ', '-')
702 m = PYTHON_VERSION.search(filename)
705 filename = filename[:m.start()]
706 if project_name and len(filename) > len(project_name) + 1:
707 m = re.match(re.escape(project_name) + r'\b', filename)
710 result = filename[:n], filename[n + 1:], pyver
712 m = PROJECT_NAME_AND_VERSION.match(filename)
714 result = m.group(1), m.group(3), pyver
# Allow spaces in name because of legacy dists like "Twisted Core"
NAME_VERSION_RE = re.compile(r'(?P<name>[\w .-]+)\s*\(\s*(?P<ver>[^\s)]+)\)$')
def parse_name_and_version(p):
    """
    A utility method used to get name and version from a string.

    From e.g. a Provides-Dist value.

    :param p: A value in a form 'foo (1.0)'
    :return: The name and version as a tuple.
    :raises DistlibException: if the string is ill-formed.
    """
    m = NAME_VERSION_RE.match(p)
    # NOTE(review): the 'if not m' guard and the groupdict assignment were
    # missing from this chunk (the visible code raised unconditionally and
    # used an undefined 'd'); restored per upstream distlib.
    if not m:
        raise DistlibException('Ill-formed name/version string: \'%s\'' % p)
    d = m.groupdict()
    return d['name'].strip().lower(), d['ver']
736 def get_extras(requested, available):
738 requested = set(requested or [])
739 available = set(available or [])
741 requested.remove('*')
746 elif r.startswith('-'):
748 if unwanted not in available:
749 logger.warning('undeclared extra: %s' % unwanted)
750 if unwanted in result:
751 result.remove(unwanted)
753 if r not in available:
754 logger.warning('undeclared extra: %s' % r)
758 # Extended metadata functionality
761 def _get_external_data(url):
764 # urlopen might fail if it runs into redirections,
765 # because of Python issue #13696. Fixed in locators
766 # using a custom redirect handler.
768 headers = resp.info()
769 ct = headers.get('Content-Type')
770 if not ct.startswith('application/json'):
771 logger.debug('Unexpected response for JSON request: %s', ct)
773 reader = codecs.getreader('utf-8')(resp)
774 #data = reader.read().decode('utf-8')
775 #result = json.loads(data)
776 result = json.load(reader)
777 except Exception as e:
778 logger.exception('Failed to get external data for %s: %s', url, e)
781 _external_data_base_url = 'https://www.red-dove.com/pypi/projects/'
783 def get_project_data(name):
784 url = '%s/%s/project.json' % (name[0].upper(), name)
785 url = urljoin(_external_data_base_url, url)
786 result = _get_external_data(url)
789 def get_package_data(name, version):
790 url = '%s/%s/package-%s.json' % (name[0].upper(), name, version)
791 url = urljoin(_external_data_base_url, url)
792 return _get_external_data(url)
797 A class implementing a cache for resources that need to live in the file system
798 e.g. shared libraries. This class was moved from resources to here because it
799 could be used by other modules, e.g. the wheel module.
802 def __init__(self, base):
804 Initialise an instance.
806 :param base: The base directory where the cache should be located.
808 # we use 'isdir' instead of 'exists', because we want to
809 # fail if there's a file with that name
810 if not os.path.isdir(base): # pragma: no cover
812 if (os.stat(base).st_mode & 0o77) != 0:
813 logger.warning('Directory \'%s\' is not private', base)
814 self.base = os.path.abspath(os.path.normpath(base))
816 def prefix_to_dir(self, prefix):
818 Converts a resource prefix to a directory name in the cache.
820 return path_to_cache_dir(prefix)
827 for fn in os.listdir(self.base):
828 fn = os.path.join(self.base, fn)
830 if os.path.islink(fn) or os.path.isfile(fn):
832 elif os.path.isdir(fn):
835 not_removed.append(fn)
839 class EventMixin(object):
841 A very simple publish/subscribe system.
844 self._subscribers = {}
846 def add(self, event, subscriber, append=True):
848 Add a subscriber for an event.
850 :param event: The name of an event.
851 :param subscriber: The subscriber to be added (and called when the
853 :param append: Whether to append or prepend the subscriber to an
854 existing subscriber list for the event.
856 subs = self._subscribers
857 if event not in subs:
858 subs[event] = deque([subscriber])
862 sq.append(subscriber)
864 sq.appendleft(subscriber)
866 def remove(self, event, subscriber):
868 Remove a subscriber for an event.
870 :param event: The name of an event.
871 :param subscriber: The subscriber to be removed.
873 subs = self._subscribers
874 if event not in subs:
875 raise ValueError('No subscribers: %r' % event)
876 subs[event].remove(subscriber)
878 def get_subscribers(self, event):
880 Return an iterator for the subscribers for an event.
881 :param event: The event to return subscribers for.
883 return iter(self._subscribers.get(event, ()))
885 def publish(self, event, *args, **kwargs):
887 Publish a event and return a list of values returned by its
890 :param event: The event to publish.
891 :param args: The positional arguments to pass to the event's
893 :param kwargs: The keyword arguments to pass to the event's
897 for subscriber in self.get_subscribers(event):
899 value = subscriber(event, *args, **kwargs)
901 logger.exception('Exception during event publication')
904 logger.debug('publish %s: args = %s, kwargs = %s, result = %s',
905 event, args, kwargs, result)
911 class Sequencer(object):
915 self._nodes = set() # nodes with no preds/succs
917 def add_node(self, node):
918 self._nodes.add(node)
920 def remove_node(self, node, edges=False):
921 if node in self._nodes:
922 self._nodes.remove(node)
924 for p in set(self._preds.get(node, ())):
926 for s in set(self._succs.get(node, ())):
929 for k, v in list(self._preds.items()):
932 for k, v in list(self._succs.items()):
936 def add(self, pred, succ):
938 self._preds.setdefault(succ, set()).add(pred)
939 self._succs.setdefault(pred, set()).add(succ)
941 def remove(self, pred, succ):
944 preds = self._preds[succ]
945 succs = self._succs[pred]
946 except KeyError: # pragma: no cover
947 raise ValueError('%r not a successor of anything' % succ)
951 except KeyError: # pragma: no cover
952 raise ValueError('%r not a successor of %r' % (succ, pred))
954 def is_step(self, step):
955 return (step in self._preds or step in self._succs or
958 def get_steps(self, final):
959 if not self.is_step(final):
960 raise ValueError('Unknown: %r' % final)
968 # if a step was already seen,
969 # move it to the end (so it will appear earlier
970 # when reversed on return) ... but not for the
971 # final step, as that would be confusing for
979 preds = self._preds.get(step, ())
981 return reversed(result)
984 def strong_connections(self):
985 #http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
994 def strongconnect(node):
995 # set the depth index for this node to the smallest unused index
996 index[node] = index_counter[0]
997 lowlinks[node] = index_counter[0]
998 index_counter[0] += 1
1001 # Consider successors
1003 successors = graph[node]
1006 for successor in successors:
1007 if successor not in lowlinks:
1008 # Successor has not yet been visited
1009 strongconnect(successor)
1010 lowlinks[node] = min(lowlinks[node],lowlinks[successor])
1011 elif successor in stack:
1012 # the successor is in the stack and hence in the current
1013 # strongly connected component (SCC)
1014 lowlinks[node] = min(lowlinks[node],index[successor])
1016 # If `node` is a root node, pop the stack and generate an SCC
1017 if lowlinks[node] == index[node]:
1018 connected_component = []
1021 successor = stack.pop()
1022 connected_component.append(successor)
1023 if successor == node: break
1024 component = tuple(connected_component)
1025 # storing the result
1026 result.append(component)
1029 if node not in lowlinks:
1036 result = ['digraph G {']
1037 for succ in self._preds:
1038 preds = self._preds[succ]
1040 result.append(' %s -> %s;' % (pred, succ))
1041 for node in self._nodes:
1042 result.append(' %s;' % node)
1044 return '\n'.join(result)
1047 # Unarchiving functionality for zip, tar, tgz, tbz, whl
# Archive formats understood by unarchive().
ARCHIVE_EXTENSIONS = (
    '.tar.gz',
    '.tar.bz2',
    '.tar',
    '.zip',
    '.tgz',
    '.tbz',
    '.whl',
)
1053 def unarchive(archive_filename, dest_dir, format=None, check=True):
1055 def check_path(path):
1056 if not isinstance(path, text_type):
1057 path = path.decode('utf-8')
1058 p = os.path.abspath(os.path.join(dest_dir, path))
1059 if not p.startswith(dest_dir) or p[plen] != os.sep:
1060 raise ValueError('path outside destination: %r' % p)
1062 dest_dir = os.path.abspath(dest_dir)
1063 plen = len(dest_dir)
1066 if archive_filename.endswith(('.zip', '.whl')):
1068 elif archive_filename.endswith(('.tar.gz', '.tgz')):
1071 elif archive_filename.endswith(('.tar.bz2', '.tbz')):
1074 elif archive_filename.endswith('.tar'):
1077 else: # pragma: no cover
1078 raise ValueError('Unknown format for %r' % archive_filename)
1081 archive = ZipFile(archive_filename, 'r')
1083 names = archive.namelist()
1087 archive = tarfile.open(archive_filename, mode)
1089 names = archive.getnames()
1092 if format != 'zip' and sys.version_info[0] < 3:
1093 # See Python issue 17153. If the dest path contains Unicode,
1094 # tarfile extraction fails on Python 2.x if a member path name
1095 # contains non-ASCII characters - it leads to an implicit
1096 # bytes -> unicode conversion using ASCII to decode.
1097 for tarinfo in archive.getmembers():
1098 if not isinstance(tarinfo.name, text_type):
1099 tarinfo.name = tarinfo.name.decode('utf-8')
1100 archive.extractall(dest_dir)
def zip_dir(directory):
    """zip a directory tree into a BytesIO object"""
    result = io.BytesIO()
    dlen = len(directory)
    with ZipFile(result, "w") as zf:
        for root, dirs, files in os.walk(directory):
            # NOTE(review): the inner 'for name in files' loop header, the
            # 'rel' assignment and the final return were missing from this
            # chunk (the visible code used undefined 'name'/'rel');
            # restored per upstream distlib.
            for name in files:
                full = os.path.join(root, name)
                # Archive names are relative to the given directory root.
                rel = root[dlen:]
                dest = os.path.join(rel, name)
                zf.write(full, dest)
    return result
1121 # Simple progress bar
1124 UNITS = ('', 'K', 'M', 'G','T','P')
1127 class Progress(object):
1130 def __init__(self, minval=0, maxval=100):
1131 assert maxval is None or maxval >= minval
1132 self.min = self.cur = minval
1138 def update(self, curval):
1139 assert self.min <= curval
1140 assert self.max is None or curval <= self.max
1143 if self.started is None:
1146 self.elapsed = now - self.started
1148 def increment(self, incr):
1150 self.update(self.cur + incr)
1153 self.update(self.min)
1157 if self.max is not None:
1158 self.update(self.max)
1163 return self.unknown if self.max is None else self.max
1166 def percentage(self):
1169 elif self.max is None:
1172 v = 100.0 * (self.cur - self.min) / (self.max - self.min)
1173 result = '%3d %%' % v
1176 def format_duration(self, duration):
1177 if (duration <= 0) and self.max is None or self.cur == self.min:
1180 # result = '--:--:--'
1182 result = time.strftime('%H:%M:%S', time.gmtime(duration))
1190 #import pdb; pdb.set_trace()
1193 if self.max is None:
1195 elif self.elapsed == 0 or (self.cur == self.min):
1198 #import pdb; pdb.set_trace()
1199 t = float(self.max - self.min)
1200 t /= self.cur - self.min
1201 t = (t - 1) * self.elapsed
1202 return '%s: %s' % (prefix, self.format_duration(t))
1206 if self.elapsed == 0:
1209 result = (self.cur - self.min) / self.elapsed
1214 return '%d %sB/s' % (result, unit)
1217 # Glob functionality
# Captures a '{opt1,opt2,...}' alternation set in a glob pattern.
RICH_GLOB = re.compile(r'\{([^}]*)\}')
# Detects '**' adjacent to anything other than a separator, comma or brace.
_CHECK_RECURSIVE_GLOB = re.compile(r'[^/\\,{]\*\*|\*\*[^/\\,}]')
# Detects a '}' with no '{' before it, or a '{' that is never closed.
_CHECK_MISMATCH_SET = re.compile(r'^[^{]*\}|\{[^}]*$')


def iglob(path_glob):
    """Extended globbing function that supports ** and {opt1,opt2,opt3}."""
    if _CHECK_RECURSIVE_GLOB.search(path_glob):
        raise ValueError(
            'invalid glob %r: recursive glob "**" must be used alone'
            % path_glob)
    if _CHECK_MISMATCH_SET.search(path_glob):
        raise ValueError(
            "invalid glob %r: mismatching set marker '{' or '}'"
            % path_glob)
    return _iglob(path_glob)
1236 def _iglob(path_glob):
1237 rich_path_glob = RICH_GLOB.split(path_glob, 1)
1238 if len(rich_path_glob) > 1:
1239 assert len(rich_path_glob) == 3, rich_path_glob
1240 prefix, set, suffix = rich_path_glob
1241 for item in set.split(','):
1242 for path in _iglob(''.join((prefix, item, suffix))):
1245 if '**' not in path_glob:
1246 for item in std_iglob(path_glob):
1249 prefix, radical = path_glob.split('**', 1)
1256 radical = radical.lstrip('/')
1257 radical = radical.lstrip('\\')
1258 for path, dir, files in os.walk(prefix):
1259 path = os.path.normpath(path)
1260 for fn in _iglob(os.path.join(path, radical)):
1264 from .compat import (HTTPSHandler as BaseHTTPSHandler, match_hostname,
1269 # HTTPSConnection which verifies certificates/matches domains
1272 class HTTPSConnection(httplib.HTTPSConnection):
1273 ca_certs = None # set this to the path to the certs file (.pem)
1274 check_domain = True # only used if ca_certs is not None
1276 # noinspection PyPropertyAccess
1278 sock = socket.create_connection((self.host, self.port), self.timeout)
1279 if getattr(self, '_tunnel_host', False):
1283 if not hasattr(ssl, 'SSLContext'):
1286 cert_reqs = ssl.CERT_REQUIRED
1288 cert_reqs = ssl.CERT_NONE
1289 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file,
1290 cert_reqs=cert_reqs,
1291 ssl_version=ssl.PROTOCOL_SSLv23,
1292 ca_certs=self.ca_certs)
1293 else: # pragma: no cover
1294 context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
1295 context.options |= ssl.OP_NO_SSLv2
1297 context.load_cert_chain(self.cert_file, self.key_file)
1300 context.verify_mode = ssl.CERT_REQUIRED
1301 context.load_verify_locations(cafile=self.ca_certs)
1302 if getattr(ssl, 'HAS_SNI', False):
1303 kwargs['server_hostname'] = self.host
1304 self.sock = context.wrap_socket(sock, **kwargs)
1305 if self.ca_certs and self.check_domain:
1307 match_hostname(self.sock.getpeercert(), self.host)
1308 logger.debug('Host verified: %s', self.host)
1309 except CertificateError: # pragma: no cover
1310 self.sock.shutdown(socket.SHUT_RDWR)
1314 class HTTPSHandler(BaseHTTPSHandler):
1315 def __init__(self, ca_certs, check_domain=True):
1316 BaseHTTPSHandler.__init__(self)
1317 self.ca_certs = ca_certs
1318 self.check_domain = check_domain
1320 def _conn_maker(self, *args, **kwargs):
1322 This is called to create a connection instance. Normally you'd
1323 pass a connection class to do_open, but it doesn't actually check for
1324 a class, and just expects a callable. As long as we behave just as a
1325 constructor would have, we should be OK. If it ever changes so that
1326 we *must* pass a class, we'll create an UnsafeHTTPSConnection class
1327 which just sets check_domain to False in the class definition, and
1328 choose which one to pass to do_open.
1330 result = HTTPSConnection(*args, **kwargs)
1332 result.ca_certs = self.ca_certs
1333 result.check_domain = self.check_domain
1336 def https_open(self, req):
1338 return self.do_open(self._conn_maker, req)
1339 except URLError as e:
1340 if 'certificate verify failed' in str(e.reason):
1341 raise CertificateError('Unable to verify server certificate '
1342 'for %s' % req.host)
1347 # To prevent against mixing HTTP traffic with HTTPS (examples: A Man-In-The-
1348 # Middle proxy using HTTP listens on port 443, or an index mistakenly serves
1349 # HTML containing a http://xyz link when it should be https://xyz),
1350 # you can use the following handler class, which does not allow HTTP traffic.
1352 # It works by inheriting from HTTPHandler - so build_opener won't add a
1353 # handler for HTTP itself.
class HTTPSOnlyHandler(HTTPSHandler, HTTPHandler):
    """Handler that refuses plain-HTTP requests outright.

    It inherits from HTTPHandler so that build_opener will not install a
    working handler for the 'http' scheme alongside it.
    """

    def http_open(self, req):
        # Always fail: an HTTP request on a supposedly secure channel is
        # an error by definition for this handler.
        raise URLError('Unexpected HTTP request on what should be a secure '
                       'connection: %s' % req)
1361 # XML-RPC with timeouts
1364 _ver_info = sys.version_info[:2]
1366 if _ver_info == (2, 6):
1367 class HTTP(httplib.HTTP):
1368 def __init__(self, host='', port=None, **kwargs):
1369 if port == 0: # 0 means use port 0, not the default port
1371 self._setup(self._connection_class(host, port, **kwargs))
1375 class HTTPS(httplib.HTTPS):
1376 def __init__(self, host='', port=None, **kwargs):
1377 if port == 0: # 0 means use port 0, not the default port
1379 self._setup(self._connection_class(host, port, **kwargs))
1382 class Transport(xmlrpclib.Transport):
1383 def __init__(self, timeout, use_datetime=0):
1384 self.timeout = timeout
1385 xmlrpclib.Transport.__init__(self, use_datetime)
1387 def make_connection(self, host):
1388 h, eh, x509 = self.get_host_info(host)
1389 if _ver_info == (2, 6):
1390 result = HTTP(h, timeout=self.timeout)
1392 if not self._connection or host != self._connection[0]:
1393 self._extra_headers = eh
1394 self._connection = host, httplib.HTTPConnection(h)
1395 result = self._connection[1]
class SafeTransport(xmlrpclib.SafeTransport):
    """
    An XML-RPC HTTPS transport which applies a timeout to its connections.
    """
    def __init__(self, timeout, use_datetime=0):
        self.timeout = timeout
        xmlrpclib.SafeTransport.__init__(self, use_datetime)

    def make_connection(self, host):
        h, eh, kwargs = self.get_host_info(host)
        if not kwargs:
            kwargs = {}  # get_host_info may return None for the x509 info
        kwargs['timeout'] = self.timeout
        if _ver_info == (2, 6):
            # Fix: use the cleaned host 'h' (auth info stripped), consistent
            # with Transport.make_connection; original passed raw 'host'.
            result = HTTPS(h, None, **kwargs)
        else:
            if not self._connection or host != self._connection[0]:
                self._extra_headers = eh
                self._connection = host, httplib.HTTPSConnection(h, None,
                                                                 **kwargs)
            result = self._connection[1]
        return result
class ServerProxy(xmlrpclib.ServerProxy):
    """
    A ServerProxy which accepts an optional 'timeout' keyword argument and
    wires it through a timeout-aware transport.
    """
    def __init__(self, uri, **kwargs):
        self.timeout = timeout = kwargs.pop('timeout', None)
        # The above classes only come into play if a timeout
        # is specified
        if timeout is not None:
            scheme, _ = splittype(uri)
            use_datetime = kwargs.get('use_datetime', 0)
            if scheme == 'https':
                tcls = SafeTransport
            else:
                tcls = Transport
            kwargs['transport'] = t = tcls(timeout,
                                           use_datetime=use_datetime)
            # Keep a reference so callers can inspect/close the transport.
            self.transport = t
        xmlrpclib.ServerProxy.__init__(self, uri, **kwargs)
#
# CSV functionality. This is provided because on 2.x, the csv module can't
# handle Unicode. However, we need to deal with Unicode in e.g. RECORD files.
#
1441 def _csv_open(fn, mode, **kwargs):
1442 if sys.version_info[0] < 3:
1445 kwargs['newline'] = ''
1446 return open(fn, mode, **kwargs)
class CSVBase(object):
    """
    Common dialect settings and context-manager plumbing for the CSV
    reader/writer wrappers below. Subclasses set self.stream.
    """
    defaults = {
        'delimiter': str(','),      # The strs are used because we need native
        'quotechar': str('"'),      # str in the csv API (2.x won't take
        'lineterminator': str('\n') # Unicode)
    }

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        self.stream.close()
class CSVReader(CSVBase):
    """
    CSV reader accepting either a binary 'stream' or a 'path' keyword,
    yielding rows of text (decoding from UTF-8 on 2.x).
    """
    def __init__(self, **kwargs):
        if 'stream' in kwargs:
            stream = kwargs['stream']
            if sys.version_info[0] >= 3:
                # needs to be a text stream
                stream = codecs.getreader('utf-8')(stream)
            self.stream = stream
        else:
            self.stream = _csv_open(kwargs['path'], 'r')
        self.reader = csv.reader(self.stream, **self.defaults)

    def __iter__(self):
        return self

    def next(self):
        result = next(self.reader)
        if sys.version_info[0] < 3:
            # 2.x csv returns bytes; decode each cell to Unicode.
            for i, item in enumerate(result):
                if not isinstance(item, text_type):
                    result[i] = item.decode('utf-8')
        return result

    # Same method serves both 2.x (next) and 3.x (__next__) protocols.
    __next__ = next
class CSVWriter(CSVBase):
    """
    CSV writer for path *fn* which encodes Unicode cells to UTF-8 on 2.x
    before handing rows to the underlying csv.writer.
    """
    def __init__(self, fn, **kwargs):
        self.stream = _csv_open(fn, 'w')
        self.writer = csv.writer(self.stream, **self.defaults)

    def writerow(self, row):
        if sys.version_info[0] < 3:
            # 2.x csv can't take Unicode; encode text cells to UTF-8 bytes.
            r = []
            for item in row:
                if isinstance(item, text_type):
                    item = item.encode('utf-8')
                r.append(item)
            row = r
        self.writer.writerow(row)
#
# Configurator functionality
#
class Configurator(BaseConfigurator):
    """
    A configurator which, in addition to the base value converters,
    supports an 'inc://path' protocol for including JSON from other files.
    """

    value_converters = dict(BaseConfigurator.value_converters)
    value_converters['inc'] = 'inc_convert'

    def __init__(self, config, base=None):
        super(Configurator, self).__init__(config)
        # Directory against which relative inc:// paths are resolved.
        self.base = base or os.getcwd()

    def configure_custom(self, config):
        """
        Instantiate the object described by *config*: '()' names the
        callable (resolved if given as a dotted string), '[]' supplies
        positional args, '.' supplies attributes to set on the result,
        and remaining valid-identifier keys become keyword args.
        """
        def convert(o):
            # Recursively convert containers; scalars and protocol strings
            # are delegated to the base class's convert().
            if isinstance(o, (list, tuple)):
                result = type(o)([convert(i) for i in o])
            elif isinstance(o, dict):
                if '()' in o:
                    result = self.configure_custom(o)
                else:
                    result = {}
                    for k in o:
                        result[k] = convert(o[k])
            else:
                result = self.convert(o)
            return result

        c = config.pop('()')
        if not callable(c):
            c = self.resolve(c)
        props = config.pop('.', None)
        # Check for valid identifiers
        args = config.pop('[]', ())
        if args:
            args = tuple([convert(o) for o in args])
        items = [(k, convert(config[k])) for k in config if valid_ident(k)]
        kwargs = dict(items)
        result = c(*args, **kwargs)
        if props:
            for n, v in props.items():
                setattr(result, n, convert(v))
        return result

    def __getitem__(self, key):
        result = self.config[key]
        if isinstance(result, dict) and '()' in result:
            # Instantiate lazily and cache, so repeated access returns the
            # same object.
            self.config[key] = result = self.configure_custom(result)
        return result

    def inc_convert(self, value):
        """Default converter for the inc:// protocol."""
        if not os.path.isabs(value):
            value = os.path.join(self.base, value)
        with codecs.open(value, 'r', encoding='utf-8') as f:
            result = json.load(f)
        return result
#
# Mixin for running subprocesses and capturing their output
#
class SubprocessMixin(object):
    """
    Mixin for running subprocesses and capturing their output.
    """
    def __init__(self, verbose=False, progress=None):
        self.verbose = verbose
        self.progress = progress

    def reader(self, stream, context):
        """
        Read lines from a subprocess' output stream and either pass to a
        progress callable (if specified) or write progress information to
        sys.stderr.
        """
        progress = self.progress
        verbose = self.verbose
        while True:
            s = stream.readline()
            if not s:
                break  # EOF: subprocess closed this stream
            if progress is not None:
                progress(s, context)
            else:
                if not verbose:
                    sys.stderr.write('.')
                else:
                    sys.stderr.write(s.decode('utf-8'))
                sys.stderr.flush()
        stream.close()

    def run_command(self, cmd, **kwargs):
        """
        Run *cmd*, draining stdout/stderr on background threads so the
        child can't block on a full pipe; returns the finished Popen.
        """
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE, **kwargs)
        t1 = threading.Thread(target=self.reader, args=(p.stdout, 'stdout'))
        t1.start()
        t2 = threading.Thread(target=self.reader, args=(p.stderr, 'stderr'))
        t2.start()
        p.wait()
        t1.join()
        t2.join()
        if self.progress is not None:
            self.progress('done.', 'main')
        elif self.verbose:
            sys.stderr.write('done.\n')
        return p
def normalize_name(name):
    """Normalize a python package name a la PEP 503"""
    # https://www.python.org/dev/peps/pep-0503/#normalized-names
    # Runs of '-', '_' and '.' collapse to a single '-', then lowercase.
    return re.sub(r'[-_.]+', '-', name).lower()