1 # -*- coding: utf-8 -*-
3 # Copyright (C) 2012-2015 Vinay Sajip.
4 # Licensed to the Python Software Foundation under a contributor agreement.
5 # See LICENSE.txt and CONTRIBUTORS.txt.
17 except ImportError: # pragma: no cover
18 import dummy_threading as threading
21 from . import DistlibException
22 from .compat import (urljoin, urlparse, urlunparse, url2pathname, pathname2url,
23 queue, quote, unescape, string_types, build_opener,
24 HTTPRedirectHandler as BaseRedirectHandler, text_type,
25 Request, HTTPError, URLError)
26 from .database import Distribution, DistributionPath, make_dist
27 from .metadata import Metadata
28 from .util import (cached_property, parse_credentials, ensure_slash,
29 split_filename, get_project_data, parse_requirement,
30 parse_name_and_version, ServerProxy, normalize_name)
31 from .version import get_scheme, UnsupportedVersionError
32 from .wheel import Wheel, is_compatible
logger = logging.getLogger(__name__)

# Matches a URL fragment of the form "<algo>=<hexdigest>", used by indexes
# to convey an archive's expected digest. Raw string: '\w' in a non-raw
# literal is an invalid escape sequence (DeprecationWarning since 3.6).
HASHER_HASH = re.compile(r'^(\w+)=([a-f0-9]+)')
# Extracts the charset parameter from a Content-Type header value.
CHARSET = re.compile(r';\s*charset\s*=\s*(.*)\s*$', re.I)
# Content types we are prepared to parse as HTML index pages.
HTML_CONTENT_TYPE = re.compile('text/html|application/x(ht)?ml')
DEFAULT_INDEX = 'https://pypi.python.org/pypi'
def get_all_distribution_names(url=None):
    """
    Return all distribution names known by an index.
    :param url: The URL of the index.
    :return: A list of all known distribution names.
    """
    # Fall back to the default index when no URL is supplied.
    if url is None:
        url = DEFAULT_INDEX
    client = ServerProxy(url, timeout=3.0)
    return client.list_packages()
class RedirectHandler(BaseRedirectHandler):
    """
    A class to work around a bug in some Python 3.2.x releases.
    """
    # There's a bug in the base version for some 3.2.x
    # (e.g. 3.2.2 on Ubuntu Oneiric). If a Location header
    # returns e.g. /abc, it bails because it says the scheme ''
    # is bogus, when actually it should use the request's
    # URL for the scheme. See Python issue #13696.
    def http_error_302(self, req, fp, code, msg, headers):
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI). Use first header.
        newurl = None
        for key in ('location', 'uri'):
            if key in headers:
                newurl = headers[key]
                break
        if newurl is None:  # pragma: no cover
            return
        urlparts = urlparse(newurl)
        if urlparts.scheme == '':
            # Scheme-less redirect target: resolve it against the
            # request URL and patch the header so the base class sees
            # an absolute URL.
            newurl = urljoin(req.get_full_url(), newurl)
            if hasattr(headers, 'replace_header'):
                headers.replace_header(key, newurl)
            else:
                headers[key] = newurl
        return BaseRedirectHandler.http_error_302(self, req, fp, code, msg,
                                                  headers)

    http_error_301 = http_error_303 = http_error_307 = http_error_302
class Locator(object):
    """
    A base class for locators - things that locate distributions.
    """
    source_extensions = ('.tar.gz', '.tar.bz2', '.tar', '.zip', '.tgz', '.tbz')
    binary_extensions = ('.egg', '.exe', '.whl')
    excluded_extensions = ('.pdf',)

    # A list of tags indicating which wheels you want to match. The default
    # value of None matches against the tags compatible with the running
    # Python. If you want to match other values, set wheel_tags on a locator
    # instance to a list of tuples (pyver, abi, arch) which you want to match.
    wheel_tags = None

    downloadable_extensions = source_extensions + ('.whl',)

    def __init__(self, scheme='default'):
        """
        Initialise an instance.
        :param scheme: Because locators look for most recent versions, they
                       need to know the version scheme to use. This specifies
                       the current PEP-recommended scheme - use ``'legacy'``
                       if you need to support existing distributions on PyPI.
        """
        self._cache = {}
        self.scheme = scheme
        # Because of bugs in some of the handlers on some of the platforms,
        # we use our own opener rather than just using urlopen.
        self.opener = build_opener(RedirectHandler())
        # If get_project() is called from locate(), the matcher instance
        # is set from the requirement passed to locate(). See issue #18 for
        # why this can be useful to know.
        self.matcher = None
        self.errors = queue.Queue()

    def get_errors(self):
        """
        Return any errors which have occurred.
        """
        result = []
        while not self.errors.empty():  # pragma: no cover
            try:
                e = self.errors.get(False)
                result.append(e)
            except queue.Empty:
                # BUGFIX: previously caught self.errors.Empty, but Queue
                # instances have no 'Empty' attribute - reaching the handler
                # raised AttributeError. queue.Empty is the real exception.
                continue
            self.errors.task_done()
        return result

    def clear_errors(self):
        """
        Clear any errors which may have been logged.
        """
        # Just get the errors and throw them away
        self.get_errors()

    def clear_cache(self):
        self._cache.clear()

    def _get_scheme(self):
        return self._scheme

    def _set_scheme(self, value):
        self._scheme = value

    scheme = property(_get_scheme, _set_scheme)

    def _get_project(self, name):
        """
        For a given project, get a dictionary mapping available versions to Distribution
        instances.

        This should be implemented in subclasses.

        If called from a locate() request, self.matcher will be set to a
        matcher for the requirement to satisfy, otherwise it will be None.
        """
        raise NotImplementedError('Please implement in the subclass')

    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator.
        """
        raise NotImplementedError('Please implement in the subclass')

    def get_project(self, name):
        """
        For a given project, get a dictionary mapping available versions to Distribution
        instances.

        This calls _get_project to do all the work, and just implements a caching layer on top.
        """
        if self._cache is None:
            result = self._get_project(name)
        elif name in self._cache:
            result = self._cache[name]
        else:
            self.clear_errors()
            result = self._get_project(name)
            self._cache[name] = result
        return result

    def score_url(self, url):
        """
        Give an url a score which can be used to choose preferred URLs
        for a given project release.
        """
        t = urlparse(url)
        basename = posixpath.basename(t.path)
        compatible = True
        is_wheel = basename.endswith('.whl')
        if is_wheel:
            compatible = is_compatible(Wheel(basename), self.wheel_tags)
        # BUGFIX: the first element used to be (t.scheme != 'https'), which
        # ranked http URLs ABOVE https - the opposite of the behaviour
        # documented in prefer_url ("favours https:// URLs over http://").
        return (t.scheme == 'https', 'pypi.python.org' in t.netloc,
                is_wheel, compatible, basename)

    def prefer_url(self, url1, url2):
        """
        Choose one of two URLs where both are candidates for distribution
        archives for the same version of a distribution (for example,
        .tar.gz vs. zip).

        The current implementation favours https:// URLs over http://, archives
        from PyPI over those from other locations, wheel compatibility (if a
        wheel) and then the archive name.
        """
        result = url2
        if url1:
            s1 = self.score_url(url1)
            s2 = self.score_url(url2)
            if s1 > s2:
                result = url1
            if result != url2:
                logger.debug('Not replacing %r with %r', url1, url2)
            else:
                logger.debug('Replacing %r with %r', url1, url2)
        return result

    def split_filename(self, filename, project_name):
        """
        Attempt to split a filename in project name, version and Python version.
        """
        return split_filename(filename, project_name)

    def convert_url_to_download_info(self, url, project_name):
        """
        See if a URL is a candidate for a download URL for a project (the URL
        has typically been scraped from an HTML page).

        If it is, a dictionary is returned with keys "name", "version",
        "filename" and "url"; otherwise, None is returned.
        """
        def same_project(name1, name2):
            return normalize_name(name1) == normalize_name(name2)

        result = None
        scheme, netloc, path, params, query, frag = urlparse(url)
        if frag.lower().startswith('egg='):
            logger.debug('%s: version hint in fragment: %r',
                         project_name, frag)
        m = HASHER_HASH.match(frag)
        if m:
            algo, digest = m.groups()
        else:
            algo, digest = None, None
        origpath = path
        if path and path[-1] == '/':
            path = path[:-1]
        if path.endswith('.whl'):
            try:
                wheel = Wheel(path)
                if is_compatible(wheel, self.wheel_tags):
                    if project_name is None:
                        include = True
                    else:
                        include = same_project(wheel.name, project_name)
                    if include:
                        result = {
                            'name': wheel.name,
                            'version': wheel.version,
                            'filename': wheel.filename,
                            'url': urlunparse((scheme, netloc, origpath,
                                               params, query, '')),
                            'python-version': ', '.join(
                                ['.'.join(list(v[2:])) for v in wheel.pyver]),
                        }
            except Exception as e:  # pragma: no cover
                logger.warning('invalid path for wheel: %s', path)
        elif path.endswith(self.downloadable_extensions):
            path = filename = posixpath.basename(path)
            for ext in self.downloadable_extensions:
                if path.endswith(ext):
                    path = path[:-len(ext)]
                    t = self.split_filename(path, project_name)
                    if not t:
                        logger.debug('No match for project/version: %s', path)
                    else:
                        name, version, pyver = t
                        if not project_name or same_project(project_name, name):
                            result = {
                                'name': name,
                                'version': version,
                                'filename': filename,
                                'url': urlunparse((scheme, netloc, origpath,
                                                   params, query, '')),
                                #'packagetype': 'sdist',
                            }
                            if pyver:  # pragma: no cover
                                result['python-version'] = pyver
                    break
        if result and algo:
            result['%s_digest' % algo] = digest
        return result

    def _get_digest(self, info):
        """
        Get a digest from a dictionary by looking at keys of the form
        'algo_digest'.

        Returns a 2-tuple (algo, digest) if found, else None. Currently
        looks only for SHA256, then MD5.
        """
        result = None
        for algo in ('sha256', 'md5'):
            key = '%s_digest' % algo
            if key in info:
                result = (algo, info[key])
                break
        return result

    def _update_version_data(self, result, info):
        """
        Update a result dictionary (the final result from _get_project) with a
        dictionary for a specific version, which typically holds information
        gleaned from a filename or URL for an archive for the distribution.
        """
        name = info.pop('name')
        version = info.pop('version')
        if version in result:
            dist = result[version]
            md = dist.metadata
        else:
            dist = make_dist(name, version, scheme=self.scheme)
            md = dist.metadata
        dist.digest = digest = self._get_digest(info)
        url = info['url']
        result['digests'][url] = digest
        if md.source_url != info['url']:
            md.source_url = self.prefer_url(md.source_url, url)
        result['urls'].setdefault(version, set()).add(url)
        dist.locator = self
        result[version] = dist

    def locate(self, requirement, prereleases=False):
        """
        Find the most recent distribution which matches the given
        requirement.

        :param requirement: A requirement of the form 'foo (1.0)' or perhaps
                            'foo (>= 1.0, < 2.0, != 1.3)'
        :param prereleases: If ``True``, allow pre-release versions
                            to be located. Otherwise, pre-release versions
                            are not returned.
        :return: A :class:`Distribution` instance, or ``None`` if no such
                 distribution could be located.
        """
        result = None
        r = parse_requirement(requirement)
        if r is None:
            raise DistlibException('Not a valid requirement: %r' % requirement)
        scheme = get_scheme(self.scheme)
        self.matcher = matcher = scheme.matcher(r.requirement)
        logger.debug('matcher: %s (%s)', matcher, type(matcher).__name__)
        versions = self.get_project(r.name)
        if len(versions) > 2:   # urls and digests keys are present
            # sometimes, versions are invalid
            slist = []
            vcls = matcher.version_class
            for k in versions:
                if k in ('urls', 'digests'):
                    continue
                try:
                    if not matcher.match(k):
                        logger.debug('%s did not match %r', matcher, k)
                    else:
                        if prereleases or not vcls(k).is_prerelease:
                            slist.append(k)
                        else:
                            logger.debug('skipping pre-release '
                                         'version %s of %s', k, matcher.name)
                except Exception:  # pragma: no cover
                    logger.warning('error matching %s with %r', matcher, k)
                    pass # slist.append(k)
            if len(slist) > 1:
                slist = sorted(slist, key=scheme.key)
            if slist:
                logger.debug('sorted list: %s', slist)
                version = slist[-1]
                result = versions[version]
        if result:
            if r.extras:
                result.extras = r.extras
            result.download_urls = versions.get('urls', {}).get(version, set())
            d = {}
            sd = versions.get('digests', {})
            for url in result.download_urls:
                if url in sd:  # pragma: no cover
                    d[url] = sd[url]
            result.digests = d
        self.matcher = None
        return result
class PyPIRPCLocator(Locator):
    """
    This locator uses XML-RPC to locate distributions. It therefore
    cannot be used with simple mirrors (that only mirror file content).
    """
    def __init__(self, url, **kwargs):
        """
        Initialise an instance.

        :param url: The URL to use for XML-RPC.
        :param kwargs: Passed to the superclass constructor.
        """
        super(PyPIRPCLocator, self).__init__(**kwargs)
        self.base_url = url
        self.client = ServerProxy(url, timeout=3.0)

    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator.
        """
        return set(self.client.list_packages())

    def _get_project(self, name):
        result = {'urls': {}, 'digests': {}}
        versions = self.client.package_releases(name, True)
        for v in versions:
            urls = self.client.release_urls(name, v)
            data = self.client.release_data(name, v)
            # Build a Distribution from the release metadata returned
            # by the index.
            metadata = Metadata(scheme=self.scheme)
            metadata.name = data['name']
            metadata.version = data['version']
            metadata.license = data.get('license')
            metadata.keywords = data.get('keywords', [])
            metadata.summary = data.get('summary')
            dist = Distribution(metadata)
            if urls:
                # Use the first URL as the canonical source for the dist.
                info = urls[0]
                metadata.source_url = info['url']
                dist.digest = self._get_digest(info)
            dist.locator = self
            result[v] = dist
            # Record every download URL and its digest for this version.
            for info in urls:
                url = info['url']
                digest = self._get_digest(info)
                result['urls'].setdefault(v, set()).add(url)
                result['digests'][url] = digest
        return result
class PyPIJSONLocator(Locator):
    """
    This locator uses PyPI's JSON interface. It's very limited in functionality
    and probably not worth using.
    """
    def __init__(self, url, **kwargs):
        super(PyPIJSONLocator, self).__init__(**kwargs)
        self.base_url = ensure_slash(url)

    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator.
        """
        raise NotImplementedError('Not available from this locator')

    def _get_project(self, name):
        result = {'urls': {}, 'digests': {}}
        url = urljoin(self.base_url, '%s/json' % quote(name))
        try:
            resp = self.opener.open(url)
            data = resp.read().decode() # for now
            d = json.loads(data)
            md = Metadata(scheme=self.scheme)
            data = d['info']
            md.name = data['name']
            md.version = data['version']
            md.license = data.get('license')
            md.keywords = data.get('keywords', [])
            md.summary = data.get('summary')
            dist = Distribution(md)
            dist.locator = self
            result[md.version] = dist
            # URLs and digests for the current (latest) release.
            for info in d['urls']:
                url = info['url']
                dist.download_urls.add(url)
                dist.digests[url] = self._get_digest(info)
                result['urls'].setdefault(md.version, set()).add(url)
                result['digests'][url] = self._get_digest(info)
            # Now get other releases
            for version, infos in d['releases'].items():
                if version == md.version:
                    continue    # already done
                omd = Metadata(scheme=self.scheme)
                omd.name = name
                omd.version = version
                odist = Distribution(omd)
                odist.locator = self
                result[version] = odist
                for info in infos:
                    url = info['url']
                    odist.download_urls.add(url)
                    odist.digests[url] = self._get_digest(info)
                    result['urls'].setdefault(version, set()).add(url)
                    result['digests'][url] = self._get_digest(info)
        except Exception as e:
            # Record the failure for callers (get_errors) and log it;
            # an empty result is returned on error.
            self.errors.put(text_type(e))
            logger.exception('JSON fetch failed: %s', e)
        return result
515 This class represents a scraped HTML page.
517 # The following slightly hairy-looking regex just looks for the contents of
518 # an anchor link, which has an attribute "href" either immediately preceded
519 # or immediately followed by a "rel" attribute. The attribute values can be
520 # declared with double quotes, single quotes or no quotes - which leads to
521 # the length of the expression.
522 _href = re.compile("""
523 (rel\s*=\s*(?:"(?P<rel1>[^"]*)"|'(?P<rel2>[^']*)'|(?P<rel3>[^>\s\n]*))\s+)?
524 href\s*=\s*(?:"(?P<url1>[^"]*)"|'(?P<url2>[^']*)'|(?P<url3>[^>\s\n]*))
525 (\s+rel\s*=\s*(?:"(?P<rel4>[^"]*)"|'(?P<rel5>[^']*)'|(?P<rel6>[^>\s\n]*)))?
526 """, re.I | re.S | re.X)
527 _base = re.compile(r"""<base\s+href\s*=\s*['"]?([^'">]+)""", re.I | re.S)
529 def __init__(self, data, url):
531 Initialise an instance with the Unicode page contents and the URL they
535 self.base_url = self.url = url
536 m = self._base.search(self.data)
538 self.base_url = m.group(1)
540 _clean_re = re.compile(r'[^a-z0-9$&+,/:;=?@.#%_\\|-]', re.I)
545 Return the URLs of all the links on a page together with information
546 about their "rel" attribute, for determining which ones to treat as
547 downloads and which ones to queue for further scraping.
551 scheme, netloc, path, params, query, frag = urlparse(url)
552 return urlunparse((scheme, netloc, quote(path),
553 params, query, frag))
556 for match in self._href.finditer(self.data):
557 d = match.groupdict('')
558 rel = (d['rel1'] or d['rel2'] or d['rel3'] or
559 d['rel4'] or d['rel5'] or d['rel6'])
560 url = d['url1'] or d['url2'] or d['url3']
561 url = urljoin(self.base_url, url)
563 url = self._clean_re.sub(lambda m: '%%%2x' % ord(m.group(0)), url)
564 result.add((url, rel))
565 # We sort the result, hoping to bring the most recent versions
567 result = sorted(result, key=lambda t: t[0], reverse=True)
class SimpleScrapingLocator(Locator):
    """
    A locator which scrapes HTML pages to locate downloads for a distribution.
    This runs multiple threads to do the I/O; performance is at least as good
    as pip's PackageFinder, which works in an analogous fashion.
    """

    # These are used to deal with various Content-Encoding schemes.
    decoders = {
        'deflate': zlib.decompress,
        # BUGFIX: the lambda previously read BytesIO(d) - 'd' was a free
        # (undefined) name, so any gzip-encoded response raised NameError.
        # It must use the lambda's own parameter, 'b'.
        'gzip': lambda b: gzip.GzipFile(fileobj=BytesIO(b)).read(),
        'none': lambda b: b,
    }

    def __init__(self, url, timeout=None, num_workers=10, **kwargs):
        """
        Initialise an instance.
        :param url: The root URL to use for scraping.
        :param timeout: The timeout, in seconds, to be applied to requests.
                        This defaults to ``None`` (no timeout specified).
        :param num_workers: The number of worker threads you want to do I/O,
                            This defaults to 10.
        :param kwargs: Passed to the superclass.
        """
        super(SimpleScrapingLocator, self).__init__(**kwargs)
        self.base_url = ensure_slash(url)
        self.timeout = timeout
        self._page_cache = {}
        self._seen = set()
        self._to_fetch = queue.Queue()
        self._bad_hosts = set()
        self.skip_externals = False
        self.num_workers = num_workers
        self._lock = threading.RLock()
        # See issue #45: we need to be resilient when the locator is used
        # in a thread, e.g. with concurrent.futures. We can't use self._lock
        # as it is for coordinating our internal threads - the ones created
        # in _prepare_threads.
        self._gplock = threading.RLock()

    def _prepare_threads(self):
        """
        Threads are created only when get_project is called, and terminate
        before it returns. They are there primarily to parallelise I/O (i.e.
        fetching web pages).
        """
        self._threads = []
        for i in range(self.num_workers):
            t = threading.Thread(target=self._fetch)
            t.setDaemon(True)
            t.start()
            self._threads.append(t)

    def _wait_threads(self):
        """
        Tell all the threads to terminate (by sending a sentinel value) and
        wait for them to do so.
        """
        # Note that you need two loops, since you can't say which
        # thread will get each sentinel
        for t in self._threads:
            self._to_fetch.put(None)    # sentinel
        for t in self._threads:
            t.join()
        self._threads = []

    def _get_project(self, name):
        result = {'urls': {}, 'digests': {}}
        with self._gplock:
            self.result = result
            self.project_name = name
            url = urljoin(self.base_url, '%s/' % quote(name))
            self._seen.clear()
            self._page_cache.clear()
            self._prepare_threads()
            try:
                logger.debug('Queueing %s', url)
                self._to_fetch.put(url)
                self._to_fetch.join()
            finally:
                self._wait_threads()
            del self.result
        return result

    platform_dependent = re.compile(r'\b(linux-(i\d86|x86_64|arm\w+)|'
                                    r'win(32|-amd64)|macosx-?\d+)\b', re.I)

    def _is_platform_dependent(self, url):
        """
        Does an URL refer to a platform-specific download?
        """
        return self.platform_dependent.search(url)

    def _process_download(self, url):
        """
        See if an URL is a suitable download for a project.

        If it is, register information in the result dictionary (for
        _get_project) about the specific version it's for.

        Note that the return value isn't actually used other than as a boolean
        value.
        """
        if self._is_platform_dependent(url):
            info = None
        else:
            info = self.convert_url_to_download_info(url, self.project_name)
        logger.debug('process_download: %s -> %s', url, info)
        if info:
            with self._lock:    # needed because self.result is shared
                self._update_version_data(self.result, info)
        return info

    def _should_queue(self, link, referrer, rel):
        """
        Determine whether a link URL from a referring page and with a
        particular "rel" attribute should be queued for scraping.
        """
        scheme, netloc, path, _, _, _ = urlparse(link)
        if path.endswith(self.source_extensions + self.binary_extensions +
                         self.excluded_extensions):
            result = False
        elif self.skip_externals and not link.startswith(self.base_url):
            result = False
        elif not referrer.startswith(self.base_url):
            result = False
        elif rel not in ('homepage', 'download'):
            result = False
        elif scheme not in ('http', 'https', 'ftp'):
            result = False
        elif self._is_platform_dependent(link):
            result = False
        else:
            host = netloc.split(':', 1)[0]
            if host.lower() == 'localhost':
                result = False
            else:
                result = True
        logger.debug('should_queue: %s (%s) from %s -> %s', link, rel,
                     referrer, result)
        return result

    def _fetch(self):
        """
        Get a URL to fetch from the work queue, get the HTML page, examine its
        links for download candidates and candidates for further scraping.

        This is a handy method to run in a thread.
        """
        while True:
            url = self._to_fetch.get()
            try:
                if url:
                    page = self.get_page(url)
                    if page is None:    # e.g. after an error
                        continue
                    for link, rel in page.links:
                        if link not in self._seen:
                            self._seen.add(link)
                            if (not self._process_download(link) and
                                self._should_queue(link, url, rel)):
                                logger.debug('Queueing %s from %s', link, url)
                                self._to_fetch.put(link)
            except Exception as e:  # pragma: no cover
                self.errors.put(text_type(e))
            finally:
                # always do this, to avoid hangs :-)
                self._to_fetch.task_done()
            if not url:
                #logger.debug('Sentinel seen, quitting.')
                break

    def get_page(self, url):
        """
        Get the HTML for an URL, possibly from an in-memory cache.

        XXX TODO Note: this cache is never actually cleared. It's assumed that
        the data won't get stale over the lifetime of a locator instance (not
        necessarily true for the default_locator).
        """
        # http://peak.telecommunity.com/DevCenter/EasyInstall#package-index-api
        scheme, netloc, path, _, _, _ = urlparse(url)
        if scheme == 'file' and os.path.isdir(url2pathname(path)):
            url = urljoin(ensure_slash(url), 'index.html')

        if url in self._page_cache:
            result = self._page_cache[url]
            logger.debug('Returning %s from cache: %s', url, result)
        else:
            host = netloc.split(':', 1)[0]
            result = None
            if host in self._bad_hosts:
                logger.debug('Skipping %s due to bad host %s', url, host)
            else:
                req = Request(url, headers={'Accept-encoding': 'identity'})
                try:
                    logger.debug('Fetching %s', url)
                    resp = self.opener.open(req, timeout=self.timeout)
                    logger.debug('Fetched %s', url)
                    headers = resp.info()
                    content_type = headers.get('Content-Type', '')
                    if HTML_CONTENT_TYPE.match(content_type):
                        final_url = resp.geturl()
                        data = resp.read()
                        encoding = headers.get('Content-Encoding')
                        if encoding:
                            decoder = self.decoders[encoding]   # fail if not found
                            data = decoder(data)
                        encoding = 'utf-8'
                        m = CHARSET.search(content_type)
                        if m:
                            encoding = m.group(1)
                        try:
                            data = data.decode(encoding)
                        except UnicodeError:  # pragma: no cover
                            data = data.decode('latin-1')    # fallback
                        result = Page(data, final_url)
                        self._page_cache[final_url] = result
                except HTTPError as e:
                    if e.code != 404:
                        logger.exception('Fetch failed: %s: %s', url, e)
                except URLError as e:  # pragma: no cover
                    logger.exception('Fetch failed: %s: %s', url, e)
                    with self._lock:
                        self._bad_hosts.add(host)
                except Exception as e:  # pragma: no cover
                    logger.exception('Fetch failed: %s: %s', url, e)
                finally:
                    self._page_cache[url] = result   # even if None (failure)
        return result

    _distname_re = re.compile('<a href=[^>]*>([^<]+)<')

    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator.
        """
        result = set()
        page = self.get_page(self.base_url)
        if not page:
            raise DistlibException('Unable to get %s' % self.base_url)
        for match in self._distname_re.finditer(page.data):
            result.add(match.group(1))
        return result
class DirectoryLocator(Locator):
    """
    This class locates distributions in a directory tree.
    """

    def __init__(self, path, **kwargs):
        """
        Initialise an instance.
        :param path: The root of the directory tree to search.
        :param kwargs: Passed to the superclass constructor,
                       except for:
                       * recursive - if True (the default), subdirectories are
                         recursed into. If False, only the top-level directory
                         is searched.
        """
        self.recursive = kwargs.pop('recursive', True)
        super(DirectoryLocator, self).__init__(**kwargs)
        path = os.path.abspath(path)
        if not os.path.isdir(path):  # pragma: no cover
            raise DistlibException('Not a directory: %r' % path)
        self.base_dir = path

    def should_include(self, filename, parent):
        """
        Should a filename be considered as a candidate for a distribution
        archive? As well as the filename, the directory which contains it
        is provided, though not used by the current implementation.
        """
        return filename.endswith(self.downloadable_extensions)

    def _get_project(self, name):
        result = {'urls': {}, 'digests': {}}
        for root, dirs, files in os.walk(self.base_dir):
            for fn in files:
                if self.should_include(fn, root):
                    fn = os.path.join(root, fn)
                    # Express the local file as a file:// URL so the shared
                    # URL-analysis machinery can be reused.
                    url = urlunparse(('file', '',
                                      pathname2url(os.path.abspath(fn)),
                                      '', '', ''))
                    info = self.convert_url_to_download_info(url, name)
                    if info:
                        self._update_version_data(result, info)
            if not self.recursive:
                break
        return result

    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator.
        """
        result = set()
        for root, dirs, files in os.walk(self.base_dir):
            for fn in files:
                if self.should_include(fn, root):
                    fn = os.path.join(root, fn)
                    url = urlunparse(('file', '',
                                      pathname2url(os.path.abspath(fn)),
                                      '', '', ''))
                    # No project name constraint: collect names from any
                    # recognisable archive.
                    info = self.convert_url_to_download_info(url, None)
                    if info:
                        result.add(info['name'])
            if not self.recursive:
                break
        return result
class JSONLocator(Locator):
    """
    This locator uses special extended metadata (not available on PyPI) and is
    the basis of performant dependency resolution in distlib. Other locators
    require archive downloads before dependencies can be determined! As you
    might imagine, that can be slow.
    """
    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator.
        """
        raise NotImplementedError('Not available from this locator')

    def _get_project(self, name):
        result = {'urls': {}, 'digests': {}}
        data = get_project_data(name)
        if data:
            for info in data.get('files', []):
                # Only source distributions are considered here.
                if info['ptype'] != 'sdist' or info['pyversion'] != 'source':
                    continue
                # We don't store summary in project metadata as it makes
                # the data bigger for no benefit during dependency
                # resolution
                dist = make_dist(data['name'], info['version'],
                                 summary=data.get('summary',
                                                  'Placeholder for summary'),
                                 scheme=self.scheme)
                md = dist.metadata
                md.source_url = info['url']
                # TODO SHA256 digest
                if 'digest' in info and info['digest']:
                    dist.digest = ('md5', info['digest'])
                md.dependencies = info.get('requirements', {})
                dist.exports = info.get('exports', {})
                result[dist.version] = dist
                result['urls'].setdefault(dist.version, set()).add(info['url'])
        return result
class DistPathLocator(Locator):
    """
    This locator finds installed distributions in a path. It can be useful for
    adding to an :class:`AggregatingLocator`.
    """
    def __init__(self, distpath, **kwargs):
        """
        Initialise an instance.

        :param distpath: A :class:`DistributionPath` instance to search.
        """
        super(DistPathLocator, self).__init__(**kwargs)
        assert isinstance(distpath, DistributionPath)
        self.distpath = distpath

    def _get_project(self, name):
        dist = self.distpath.get_distribution(name)
        if dist is None:
            # Nothing installed under this name: empty result shell.
            result = {'urls': {}, 'digests': {}}
        else:
            result = {
                dist.version: dist,
                'urls': {dist.version: set([dist.source_url])},
                'digests': {dist.version: set([None])}
            }
        return result
class AggregatingLocator(Locator):
    """
    This class allows you to chain and/or merge a list of locators.
    """
    def __init__(self, *locators, **kwargs):
        """
        Initialise an instance.

        :param locators: The list of locators to search.
        :param kwargs: Passed to the superclass constructor,
                       except for:
                       * merge - if False (the default), the first successful
                         search from any of the locators is returned. If True,
                         the results from all locators are merged (this can be
                         slow).
        """
        self.merge = kwargs.pop('merge', False)
        self.locators = locators
        super(AggregatingLocator, self).__init__(**kwargs)

    def clear_cache(self):
        super(AggregatingLocator, self).clear_cache()
        for locator in self.locators:
            locator.clear_cache()

    def _set_scheme(self, value):
        # Propagate the scheme to every wrapped locator as well.
        self._scheme = value
        for locator in self.locators:
            locator.scheme = value

    scheme = property(Locator.scheme.fget, _set_scheme)

    def _get_project(self, name):
        result = {}
        for locator in self.locators:
            d = locator.get_project(name)
            if d:
                if self.merge:
                    files = result.get('urls', {})
                    digests = result.get('digests', {})
                    # next line could overwrite result['urls'], result['digests']
                    result.update(d)
                    df = result.get('urls')
                    if files and df:
                        for k, v in files.items():
                            if k in df:
                                df[k] |= v
                            else:
                                df[k] = v
                    dd = result.get('digests')
                    if digests and dd:
                        dd.update(digests)
                else:
                    # See issue #18. If any dists are found and we're looking
                    # for specific constraints, we only return something if
                    # a match is found. For example, if a DirectoryLocator
                    # returns just foo (1.0) while we're looking for
                    # foo (>= 2.0), we'll pretend there was nothing there so
                    # that subsequent locators can be queried. Otherwise we
                    # would just return foo (1.0) which would then lead to a
                    # failure to find foo (>= 2.0), because other locators
                    # weren't searched. Note that this only matters when
                    # merge=False.
                    if self.matcher is None:
                        found = True
                    else:
                        found = False
                        for k in d:
                            if self.matcher.match(k):
                                found = True
                                break
                    if found:
                        result = d
                        break
        return result

    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator.
        """
        result = set()
        for locator in self.locators:
            try:
                result |= locator.get_distribution_names()
            except NotImplementedError:
                pass
        return result
# We use a legacy scheme simply because most of the dists on PyPI use legacy
# versions which don't conform to PEP 426 / PEP 440.
default_locator = AggregatingLocator(
                    JSONLocator(),
                    SimpleScrapingLocator('https://pypi.python.org/simple/',
                                          timeout=3.0),
                    scheme='legacy')

# Module-level convenience alias for the default locator's locate().
locate = default_locator.locate

# Matches a requirement of the form "name (version)" or "name (== version)".
NAME_VERSION_RE = re.compile(r'(?P<name>[\w-]+)\s*'
                             r'\(\s*(==\s*)?(?P<ver>[^)]+)\)$')
class DependencyFinder(object):
    """
    Locate dependencies for distributions.
    """

    def __init__(self, locator=None):
        """
        Initialise an instance, using the specified locator
        to locate distributions.
        """
        self.locator = locator or default_locator
        self.scheme = get_scheme(self.locator.scheme)

    def add_distribution(self, dist):
        """
        Add a distribution to the finder. This will update internal information
        about who provides what.
        :param dist: The distribution to add.
        """
        logger.debug('adding distribution %s', dist)
        name = dist.key
        self.dists_by_name[name] = dist
        self.dists[(name, dist.version)] = dist
        for p in dist.provides:
            name, version = parse_name_and_version(p)
            logger.debug('Add to provided: %s, %s, %s', name, version, dist)
            self.provided.setdefault(name, set()).add((version, dist))

    def remove_distribution(self, dist):
        """
        Remove a distribution from the finder. This will update internal
        information about who provides what.
        :param dist: The distribution to remove.
        """
        logger.debug('removing distribution %s', dist)
        name = dist.key
        del self.dists_by_name[name]
        del self.dists[(name, dist.version)]
        for p in dist.provides:
            name, version = parse_name_and_version(p)
            logger.debug('Remove from provided: %s, %s, %s', name, version, dist)
            s = self.provided[name]
            s.remove((version, dist))
            if not s:
                del self.provided[name]

    def get_matcher(self, reqt):
        """
        Get a version matcher for a requirement.
        :param reqt: The requirement
        :type reqt: str
        :return: A version matcher (an instance of
                 :class:`distlib.version.Matcher`).
        """
        try:
            matcher = self.scheme.matcher(reqt)
        except UnsupportedVersionError:  # pragma: no cover
            # XXX compat-mode if cannot read the version
            name = reqt.split()[0]
            matcher = self.scheme.matcher(name)
        return matcher

    def find_providers(self, reqt):
        """
        Find the distributions which can fulfill a requirement.

        :param reqt: The requirement.
        :type reqt: str
        :return: A set of distribution which can fulfill the requirement.
        """
        matcher = self.get_matcher(reqt)
        name = matcher.key   # case-insensitive
        result = set()
        provided = self.provided
        if name in provided:
            for version, provider in provided[name]:
                try:
                    match = matcher.match(version)
                except UnsupportedVersionError:
                    match = False

                if match:
                    result.add(provider)
                    break
        return result

    def try_to_replace(self, provider, other, problems):
        """
        Attempt to replace one provider with another. This is typically used
        when resolving dependencies from multiple sources, e.g. A requires
        (B >= 1.0) while C requires (B >= 1.1).

        For successful replacement, ``provider`` must meet all the requirements
        which ``other`` fulfills.

        :param provider: The provider we are trying to replace with.
        :param other: The provider we're trying to replace.
        :param problems: If False is returned, this will contain what
                         problems prevented replacement. This is currently
                         a tuple of the literal string 'cantreplace',
                         ``provider``, ``other`` and the set of requirements
                         that ``provider`` couldn't fulfill.
        :return: True if we can replace ``other`` with ``provider``, else
                 False.
        """
        rlist = self.reqts[other]
        unmatched = set()
        for s in rlist:
            matcher = self.get_matcher(s)
            if not matcher.match(provider.version):
                unmatched.add(s)
        if unmatched:
            # can't replace other with provider
            problems.add(('cantreplace', provider, other,
                          frozenset(unmatched)))
            result = False
        else:
            # can replace other with provider
            self.remove_distribution(other)
            del self.reqts[other]
            for s in rlist:
                self.reqts.setdefault(provider, set()).add(s)
            self.add_distribution(provider)
            result = True
        return result

    def find(self, requirement, meta_extras=None, prereleases=False):
        """
        Find a distribution and all distributions it depends on.

        :param requirement: The requirement specifying the distribution to
                            find, or a Distribution instance.
        :param meta_extras: A list of meta extras such as :test:, :build: and
                            so on.
        :param prereleases: If ``True``, allow pre-release versions to be
                            returned - otherwise, don't return prereleases
                            unless they're all that's available.

        Return a set of :class:`Distribution` instances and a set of
        problems.

        The distributions returned should be such that they have the
        :attr:`required` attribute set to ``True`` if they were
        from the ``requirement`` passed to ``find()``, and they have the
        :attr:`build_time_dependency` attribute set to ``True`` unless they
        are post-installation dependencies of the ``requirement``.

        The problems should be a tuple consisting of the string
        ``'unsatisfied'`` and the requirement which couldn't be satisfied
        by any distribution known to the locator.
        """
        # Reset per-call state.
        self.provided = {}
        self.dists = {}
        self.dists_by_name = {}
        self.reqts = {}

        meta_extras = set(meta_extras or [])
        if ':*:' in meta_extras:
            meta_extras.remove(':*:')
            # :meta: and :run: are implicitly included
            meta_extras |= set([':test:', ':build:', ':dev:'])

        if isinstance(requirement, Distribution):
            dist = odist = requirement
            logger.debug('passed %s as requirement', odist)
        else:
            dist = odist = self.locator.locate(requirement,
                                               prereleases=prereleases)
            if dist is None:
                raise DistlibException('Unable to locate %r' % requirement)
            logger.debug('located %s', odist)
        dist.requested = True
        problems = set()
        todo = set([dist])
        install_dists = set([odist])
        while todo:
            dist = todo.pop()
            name = dist.key     # case-insensitive
            if name not in self.dists_by_name:
                self.add_distribution(dist)
            else:
                #import pdb; pdb.set_trace()
                other = self.dists_by_name[name]
                if other != dist:
                    self.try_to_replace(dist, other, problems)

            ireqts = dist.run_requires | dist.meta_requires
            sreqts = dist.build_requires
            ereqts = set()
            if dist in install_dists:
                for key in ('test', 'build', 'dev'):
                    e = ':%s:' % key
                    if e in meta_extras:
                        ereqts |= getattr(dist, '%s_requires' % key)
            all_reqts = ireqts | sreqts | ereqts
            for r in all_reqts:
                providers = self.find_providers(r)
                if not providers:
                    logger.debug('No providers found for %r', r)
                    provider = self.locator.locate(r, prereleases=prereleases)
                    # If no provider is found and we didn't consider
                    # prereleases, consider them now.
                    if provider is None and not prereleases:
                        provider = self.locator.locate(r, prereleases=True)
                    if provider is None:
                        logger.debug('Cannot satisfy %r', r)
                        problems.add(('unsatisfied', r))
                    else:
                        n, v = provider.key, provider.version
                        if (n, v) not in self.dists:
                            todo.add(provider)
                        providers.add(provider)
                        if r in ireqts and dist in install_dists:
                            install_dists.add(provider)
                            logger.debug('Adding %s to install_dists',
                                         provider.name_and_version)
                for p in providers:
                    name = p.key
                    if name not in self.dists_by_name:
                        self.reqts.setdefault(p, set()).add(r)
                    else:
                        other = self.dists_by_name[name]
                        if other != p:
                            # see if other can be replaced by p
                            self.try_to_replace(p, other, problems)

        dists = set(self.dists.values())
        for dist in dists:
            dist.build_time_dependency = dist not in install_dists
            if dist.build_time_dependency:
                logger.debug('%s is a build-time dependency only.',
                             dist.name_and_version)
        logger.debug('find done for %s', odist)
        return dists, problems