1 """Open an arbitrary URL.
\r
3 See the following document for more info on URLs:
\r
4 "Names and Addresses, URIs, URLs, URNs, URCs", at
\r
5 http://www.w3.org/pub/WWW/Addressing/Overview.html
\r
7 See also the HTTP spec (from which the error codes are derived):
\r
8 "HTTP - Hypertext Transfer Protocol", at
\r
9 http://www.w3.org/pub/WWW/Protocols/
\r
11 Related standards and specs:
\r
12 - RFC1808: the "relative URL" spec. (authoritative status)
\r
13 - RFC1738 - the "URL standard". (authoritative status)
\r
14 - RFC1630 - the "URI spec". (informational status)
\r
16 The object returned by URLopener().open(file) will differ per
\r
17 protocol. All you know is that it has methods read(), readline(),
\r
18 readlines(), fileno(), close() and info(). The read*(), fileno()
\r
19 and close() methods work like those of open files.
\r
20 The info() method returns a mimetools.Message object which can be
\r
21 used to query various info about the object, if available.
\r
22 (mimetools.Message objects are queried with the getheader() method.)
\r
30 from urlparse import urljoin as basejoin
\r
33 __all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
\r
34 "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
\r
35 "urlencode", "url2pathname", "pathname2url", "splittag",
\r
36 "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
\r
37 "splittype", "splithost", "splituser", "splitpasswd", "splitport",
\r
38 "splitnport", "splitquery", "splitattr", "splitvalue",
\r
41 __version__ = '1.17' # XXX This version is not always updated :-(
\r
43 MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
\r
45 # Helper for non-unix systems
\r
46 if os.name == 'mac':
\r
47 from macurl2path import url2pathname, pathname2url
\r
48 elif os.name == 'nt':
\r
49 from nturl2path import url2pathname, pathname2url
\r
50 elif os.name == 'riscos':
\r
51 from rourl2path import url2pathname, pathname2url
\r
def url2pathname(pathname):
    """OS-specific conversion from a relative URL of the 'file' scheme
    to a file system path; not recommended for general use."""
    # Generic fallback: on posix-like systems the path is simply the
    # percent-decoded form of the URL component.
    return unquote(pathname)
\r
def pathname2url(pathname):
    """OS-specific conversion from a file system path to a relative URL
    of the 'file' scheme; not recommended for general use."""
    # Generic fallback: percent-encode the path, nothing more.
    return quote(pathname)
\r
63 # This really consists of two pieces:
\r
64 # (1) a class which handles opening of all sorts of URLs
\r
65 # (plus assorted utilities etc.)
\r
66 # (2) a set of functions for parsing URLs
\r
67 # XXX Should these be separated out into different modules?
\r
70 # Shortcut for basic usage
\r
72 def urlopen(url, data=None, proxies=None):
\r
73 """Create a file-like object for the specified URL to read from."""
\r
74 from warnings import warnpy3k
\r
75 warnings.warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
\r
76 "favor of urllib2.urlopen()", stacklevel=2)
\r
79 if proxies is not None:
\r
80 opener = FancyURLopener(proxies=proxies)
\r
81 elif not _urlopener:
\r
82 opener = FancyURLopener()
\r
87 return opener.open(url)
\r
89 return opener.open(url, data)
\r
90 def urlretrieve(url, filename=None, reporthook=None, data=None):
\r
93 _urlopener = FancyURLopener()
\r
94 return _urlopener.retrieve(url, filename, reporthook, data)
\r
97 _urlopener.cleanup()
\r
107 # exception raised when downloaded size does not match content-length
\r
class ContentTooShortError(IOError):
    """Raised when a retrieval delivers fewer bytes than the amount
    announced by the Content-Length header."""

    def __init__(self, message, content):
        super(ContentTooShortError, self).__init__(message)
        # Keep whatever data did arrive so callers can inspect it.
        self.content = content
\r
115 """Class to open URLs.
\r
116 This is a class rather than just a subroutine because we may need
\r
117 more than one set of global protocol-specific options.
\r
118 Note -- this is a base class for those who don't want the
\r
119 automatic handling of errors type 302 (relocated) and 401
\r
120 (authorization needed)."""
\r
124 version = "Python-urllib/%s" % __version__
\r
127 def __init__(self, proxies=None, **x509):
\r
128 if proxies is None:
\r
129 proxies = getproxies()
\r
130 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
\r
131 self.proxies = proxies
\r
132 self.key_file = x509.get('key_file')
\r
133 self.cert_file = x509.get('cert_file')
\r
134 self.addheaders = [('User-Agent', self.version)]
\r
135 self.__tempfiles = []
\r
136 self.__unlink = os.unlink # See cleanup()
\r
137 self.tempcache = None
\r
138 # Undocumented feature: if you assign {} to tempcache,
\r
139 # it is used to cache files retrieved with
\r
140 # self.retrieve(). This is not enabled by default
\r
141 # since it does not work for changing documents (and I
\r
142 # haven't got the logic to check expiration headers
\r
144 self.ftpcache = ftpcache
\r
145 # Undocumented feature: you can use a different
\r
146 # ftp cache by assigning to the .ftpcache member;
\r
147 # in case you want logically independent URL openers
\r
148 # XXX This is not threadsafe. Bah.
\r
157 # This code sometimes runs when the rest of this module
\r
158 # has already been deleted, so it can't use any globals
\r
159 # or import anything.
\r
160 if self.__tempfiles:
\r
161 for file in self.__tempfiles:
\r
163 self.__unlink(file)
\r
166 del self.__tempfiles[:]
\r
168 self.tempcache.clear()
\r
def addheader(self, *args):
    """Add a header to be used by the HTTP interface only
    e.g. u.addheader('Accept', 'sound/basic')"""
    # The header is recorded as the (name, value, ...) tuple it was
    # called with and sent on every subsequent HTTP(S) request.
    header = args
    self.addheaders.append(header)
\r
175 # External interface
\r
176 def open(self, fullurl, data=None):
\r
177 """Use URLopener().open(file) instead of open(file, 'r')."""
\r
178 fullurl = unwrap(toBytes(fullurl))
\r
179 if self.tempcache and fullurl in self.tempcache:
\r
180 filename, headers = self.tempcache[fullurl]
\r
181 fp = open(filename, 'rb')
\r
182 return addinfourl(fp, headers, fullurl)
\r
183 urltype, url = splittype(fullurl)
\r
186 if urltype in self.proxies:
\r
187 proxy = self.proxies[urltype]
\r
188 urltype, proxyhost = splittype(proxy)
\r
189 host, selector = splithost(proxyhost)
\r
190 url = (host, fullurl) # Signal special case to open_*()
\r
193 name = 'open_' + urltype
\r
194 self.type = urltype
\r
195 name = name.replace('-', '_')
\r
196 if not hasattr(self, name):
\r
198 return self.open_unknown_proxy(proxy, fullurl, data)
\r
200 return self.open_unknown(fullurl, data)
\r
203 return getattr(self, name)(url)
\r
205 return getattr(self, name)(url, data)
\r
206 except socket.error, msg:
\r
207 raise IOError, ('socket error', msg), sys.exc_info()[2]
\r
def open_unknown(self, fullurl, data=None):
    """Overridable interface to open unknown URL type."""
    scheme, _ = splittype(fullurl)
    # No open_<scheme> handler exists for this URL type.
    raise IOError('url error', 'unknown url type', scheme)
\r
def open_unknown_proxy(self, proxy, fullurl, data=None):
    """Overridable interface to open unknown URL type."""
    scheme, _ = splittype(fullurl)
    # A proxy was configured for a scheme we have no handler for.
    raise IOError('url error', 'invalid proxy for %s' % scheme, proxy)
\r
219 # External interface
\r
220 def retrieve(self, url, filename=None, reporthook=None, data=None):
\r
221 """retrieve(url) returns (filename, headers) for a local object
\r
222 or (tempfilename, headers) for a remote object."""
\r
223 url = unwrap(toBytes(url))
\r
224 if self.tempcache and url in self.tempcache:
\r
225 return self.tempcache[url]
\r
226 type, url1 = splittype(url)
\r
227 if filename is None and (not type or type == 'file'):
\r
229 fp = self.open_local_file(url1)
\r
232 return url2pathname(splithost(url1)[1]), hdrs
\r
233 except IOError, msg:
\r
235 fp = self.open(url, data)
\r
237 headers = fp.info()
\r
239 tfp = open(filename, 'wb')
\r
242 garbage, path = splittype(url)
\r
243 garbage, path = splithost(path or "")
\r
244 path, garbage = splitquery(path or "")
\r
245 path, garbage = splitattr(path or "")
\r
246 suffix = os.path.splitext(path)[1]
\r
247 (fd, filename) = tempfile.mkstemp(suffix)
\r
248 self.__tempfiles.append(filename)
\r
249 tfp = os.fdopen(fd, 'wb')
\r
251 result = filename, headers
\r
252 if self.tempcache is not None:
\r
253 self.tempcache[url] = result
\r
259 if "content-length" in headers:
\r
260 size = int(headers["Content-Length"])
\r
261 reporthook(blocknum, bs, size)
\r
263 block = fp.read(bs)
\r
270 reporthook(blocknum, bs, size)
\r
278 # raise exception if actual size does not match content-length header
\r
279 if size >= 0 and read < size:
\r
280 raise ContentTooShortError("retrieval incomplete: got only %i out "
\r
281 "of %i bytes" % (read, size), result)
\r
285 # Each method named open_<type> knows how to open that type of URL
\r
287 def open_http(self, url, data=None):
\r
288 """Use HTTP protocol."""
\r
292 if isinstance(url, str):
\r
293 host, selector = splithost(url)
\r
295 user_passwd, host = splituser(host)
\r
296 host = unquote(host)
\r
299 host, selector = url
\r
300 # check whether the proxy contains authorization information
\r
301 proxy_passwd, host = splituser(host)
\r
302 # now we proceed with the url we want to obtain
\r
303 urltype, rest = splittype(selector)
\r
306 if urltype.lower() != 'http':
\r
309 realhost, rest = splithost(rest)
\r
311 user_passwd, realhost = splituser(realhost)
\r
313 selector = "%s://%s%s" % (urltype, realhost, rest)
\r
314 if proxy_bypass(realhost):
\r
317 #print "proxy via http:", host, selector
\r
318 if not host: raise IOError, ('http error', 'no host given')
\r
322 proxy_auth = base64.b64encode(proxy_passwd).strip()
\r
328 auth = base64.b64encode(user_passwd).strip()
\r
331 h = httplib.HTTP(host)
\r
332 if data is not None:
\r
333 h.putrequest('POST', selector)
\r
334 h.putheader('Content-Type', 'application/x-www-form-urlencoded')
\r
335 h.putheader('Content-Length', '%d' % len(data))
\r
337 h.putrequest('GET', selector)
\r
338 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
\r
339 if auth: h.putheader('Authorization', 'Basic %s' % auth)
\r
340 if realhost: h.putheader('Host', realhost)
\r
341 for args in self.addheaders: h.putheader(*args)
\r
343 if data is not None:
\r
345 errcode, errmsg, headers = h.getreply()
\r
349 # something went wrong with the HTTP status line
\r
350 raise IOError, ('http protocol error', 0,
\r
351 'got a bad status line', None)
\r
352 # According to RFC 2616, "2xx" code indicates that the client's
\r
353 # request was successfully received, understood, and accepted.
\r
354 if (200 <= errcode < 300):
\r
355 return addinfourl(fp, headers, "http:" + url, errcode)
\r
358 return self.http_error(url, fp, errcode, errmsg, headers)
\r
360 return self.http_error(url, fp, errcode, errmsg, headers, data)
\r
362 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
\r
363 """Handle http errors.
\r
364 Derived class can override this, or provide specific handlers
\r
365 named http_error_DDD where DDD is the 3-digit error code."""
\r
366 # First check if there's a specific handler for this error
\r
367 name = 'http_error_%d' % errcode
\r
368 if hasattr(self, name):
\r
369 method = getattr(self, name)
\r
371 result = method(url, fp, errcode, errmsg, headers)
\r
373 result = method(url, fp, errcode, errmsg, headers, data)
\r
374 if result: return result
\r
375 return self.http_error_default(url, fp, errcode, errmsg, headers)
\r
377 def http_error_default(self, url, fp, errcode, errmsg, headers):
\r
378 """Default error handler: close the connection and raise IOError."""
\r
381 raise IOError, ('http error', errcode, errmsg, headers)
\r
384 def open_https(self, url, data=None):
\r
385 """Use HTTPS protocol."""
\r
389 proxy_passwd = None
\r
390 if isinstance(url, str):
\r
391 host, selector = splithost(url)
\r
393 user_passwd, host = splituser(host)
\r
394 host = unquote(host)
\r
397 host, selector = url
\r
398 # here, we determine, whether the proxy contains authorization information
\r
399 proxy_passwd, host = splituser(host)
\r
400 urltype, rest = splittype(selector)
\r
403 if urltype.lower() != 'https':
\r
406 realhost, rest = splithost(rest)
\r
408 user_passwd, realhost = splituser(realhost)
\r
410 selector = "%s://%s%s" % (urltype, realhost, rest)
\r
411 #print "proxy via https:", host, selector
\r
412 if not host: raise IOError, ('https error', 'no host given')
\r
415 proxy_auth = base64.b64encode(proxy_passwd).strip()
\r
420 auth = base64.b64encode(user_passwd).strip()
\r
423 h = httplib.HTTPS(host, 0,
\r
424 key_file=self.key_file,
\r
425 cert_file=self.cert_file)
\r
426 if data is not None:
\r
427 h.putrequest('POST', selector)
\r
428 h.putheader('Content-Type',
\r
429 'application/x-www-form-urlencoded')
\r
430 h.putheader('Content-Length', '%d' % len(data))
\r
432 h.putrequest('GET', selector)
\r
433 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
\r
434 if auth: h.putheader('Authorization', 'Basic %s' % auth)
\r
435 if realhost: h.putheader('Host', realhost)
\r
436 for args in self.addheaders: h.putheader(*args)
\r
438 if data is not None:
\r
440 errcode, errmsg, headers = h.getreply()
\r
444 # something went wrong with the HTTP status line
\r
445 raise IOError, ('http protocol error', 0,
\r
446 'got a bad status line', None)
\r
447 # According to RFC 2616, "2xx" code indicates that the client's
\r
448 # request was successfully received, understood, and accepted.
\r
449 if (200 <= errcode < 300):
\r
450 return addinfourl(fp, headers, "https:" + url, errcode)
\r
453 return self.http_error(url, fp, errcode, errmsg, headers)
\r
455 return self.http_error(url, fp, errcode, errmsg, headers,
\r
458 def open_file(self, url):
\r
459 """Use local file or FTP depending on form of URL."""
\r
460 if not isinstance(url, str):
\r
461 raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
\r
462 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
\r
463 return self.open_ftp(url)
\r
465 return self.open_local_file(url)
\r
467 def open_local_file(self, url):
\r
468 """Use local file."""
\r
469 import mimetypes, mimetools, email.utils
\r
471 from cStringIO import StringIO
\r
472 except ImportError:
\r
473 from StringIO import StringIO
\r
474 host, file = splithost(url)
\r
475 localname = url2pathname(file)
\r
477 stats = os.stat(localname)
\r
479 raise IOError(e.errno, e.strerror, e.filename)
\r
480 size = stats.st_size
\r
481 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
\r
482 mtype = mimetypes.guess_type(url)[0]
\r
483 headers = mimetools.Message(StringIO(
\r
484 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
\r
485 (mtype or 'text/plain', size, modified)))
\r
488 if file[:1] == '/':
\r
489 urlfile = 'file://' + file
\r
490 return addinfourl(open(localname, 'rb'),
\r
492 host, port = splitport(host)
\r
494 and socket.gethostbyname(host) in (localhost(), thishost()):
\r
496 if file[:1] == '/':
\r
497 urlfile = 'file://' + file
\r
498 return addinfourl(open(localname, 'rb'),
\r
500 raise IOError, ('local file error', 'not on local host')
\r
502 def open_ftp(self, url):
\r
503 """Use FTP protocol."""
\r
504 if not isinstance(url, str):
\r
505 raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
\r
506 import mimetypes, mimetools
\r
508 from cStringIO import StringIO
\r
509 except ImportError:
\r
510 from StringIO import StringIO
\r
511 host, path = splithost(url)
\r
512 if not host: raise IOError, ('ftp error', 'no host given')
\r
513 host, port = splitport(host)
\r
514 user, host = splituser(host)
\r
515 if user: user, passwd = splitpasswd(user)
\r
516 else: passwd = None
\r
517 host = unquote(host)
\r
518 user = unquote(user or '')
\r
519 passwd = unquote(passwd or '')
\r
520 host = socket.gethostbyname(host)
\r
523 port = ftplib.FTP_PORT
\r
526 path, attrs = splitattr(path)
\r
527 path = unquote(path)
\r
528 dirs = path.split('/')
\r
529 dirs, file = dirs[:-1], dirs[-1]
\r
530 if dirs and not dirs[0]: dirs = dirs[1:]
\r
531 if dirs and not dirs[0]: dirs[0] = '/'
\r
532 key = user, host, port, '/'.join(dirs)
\r
533 # XXX thread unsafe!
\r
534 if len(self.ftpcache) > MAXFTPCACHE:
\r
535 # Prune the cache, rather arbitrarily
\r
536 for k in self.ftpcache.keys():
\r
538 v = self.ftpcache[k]
\r
539 del self.ftpcache[k]
\r
542 if not key in self.ftpcache:
\r
543 self.ftpcache[key] = \
\r
544 ftpwrapper(user, passwd, host, port, dirs)
\r
545 if not file: type = 'D'
\r
548 attr, value = splitvalue(attr)
\r
549 if attr.lower() == 'type' and \
\r
550 value in ('a', 'A', 'i', 'I', 'd', 'D'):
\r
551 type = value.upper()
\r
552 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
\r
553 mtype = mimetypes.guess_type("ftp:" + url)[0]
\r
556 headers += "Content-Type: %s\n" % mtype
\r
557 if retrlen is not None and retrlen >= 0:
\r
558 headers += "Content-Length: %d\n" % retrlen
\r
559 headers = mimetools.Message(StringIO(headers))
\r
560 return addinfourl(fp, headers, "ftp:" + url)
\r
561 except ftperrors(), msg:
\r
562 raise IOError, ('ftp error', msg), sys.exc_info()[2]
\r
564 def open_data(self, url, data=None):
\r
565 """Use "data" URL."""
\r
566 if not isinstance(url, str):
\r
567 raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
\r
568 # ignore POSTed data
\r
570 # syntax of data URLs:
\r
571 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
\r
572 # mediatype := [ type "/" subtype ] *( ";" parameter )
\r
574 # parameter := attribute "=" value
\r
577 from cStringIO import StringIO
\r
578 except ImportError:
\r
579 from StringIO import StringIO
\r
581 [type, data] = url.split(',', 1)
\r
583 raise IOError, ('data error', 'bad data URL')
\r
585 type = 'text/plain;charset=US-ASCII'
\r
586 semi = type.rfind(';')
\r
587 if semi >= 0 and '=' not in type[semi:]:
\r
588 encoding = type[semi+1:]
\r
593 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
\r
594 time.gmtime(time.time())))
\r
595 msg.append('Content-type: %s' % type)
\r
596 if encoding == 'base64':
\r
598 data = base64.decodestring(data)
\r
600 data = unquote(data)
\r
601 msg.append('Content-Length: %d' % len(data))
\r
604 msg = '\n'.join(msg)
\r
606 headers = mimetools.Message(f, 0)
\r
607 #f.fileno = None # needed for addinfourl
\r
608 return addinfourl(f, headers, url)
\r
611 class FancyURLopener(URLopener):
\r
612 """Derived class with handlers for errors we can handle (perhaps)."""
\r
614 def __init__(self, *args, **kwargs):
\r
615 URLopener.__init__(self, *args, **kwargs)
\r
616 self.auth_cache = {}
\r
def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Default error handling -- don't raise an exception."""
    # Unlike the base class, hand the error response back as a normal
    # result so callers can read the body of the error page.
    full_url = "http:" + url
    return addinfourl(fp, headers, full_url, errcode)
\r
624 def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
\r
625 """Error 302 -- relocated (temporarily)."""
\r
627 if self.maxtries and self.tries >= self.maxtries:
\r
628 if hasattr(self, "http_error_500"):
\r
629 meth = self.http_error_500
\r
631 meth = self.http_error_default
\r
633 return meth(url, fp, 500,
\r
634 "Internal Server Error: Redirect Recursion", headers)
\r
635 result = self.redirect_internal(url, fp, errcode, errmsg, headers,
\r
640 def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
\r
641 if 'location' in headers:
\r
642 newurl = headers['location']
\r
643 elif 'uri' in headers:
\r
644 newurl = headers['uri']
\r
649 # In case the server sent a relative URL, join with original:
\r
650 newurl = basejoin(self.type + ":" + url, newurl)
\r
651 return self.open(newurl)
\r
def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 301 -- also relocated (permanently)."""
    # Permanent redirects are delegated to the temporary-redirect handler.
    return self.http_error_302(url, fp, errcode, errmsg, headers, data)
\r
def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 303 -- also relocated (essentially identical to 302)."""
    # "See Other" receives the same treatment as a temporary redirect.
    return self.http_error_302(url, fp, errcode, errmsg, headers, data)
\r
661 def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
\r
662 """Error 307 -- relocated, but turn POST into error."""
\r
664 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
\r
666 return self.http_error_default(url, fp, errcode, errmsg, headers)
\r
668 def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
\r
669 """Error 401 -- authentication required.
\r
670 This function supports Basic authentication only."""
\r
671 if not 'www-authenticate' in headers:
\r
672 URLopener.http_error_default(self, url, fp,
\r
673 errcode, errmsg, headers)
\r
674 stuff = headers['www-authenticate']
\r
676 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
\r
678 URLopener.http_error_default(self, url, fp,
\r
679 errcode, errmsg, headers)
\r
680 scheme, realm = match.groups()
\r
681 if scheme.lower() != 'basic':
\r
682 URLopener.http_error_default(self, url, fp,
\r
683 errcode, errmsg, headers)
\r
684 name = 'retry_' + self.type + '_basic_auth'
\r
686 return getattr(self,name)(url, realm)
\r
688 return getattr(self,name)(url, realm, data)
\r
690 def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
\r
691 """Error 407 -- proxy authentication required.
\r
692 This function supports Basic authentication only."""
\r
693 if not 'proxy-authenticate' in headers:
\r
694 URLopener.http_error_default(self, url, fp,
\r
695 errcode, errmsg, headers)
\r
696 stuff = headers['proxy-authenticate']
\r
698 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
\r
700 URLopener.http_error_default(self, url, fp,
\r
701 errcode, errmsg, headers)
\r
702 scheme, realm = match.groups()
\r
703 if scheme.lower() != 'basic':
\r
704 URLopener.http_error_default(self, url, fp,
\r
705 errcode, errmsg, headers)
\r
706 name = 'retry_proxy_' + self.type + '_basic_auth'
\r
708 return getattr(self,name)(url, realm)
\r
710 return getattr(self,name)(url, realm, data)
\r
712 def retry_proxy_http_basic_auth(self, url, realm, data=None):
\r
713 host, selector = splithost(url)
\r
714 newurl = 'http://' + host + selector
\r
715 proxy = self.proxies['http']
\r
716 urltype, proxyhost = splittype(proxy)
\r
717 proxyhost, proxyselector = splithost(proxyhost)
\r
718 i = proxyhost.find('@') + 1
\r
719 proxyhost = proxyhost[i:]
\r
720 user, passwd = self.get_user_passwd(proxyhost, realm, i)
\r
721 if not (user or passwd): return None
\r
722 proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
\r
723 self.proxies['http'] = 'http://' + proxyhost + proxyselector
\r
725 return self.open(newurl)
\r
727 return self.open(newurl, data)
\r
729 def retry_proxy_https_basic_auth(self, url, realm, data=None):
\r
730 host, selector = splithost(url)
\r
731 newurl = 'https://' + host + selector
\r
732 proxy = self.proxies['https']
\r
733 urltype, proxyhost = splittype(proxy)
\r
734 proxyhost, proxyselector = splithost(proxyhost)
\r
735 i = proxyhost.find('@') + 1
\r
736 proxyhost = proxyhost[i:]
\r
737 user, passwd = self.get_user_passwd(proxyhost, realm, i)
\r
738 if not (user or passwd): return None
\r
739 proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
\r
740 self.proxies['https'] = 'https://' + proxyhost + proxyselector
\r
742 return self.open(newurl)
\r
744 return self.open(newurl, data)
\r
746 def retry_http_basic_auth(self, url, realm, data=None):
\r
747 host, selector = splithost(url)
\r
748 i = host.find('@') + 1
\r
750 user, passwd = self.get_user_passwd(host, realm, i)
\r
751 if not (user or passwd): return None
\r
752 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
\r
753 newurl = 'http://' + host + selector
\r
755 return self.open(newurl)
\r
757 return self.open(newurl, data)
\r
759 def retry_https_basic_auth(self, url, realm, data=None):
\r
760 host, selector = splithost(url)
\r
761 i = host.find('@') + 1
\r
763 user, passwd = self.get_user_passwd(host, realm, i)
\r
764 if not (user or passwd): return None
\r
765 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
\r
766 newurl = 'https://' + host + selector
\r
768 return self.open(newurl)
\r
770 return self.open(newurl, data)
\r
772 def get_user_passwd(self, host, realm, clear_cache = 0):
\r
773 key = realm + '@' + host.lower()
\r
774 if key in self.auth_cache:
\r
776 del self.auth_cache[key]
\r
778 return self.auth_cache[key]
\r
779 user, passwd = self.prompt_user_passwd(host, realm)
\r
780 if user or passwd: self.auth_cache[key] = (user, passwd)
\r
781 return user, passwd
\r
783 def prompt_user_passwd(self, host, realm):
\r
784 """Override this in a GUI environment!"""
\r
787 user = raw_input("Enter username for %s at %s: " % (realm,
\r
789 passwd = getpass.getpass("Enter password for %s in %s at %s: " %
\r
790 (user, realm, host))
\r
791 return user, passwd
\r
792 except KeyboardInterrupt:
\r
797 # Utility functions
\r
801 """Return the IP address of the magic hostname 'localhost'."""
\r
803 if _localhost is None:
\r
804 _localhost = socket.gethostbyname('localhost')
\r
809 """Return the IP address of the current host."""
\r
811 if _thishost is None:
\r
812 _thishost = socket.gethostbyname(socket.gethostname())
\r
817 """Return the set of errors raised by the FTP class."""
\r
819 if _ftperrors is None:
\r
821 _ftperrors = ftplib.all_errors
\r
826 """Return an empty mimetools.Message object."""
\r
828 if _noheaders is None:
\r
831 from cStringIO import StringIO
\r
832 except ImportError:
\r
833 from StringIO import StringIO
\r
834 _noheaders = mimetools.Message(StringIO(), 0)
\r
835 _noheaders.fp.close() # Recycle file descriptor
\r
842 """Class used by open_ftp() for cache of open FTP connections."""
\r
844 def __init__(self, user, passwd, host, port, dirs,
\r
845 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
\r
847 self.passwd = passwd
\r
851 self.timeout = timeout
\r
857 self.ftp = ftplib.FTP()
\r
858 self.ftp.connect(self.host, self.port, self.timeout)
\r
859 self.ftp.login(self.user, self.passwd)
\r
860 for dir in self.dirs:
\r
863 def retrfile(self, file, type):
\r
866 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
\r
867 else: cmd = 'TYPE ' + type; isdir = 0
\r
869 self.ftp.voidcmd(cmd)
\r
870 except ftplib.all_errors:
\r
872 self.ftp.voidcmd(cmd)
\r
874 if file and not isdir:
\r
875 # Try to retrieve as a file
\r
877 cmd = 'RETR ' + file
\r
878 conn = self.ftp.ntransfercmd(cmd)
\r
879 except ftplib.error_perm, reason:
\r
880 if str(reason)[:3] != '550':
\r
881 raise IOError, ('ftp error', reason), sys.exc_info()[2]
\r
883 # Set transfer mode to ASCII!
\r
884 self.ftp.voidcmd('TYPE A')
\r
885 # Try a directory listing. Verify that directory exists.
\r
887 pwd = self.ftp.pwd()
\r
891 except ftplib.error_perm, reason:
\r
892 raise IOError, ('ftp error', reason), sys.exc_info()[2]
\r
895 cmd = 'LIST ' + file
\r
898 conn = self.ftp.ntransfercmd(cmd)
\r
900 # Pass back both a suitably decorated object and a retrieval length
\r
901 return (addclosehook(conn[0].makefile('rb'),
\r
902 self.endtransfer), conn[1])
\r
903 def endtransfer(self):
\r
908 self.ftp.voidresp()
\r
909 except ftperrors():
\r
916 except ftperrors():
\r
920 """Base class for addinfo and addclosehook."""
\r
922 def __init__(self, fp):
\r
924 self.read = self.fp.read
\r
925 self.readline = self.fp.readline
\r
926 if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
\r
927 if hasattr(self.fp, "fileno"):
\r
928 self.fileno = self.fp.fileno
\r
930 self.fileno = lambda: None
\r
931 if hasattr(self.fp, "__iter__"):
\r
932 self.__iter__ = self.fp.__iter__
\r
933 if hasattr(self.fp, "next"):
\r
934 self.next = self.fp.next
\r
936 def __repr__(self):
\r
937 return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
\r
942 self.readline = None
\r
943 self.readlines = None
\r
945 if self.fp: self.fp.close()
\r
948 class addclosehook(addbase):
\r
949 """Class to add a close hook to an open file."""
\r
951 def __init__(self, fp, closehook, *hookargs):
\r
952 addbase.__init__(self, fp)
\r
953 self.closehook = closehook
\r
954 self.hookargs = hookargs
\r
957 addbase.close(self)
\r
959 self.closehook(*self.hookargs)
\r
960 self.closehook = None
\r
961 self.hookargs = None
\r
963 class addinfo(addbase):
\r
964 """class to add an info() method to an open file."""
\r
966 def __init__(self, fp, headers):
\r
967 addbase.__init__(self, fp)
\r
968 self.headers = headers
\r
971 return self.headers
\r
973 class addinfourl(addbase):
\r
974 """class to add info() and geturl() methods to an open file."""
\r
976 def __init__(self, fp, headers, url, code=None):
\r
977 addbase.__init__(self, fp)
\r
978 self.headers = headers
\r
983 return self.headers
\r
992 # Utilities to parse URLs (most of these return None for missing parts):
\r
993 # unwrap('<URL:type://host/path>') --> 'type://host/path'
\r
994 # splittype('type:opaquestring') --> 'type', 'opaquestring'
\r
995 # splithost('//host[:port]/path') --> 'host[:port]', '/path'
\r
996 # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
\r
997 # splitpasswd('user:passwd') -> 'user', 'passwd'
\r
998 # splitport('host:port') --> 'host', 'port'
\r
999 # splitquery('/path?query') --> '/path', 'query'
\r
1000 # splittag('/path#tag') --> '/path', 'tag'
\r
1001 # splitattr('/path;attr1=value1;attr2=value2;...') ->
\r
1002 # '/path', ['attr1=value1', 'attr2=value2', ...]
\r
1003 # splitvalue('attr=value') --> 'attr', 'value'
\r
1004 # unquote('abc%20def') -> 'abc def'
\r
1005 # quote('abc def') -> 'abc%20def')
\r
1010 def _is_unicode(x):
\r
1013 def _is_unicode(x):
\r
1014 return isinstance(x, unicode)
\r
1017 """toBytes(u"URL") --> 'URL'."""
\r
1018 # Most URL schemes require ASCII. If that changes, the conversion
\r
1020 if _is_unicode(url):
\r
1022 url = url.encode("ASCII")
\r
1023 except UnicodeError:
\r
1024 raise UnicodeError("URL " + repr(url) +
\r
1025 " contains non-ASCII characters")
\r
1029 """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
\r
1031 if url[:1] == '<' and url[-1:] == '>':
\r
1032 url = url[1:-1].strip()
\r
1033 if url[:4] == 'URL:': url = url[4:].strip()
\r
1037 def splittype(url):
\r
1038 """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
\r
1040 if _typeprog is None:
\r
1042 _typeprog = re.compile('^([^/:]+):')
\r
1044 match = _typeprog.match(url)
\r
1046 scheme = match.group(1)
\r
1047 return scheme.lower(), url[len(scheme) + 1:]
\r
1051 def splithost(url):
\r
1052 """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
\r
1054 if _hostprog is None:
\r
1056 _hostprog = re.compile('^//([^/?]*)(.*)$')
\r
1058 match = _hostprog.match(url)
\r
1059 if match: return match.group(1, 2)
\r
1063 def splituser(host):
\r
1064 """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
\r
1066 if _userprog is None:
\r
1068 _userprog = re.compile('^(.*)@(.*)$')
\r
1070 match = _userprog.match(host)
\r
1071 if match: return map(unquote, match.group(1, 2))
\r
1074 _passwdprog = None
\r
1075 def splitpasswd(user):
\r
1076 """splitpasswd('user:passwd') -> 'user', 'passwd'."""
\r
1077 global _passwdprog
\r
1078 if _passwdprog is None:
\r
1080 _passwdprog = re.compile('^([^:]*):(.*)$')
\r
1082 match = _passwdprog.match(user)
\r
1083 if match: return match.group(1, 2)
\r
1086 # splittag('/path#tag') --> '/path', 'tag'
\r
1088 def splitport(host):
\r
1089 """splitport('host:port') --> 'host', 'port'."""
\r
1091 if _portprog is None:
\r
1093 _portprog = re.compile('^(.*):([0-9]+)$')
\r
1095 match = _portprog.match(host)
\r
1096 if match: return match.group(1, 2)
\r
1100 def splitnport(host, defport=-1):
\r
1101 """Split host and port, returning numeric port.
\r
1102 Return given default port if no ':' found; defaults to -1.
\r
1103 Return numerical port if a valid number are found after ':'.
\r
1104 Return None if ':' but not a valid number."""
\r
1106 if _nportprog is None:
\r
1108 _nportprog = re.compile('^(.*):(.*)$')
\r
1110 match = _nportprog.match(host)
\r
1112 host, port = match.group(1, 2)
\r
1114 if not port: raise ValueError, "no digits"
\r
1116 except ValueError:
\r
1118 return host, nport
\r
1119 return host, defport
\r
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    Splits at the LAST '?'; returns (url, None) when there is none.
    """
    global _queryprog
    if _queryprog is None:
        import re
        _queryprog = re.compile('^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match: return match.group(1, 2)
    return url, None
\r
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    Splits at the LAST '#'; returns (url, None) when there is none.
    """
    global _tagprog
    if _tagprog is None:
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    match = _tagprog.match(url)
    if match: return match.group(1, 2)
    return url, None
\r
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    With no ';' present the attribute list is empty.
    """
    words = url.split(';')
    return words[0], words[1:]
\r
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    Splits at the FIRST '='; returns (attr, None) when there is none.
    """
    global _valueprog
    if _valueprog is None:
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    match = _valueprog.match(attr)
    if match: return match.group(1, 2)
    return attr, None
\r
# Map both lowercase and uppercase two-digit hex strings to their byte.
_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
_hextochr.update(('%02X' % i, chr(i)) for i in range(256))

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Replace each '%xx' escape with its character; malformed escapes
    (no valid hex digits) are left as-is, '%' included.
    """
    res = s.split('%')
    for i in range(1, len(res)):
        item = res[i]
        try:
            res[i] = _hextochr[item[:2]] + item[2:]
        except KeyError:
            # Not a valid two-digit hex escape: keep the literal '%'.
            res[i] = '%' + item
        except UnicodeDecodeError:
            # Unicode input whose decoded byte can't join a str: build
            # the character explicitly.
            res[i] = unichr(int(item[:2], 16)) + item[2:]
    return "".join(res)
\r
def unquote_plus(s):
    """unquote('%7e/abc+def') -> '~/abc def'"""
    # '+' encodes a space in the query component; convert before unquoting.
    s = s.replace('+', ' ')
    return unquote(s)
\r
# Characters that are never quoted, regardless of the 'safe' argument.
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Cache of (safe, always_safe) -> per-character translation maps.
_safemaps = {}

def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.
    """
    cachekey = (safe, always_safe)
    try:
        safe_map = _safemaps[cachekey]
    except KeyError:
        # First use of this 'safe' set: build a 256-entry map from
        # character to either itself or its %XX escape.
        safe += always_safe
        safe_map = {}
        for i in range(256):
            c = chr(i)
            safe_map[c] = (c in safe) and c or ('%%%02X' % i)
        _safemaps[cachekey] = safe_map
    res = map(safe_map.__getitem__, s)
    return ''.join(res)
\r
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' in s:
        # Keep spaces unquoted through quote(), then turn them into '+'.
        s = quote(s, safe + ' ')
        return s.replace(' ', '+')
    return quote(s, safe)
\r
1230 def urlencode(query,doseq=0):
\r
1231 """Encode a sequence of two-element tuples or dictionary into a URL query string.
\r
1233 If any values in the query arg are sequences and doseq is true, each
\r
1234 sequence element is converted to a separate parameter.
\r
1236 If the query arg is a sequence of two-element tuples, the order of the
\r
1237 parameters in the output will match the order of parameters in the
\r
1241 if hasattr(query,"items"):
\r
1243 query = query.items()
\r
1245 # it's a bother at times that strings and string-like objects are
\r
1248 # non-sequence items should not work with len()
\r
1249 # non-empty strings will fail this
\r
1250 if len(query) and not isinstance(query[0], tuple):
\r
1252 # zero-length sequences of all types will get here and succeed,
\r
1253 # but that's a minor nit - since the original implementation
\r
1254 # allowed empty dicts that type of behavior probably should be
\r
1255 # preserved for consistency
\r
1257 ty,va,tb = sys.exc_info()
\r
1258 raise TypeError, "not a valid non-string sequence or mapping object", tb
\r
1262 # preserve old behavior
\r
1263 for k, v in query:
\r
1264 k = quote_plus(str(k))
\r
1265 v = quote_plus(str(v))
\r
1266 l.append(k + '=' + v)
\r
1268 for k, v in query:
\r
1269 k = quote_plus(str(k))
\r
1270 if isinstance(v, str):
\r
1272 l.append(k + '=' + v)
\r
1273 elif _is_unicode(v):
\r
1274 # is there a reasonable way to convert to ASCII?
\r
1275 # encode generates a string, but "replace" or "ignore"
\r
1276 # lose information and "strict" can raise UnicodeError
\r
1277 v = quote_plus(v.encode("ASCII","replace"))
\r
1278 l.append(k + '=' + v)
\r
1281 # is this a sufficient test for sequence-ness?
\r
1285 v = quote_plus(str(v))
\r
1286 l.append(k + '=' + v)
\r
1288 # loop over the sequence
\r
1290 l.append(k + '=' + quote_plus(str(elt)))
\r
1291 return '&'.join(l)
\r
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    """
    proxies = {}
    for name, value in os.environ.items():
        name = name.lower()
        # Only non-empty values count; empty means "unset".
        if value and name[-6:] == '_proxy':
            proxies[name[:-6]] = value
    return proxies
\r
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy, which should
    be a list of DNS suffixes separated by commas, or '*' for all hosts.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    # check if the host ends with any of the DNS suffixes
    for name in no_proxy.split(','):
        if name and (hostonly.endswith(name) or host.endswith(name)):
            return 1
    # otherwise, don't bypass
    return 0
\r
if sys.platform == 'darwin':
    def _CFSetup(sc):
        # Declare argument/result types for the SystemConfiguration and
        # CoreFoundation entry points used below.
        from ctypes import c_int32, c_void_p, c_char_p, c_int
        sc.CFStringCreateWithCString.argtypes = [ c_void_p, c_char_p, c_int32 ]
        sc.CFStringCreateWithCString.restype = c_void_p
        sc.SCDynamicStoreCopyProxies.argtypes = [ c_void_p ]
        sc.SCDynamicStoreCopyProxies.restype = c_void_p
        sc.CFDictionaryGetValue.argtypes = [ c_void_p, c_void_p ]
        sc.CFDictionaryGetValue.restype = c_void_p
        sc.CFStringGetLength.argtypes = [ c_void_p ]
        sc.CFStringGetLength.restype = c_int32
        sc.CFStringGetCString.argtypes = [ c_void_p, c_char_p, c_int32, c_int32 ]
        sc.CFStringGetCString.restype = c_int32
        sc.CFNumberGetValue.argtypes = [ c_void_p, c_int, c_void_p ]
        sc.CFNumberGetValue.restype = c_int32
        sc.CFRelease.argtypes = [ c_void_p ]
        sc.CFRelease.restype = None

    def _CStringFromCFString(sc, value):
        # Copy a CFString's contents out into a plain Python string.
        from ctypes import create_string_buffer
        length = sc.CFStringGetLength(value) + 1
        buff = create_string_buffer(length)
        sc.CFStringGetCString(value, buff, length, 0)
        return buff.value

    def _CFNumberToInt32(sc, cfnum):
        # Extract a 32-bit integer from a CFNumber.
        from ctypes import byref, c_int
        val = c_int(0)
        kCFNumberSInt32Type = 3
        sc.CFNumberGetValue(cfnum, kCFNumberSInt32Type, byref(val))
        return val.value


    def proxy_bypass_macosx_sysconf(host):
        """
        Return True iff this host shouldn't be accessed using a proxy

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        import re
        import socket
        from ctypes import cdll
        from ctypes.util import find_library
        from fnmatch import fnmatch

        def ip2num(ipAddr):
            # Pack a dotted-quad string into a 32-bit integer, padding
            # short addresses with zeros.
            parts = ipAddr.split('.')
            parts = map(int, parts)
            if len(parts) != 4:
                parts = (parts + [0, 0, 0, 0])[:4]
            return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

        sc = cdll.LoadLibrary(find_library("SystemConfiguration"))
        _CFSetup(sc)

        hostIP = None

        kSCPropNetProxiesExceptionsList = sc.CFStringCreateWithCString(0, "ExceptionsList", 0)
        kSCPropNetProxiesExcludeSimpleHostnames = sc.CFStringCreateWithCString(0,
                "ExcludeSimpleHostnames", 0)

        proxyDict = sc.SCDynamicStoreCopyProxies(None)
        if proxyDict is None:
            return False

        try:
            # Check for simple host names:
            if '.' not in host:
                exclude_simple = sc.CFDictionaryGetValue(proxyDict,
                        kSCPropNetProxiesExcludeSimpleHostnames)
                if exclude_simple and _CFNumberToInt32(sc, exclude_simple):
                    return True

            # Check the exceptions list:
            exceptions = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesExceptionsList)
            if exceptions:
                # Items in the list are strings like these: *.local, 169.254/16
                for index in xrange(sc.CFArrayGetCount(exceptions)):
                    value = sc.CFArrayGetValueAtIndex(exceptions, index)
                    if not value: continue
                    value = _CStringFromCFString(sc, value)

                    m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
                    if m is not None:
                        # Numeric exception entry: compare network prefixes.
                        if hostIP is None:
                            hostIP = socket.gethostbyname(host)
                            hostIP = ip2num(hostIP)

                        base = ip2num(m.group(1))
                        # NOTE(review): entries without a '/mask' suffix make
                        # m.group(2) None here — confirm against upstream fix.
                        mask = int(m.group(2)[1:])
                        mask = 32 - mask

                        if (hostIP >> mask) == (base >> mask):
                            return True

                    elif fnmatch(host, value):
                        # Glob-style hostname exception (e.g. *.local).
                        return True

            return False
        finally:
            sc.CFRelease(kSCPropNetProxiesExceptionsList)
            sc.CFRelease(kSCPropNetProxiesExcludeSimpleHostnames)


    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        from ctypes import cdll
        from ctypes.util import find_library

        sc = cdll.LoadLibrary(find_library("SystemConfiguration"))
        _CFSetup(sc)

        if not sc:
            return {}

        kSCPropNetProxiesHTTPEnable = sc.CFStringCreateWithCString(0, "HTTPEnable", 0)
        kSCPropNetProxiesHTTPProxy = sc.CFStringCreateWithCString(0, "HTTPProxy", 0)
        kSCPropNetProxiesHTTPPort = sc.CFStringCreateWithCString(0, "HTTPPort", 0)

        kSCPropNetProxiesHTTPSEnable = sc.CFStringCreateWithCString(0, "HTTPSEnable", 0)
        kSCPropNetProxiesHTTPSProxy = sc.CFStringCreateWithCString(0, "HTTPSProxy", 0)
        kSCPropNetProxiesHTTPSPort = sc.CFStringCreateWithCString(0, "HTTPSPort", 0)

        kSCPropNetProxiesFTPEnable = sc.CFStringCreateWithCString(0, "FTPEnable", 0)
        kSCPropNetProxiesFTPPassive = sc.CFStringCreateWithCString(0, "FTPPassive", 0)
        kSCPropNetProxiesFTPPort = sc.CFStringCreateWithCString(0, "FTPPort", 0)
        kSCPropNetProxiesFTPProxy = sc.CFStringCreateWithCString(0, "FTPProxy", 0)

        kSCPropNetProxiesGopherEnable = sc.CFStringCreateWithCString(0, "GopherEnable", 0)
        kSCPropNetProxiesGopherPort = sc.CFStringCreateWithCString(0, "GopherPort", 0)
        kSCPropNetProxiesGopherProxy = sc.CFStringCreateWithCString(0, "GopherProxy", 0)

        proxies = {}
        proxyDict = sc.SCDynamicStoreCopyProxies(None)

        try:
            # HTTP:
            enabled = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPEnable)
            if enabled and _CFNumberToInt32(sc, enabled):
                proxy = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPProxy)
                port = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPPort)

                if proxy:
                    proxy = _CStringFromCFString(sc, proxy)
                    if port:
                        port = _CFNumberToInt32(sc, port)
                        proxies["http"] = "http://%s:%i" % (proxy, port)
                    else:
                        proxies["http"] = "http://%s" % (proxy, )

            # HTTPS:
            enabled = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPSEnable)
            if enabled and _CFNumberToInt32(sc, enabled):
                proxy = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPSProxy)
                port = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPSPort)

                if proxy:
                    proxy = _CStringFromCFString(sc, proxy)
                    if port:
                        port = _CFNumberToInt32(sc, port)
                        proxies["https"] = "http://%s:%i" % (proxy, port)
                    else:
                        proxies["https"] = "http://%s" % (proxy, )

            # FTP:
            enabled = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesFTPEnable)
            if enabled and _CFNumberToInt32(sc, enabled):
                proxy = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesFTPProxy)
                port = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesFTPPort)

                if proxy:
                    proxy = _CStringFromCFString(sc, proxy)
                    if port:
                        port = _CFNumberToInt32(sc, port)
                        proxies["ftp"] = "http://%s:%i" % (proxy, port)
                    else:
                        proxies["ftp"] = "http://%s" % (proxy, )

            # Gopher:
            enabled = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesGopherEnable)
            if enabled and _CFNumberToInt32(sc, enabled):
                proxy = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesGopherProxy)
                port = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesGopherPort)

                if proxy:
                    proxy = _CStringFromCFString(sc, proxy)
                    if port:
                        port = _CFNumberToInt32(sc, port)
                        proxies["gopher"] = "http://%s:%i" % (proxy, port)
                    else:
                        proxies["gopher"] = "http://%s" % (proxy, )
        finally:
            sc.CFRelease(proxyDict)

        sc.CFRelease(kSCPropNetProxiesHTTPEnable)
        sc.CFRelease(kSCPropNetProxiesHTTPProxy)
        sc.CFRelease(kSCPropNetProxiesHTTPPort)
        sc.CFRelease(kSCPropNetProxiesFTPEnable)
        sc.CFRelease(kSCPropNetProxiesFTPPassive)
        sc.CFRelease(kSCPropNetProxiesFTPPort)
        sc.CFRelease(kSCPropNetProxiesFTPProxy)
        sc.CFRelease(kSCPropNetProxiesGopherEnable)
        sc.CFRelease(kSCPropNetProxiesGopherPort)
        sc.CFRelease(kSCPropNetProxiesGopherProxy)

        return proxies

    def proxy_bypass(host):
        # Environment settings take precedence over system configuration.
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        return getproxies_environment() or getproxies_macosx_sysconf()
\r
1561 elif os.name == 'nt':
\r
1562 def getproxies_registry():
\r
1563 """Return a dictionary of scheme -> proxy server URL mappings.
\r
1565 Win32 uses the registry to store proxies.
\r
1571 except ImportError:
\r
1572 # Std module, so should be around - but you never know!
\r
1575 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
\r
1576 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
\r
1577 proxyEnable = _winreg.QueryValueEx(internetSettings,
\r
1580 # Returned as Unicode but problems if not converted to ASCII
\r
1581 proxyServer = str(_winreg.QueryValueEx(internetSettings,
\r
1582 'ProxyServer')[0])
\r
1583 if '=' in proxyServer:
\r
1584 # Per-protocol settings
\r
1585 for p in proxyServer.split(';'):
\r
1586 protocol, address = p.split('=', 1)
\r
1587 # See if address has a type:// prefix
\r
1589 if not re.match('^([^/:]+)://', address):
\r
1590 address = '%s://%s' % (protocol, address)
\r
1591 proxies[protocol] = address
\r
1593 # Use one setting for all protocols
\r
1594 if proxyServer[:5] == 'http:':
\r
1595 proxies['http'] = proxyServer
\r
1597 proxies['http'] = 'http://%s' % proxyServer
\r
1598 proxies['ftp'] = 'ftp://%s' % proxyServer
\r
1599 internetSettings.Close()
\r
1600 except (WindowsError, ValueError, TypeError):
\r
1601 # Either registry key not found etc, or the value in an
\r
1602 # unexpected format.
\r
1603 # proxies already set up to be empty so nothing to do
\r
1608 """Return a dictionary of scheme -> proxy server URL mappings.
\r
1610 Returns settings gathered from the environment, if specified,
\r
1614 return getproxies_environment() or getproxies_registry()
\r
1616 def proxy_bypass_registry(host):
\r
1620 except ImportError:
\r
1621 # Std modules, so should be around - but you never know!
\r
1624 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
\r
1625 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
\r
1626 proxyEnable = _winreg.QueryValueEx(internetSettings,
\r
1628 proxyOverride = str(_winreg.QueryValueEx(internetSettings,
\r
1629 'ProxyOverride')[0])
\r
1630 # ^^^^ Returned as Unicode but problems if not converted to ASCII
\r
1631 except WindowsError:
\r
1633 if not proxyEnable or not proxyOverride:
\r
1635 # try to make a host list from name and IP address.
\r
1636 rawHost, port = splitport(host)
\r
1639 addr = socket.gethostbyname(rawHost)
\r
1640 if addr != rawHost:
\r
1642 except socket.error:
\r
1645 fqdn = socket.getfqdn(rawHost)
\r
1646 if fqdn != rawHost:
\r
1648 except socket.error:
\r
1650 # make a check value list from the registry entry: replace the
\r
1651 # '<local>' string by the localhost entry and the corresponding
\r
1652 # canonical entry.
\r
1653 proxyOverride = proxyOverride.split(';')
\r
1655 while i < len(proxyOverride):
\r
1656 if proxyOverride[i] == '<local>':
\r
1657 proxyOverride[i:i+1] = ['localhost',
\r
1659 socket.gethostname(),
\r
1660 socket.gethostbyname(
\r
1661 socket.gethostname())]
\r
1663 # print proxyOverride
\r
1664 # now check if we match one of the registry values.
\r
1665 for test in proxyOverride:
\r
1666 test = test.replace(".", r"\.") # mask dots
\r
1667 test = test.replace("*", r".*") # change glob sequence
\r
1668 test = test.replace("?", r".") # change glob char
\r
1670 # print "%s <--> %s" %( test, val )
\r
1671 if re.match(test, val, re.I):
\r
1675 def proxy_bypass(host):
\r
1676 """Return a dictionary of scheme -> proxy server URL mappings.
\r
1678 Returns settings gathered from the environment, if specified,
\r
1682 if getproxies_environment():
\r
1683 return proxy_bypass_environment(host)
\r
1685 return proxy_bypass_registry(host)
\r
1688 # By default use environment variables
\r
1689 getproxies = getproxies_environment
\r
1690 proxy_bypass = proxy_bypass_environment
\r
1692 # Test and time quote() and unquote()
\r
1695 for i in range(256): s = s + chr(i)
\r
1706 print round(t1 - t0, 3), 'sec'
\r
1709 def reporthook(blocknum, blocksize, totalsize):
\r
1710 # Report during remote transfers
\r
1711 print "Block number: %d, Block size: %d, Total size: %d" % (
\r
1712 blocknum, blocksize, totalsize)
\r
1715 def test(args=[]):
\r
1719 'file:/etc/passwd',
\r
1720 'file://localhost/etc/passwd',
\r
1721 'ftp://ftp.gnu.org/pub/README',
\r
1722 'http://www.python.org/index.html',
\r
1724 if hasattr(URLopener, "open_https"):
\r
1725 args.append('https://synergy.as.cmu.edu/~geek/')
\r
1728 print '-'*10, url, '-'*10
\r
1729 fn, h = urlretrieve(url, None, reporthook)
\r
1733 for k in h.keys(): print k + ':', h[k]
\r
1735 fp = open(fn, 'rb')
\r
1739 table = string.maketrans("", "")
\r
1740 data = data.translate(table, "\r")
\r
1742 fn, h = None, None
\r
1748 import getopt, sys
\r
1750 opts, args = getopt.getopt(sys.argv[1:], "th")
\r
1751 except getopt.error, msg:
\r
1753 print "Use -h for help"
\r
1760 print "Usage: python urllib.py [-t] [url ...]"
\r
1761 print "-t runs self-test;",
\r
1762 print "otherwise, contents of urls are printed"
\r
1770 print "Use -h for help"
\r
1772 print urlopen(url).read(),
\r
1774 # Run test program when run as a script
\r
1775 if __name__ == '__main__':
\r