1 """PyPI and direct package downloading."""
19 from functools
import wraps
22 from pkg_resources
import (
38 from distutils
import log
39 from distutils
.errors
import DistutilsError
40 from fnmatch
import translate
41 from setuptools
.wheel
import Wheel
42 from setuptools
.extern
.more_itertools
import unique_everseen
45 EGG_FRAGMENT
= re
.compile(r
'^egg=([-A-Za-z0-9_.+!]+)$')
46 HREF
= re
.compile(r
"""href\s*=\s*['"]?([^'"> ]+)""", re
.I
)
47 PYPI_MD5
= re
.compile(
48 r
'<a href="([^"#]+)">([^<]+)</a>\n\s+\(<a (?:title="MD5 hash"\n\s+)'
49 r
'href="[^?]+\?:action=show_md5&digest=([0-9a-f]{32})">md5</a>\)'
51 URL_SCHEME
= re
.compile('([-+.a-z0-9]{2,}):', re
.I
).match
52 EXTENSIONS
= ".tar.gz .tar.bz2 .tar .zip .tgz".split()
57 'parse_bdist_wininst',
58 'interpret_distro_name',
63 _tmpl
= "setuptools/{setuptools.__version__} Python-urllib/{py_major}"
64 user_agent
= _tmpl
.format(
65 py_major
='{}.{}'.format(*sys
.version_info
), setuptools
=setuptools
def parse_requirement_arg(spec):
    """Parse `spec` as a ``Requirement``, raising ``DistutilsError`` if invalid.

    Used for command-line arguments that may be either URLs, filenames,
    or requirement specs; callers try the other interpretations first.
    """
    try:
        return Requirement.parse(spec)
    except ValueError as e:
        raise DistutilsError(
            "Not a URL, existing file, or requirement spec: %r" % (spec,)
        ) from e
def parse_bdist_wininst(name):
    """Return (base,pyversion) or (None,None) for possible .exe name"""

    lower = name.lower()
    base, py_ver, plat = None, None, None

    if lower.endswith('.exe'):
        if lower.endswith('.win32.exe'):
            # e.g. foo-1.0.win32.exe
            base = name[:-10]
            plat = 'win32'
        elif lower.startswith('.win32-py', -16):
            # e.g. foo-1.0.win32-py2.7.exe; version is the 3 chars before .exe
            py_ver = name[-7:-4]
            base = name[:-16]
            plat = 'win32'
        elif lower.endswith('.win-amd64.exe'):
            base = name[:-14]
            plat = 'win-amd64'
        elif lower.startswith('.win-amd64-py', -20):
            py_ver = name[-7:-4]
            base = name[:-20]
            plat = 'win-amd64'
    return base, py_ver, plat
def egg_info_for_url(url):
    """Return ``(base, fragment)`` for `url`: the unquoted final path
    component and the URL fragment (empty string if none)."""
    parts = urllib.parse.urlparse(url)
    scheme, server, path, parameters, query, fragment = parts
    base = urllib.parse.unquote(path.split('/')[-1])
    if server == 'sourceforge.net' and base == 'download':  # XXX Yuck
        # sourceforge download URLs end in /download; the real name
        # is the previous path component
        base = urllib.parse.unquote(path.split('/')[-2])
    if '#' in base:
        base, fragment = base.split('#', 1)
    return base, fragment
def distros_for_url(url, metadata=None):
    """Yield egg or source distribution objects that might be found at a URL"""
    base, fragment = egg_info_for_url(url)
    for dist in distros_for_location(url, base, metadata):
        yield dist
    if fragment:
        match = EGG_FRAGMENT.match(fragment)
        if match:
            # an explicit #egg=name-version fragment marks a checkout link
            for dist in interpret_distro_name(
                url, match.group(1), metadata, precedence=CHECKOUT_DIST
            ):
                yield dist
def distros_for_location(location, basename, metadata=None):
    """Yield egg or source distribution objects based on basename"""
    if basename.endswith('.egg.zip'):
        basename = basename[:-4]  # strip the .zip
    if basename.endswith('.egg') and '-' in basename:
        # only one, unambiguous interpretation
        return [Distribution.from_location(location, basename, metadata)]
    if basename.endswith('.whl') and '-' in basename:
        wheel = Wheel(basename)
        if not wheel.is_compatible():
            return []
        return [
            Distribution(
                location=location,
                project_name=wheel.project_name,
                version=wheel.version,
                # Increase priority over eggs.
                precedence=EGG_DIST + 1,
            )
        ]
    if basename.endswith('.exe'):
        win_base, py_ver, platform = parse_bdist_wininst(basename)
        if win_base is not None:
            return interpret_distro_name(
                location, win_base, metadata, py_ver, BINARY_DIST, platform
            )
    # Try source distro extensions (.zip, .tgz, etc.)
    #
    for ext in EXTENSIONS:
        if basename.endswith(ext):
            basename = basename[: -len(ext)]
            return interpret_distro_name(location, basename, metadata)
    return []  # no extension matched
def distros_for_filename(filename, metadata=None):
    """Yield possible egg or source distribution objects based on a filename"""
    return distros_for_location(
        normalize_path(filename), os.path.basename(filename), metadata
    )
def interpret_distro_name(
    location, basename, metadata, py_version=None, precedence=SOURCE_DIST, platform=None
):
    """Generate the interpretation of a source distro name

    Note: if `location` is a filesystem filename, you should call
    ``pkg_resources.normalize_path()`` on it before passing it to this
    routine!
    """
    parts = basename.split('-')
    if not py_version and any(re.match(r'py\d\.\d$', p) for p in parts[2:]):
        # it is a bdist_dumb, not an sdist -- bail out
        return

    # find the pivot (p) that splits the name from the version.
    # infer the version as the first item that has a digit.
    for p in range(len(parts)):
        if parts[p][:1].isdigit():
            break
    else:
        p = len(parts)

    yield Distribution(
        location,
        metadata,
        '-'.join(parts[:p]),
        '-'.join(parts[p:]),
        py_version=py_version,
        precedence=precedence,
        platform=platform,
    )
def unique_values(func):
    """
    Wrap a function returning an iterable such that the resulting iterable
    only ever yields unique items.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        return unique_everseen(func(*args, **kwargs))

    return wrapper
216 REL
= re
.compile(r
"""<([^>]*\srel\s{0,10}=\s{0,10}['"]?([^'" >]+)[^>]*)>""", re
.I
)
218 Regex for an HTML tag with 'rel="val"' attributes.
@unique_values
def find_external_links(url, page):
    """Find rel="homepage" and rel="download" links in `page`, yielding URLs"""

    for match in REL.finditer(page):
        tag, rel = match.groups()
        rels = set(map(str.strip, rel.lower().split(',')))
        if 'homepage' in rels or 'download' in rels:
            for match in HREF.finditer(tag):
                yield urllib.parse.urljoin(url, htmldecode(match.group(1)))

    # fall back to scraping PyPI's table-of-details markup
    for tag in ("<th>Home Page", "<th>Download URL"):
        pos = page.find(tag)
        if pos != -1:
            match = HREF.search(page, pos)
            if match:
                yield urllib.parse.urljoin(url, htmldecode(match.group(1)))
class ContentChecker:
    """
    A null content checker that defines the interface for checking content
    """

    def feed(self, block):
        """
        Feed a block of data to the hash.
        """

    def is_valid(self):
        """
        Check the hash. Return False if validation fails.
        """
        return True

    def report(self, reporter, template):
        """
        Call reporter with information about the checker (hash name)
        substituted into the template.
        """
class HashChecker(ContentChecker):
    """Content checker validating downloads against a ``#hashname=digest``
    URL fragment."""

    pattern = re.compile(
        r'(?P<hash_name>sha1|sha224|sha384|sha256|sha512|md5)='
        r'(?P<expected>[a-f0-9]+)'
    )

    def __init__(self, hash_name, expected):
        self.hash_name = hash_name
        self.hash = hashlib.new(hash_name)
        self.expected = expected

    @classmethod
    def from_url(cls, url):
        "Construct a (possibly null) ContentChecker from a URL"
        fragment = urllib.parse.urlparse(url)[-1]
        if not fragment:
            return ContentChecker()
        match = cls.pattern.search(fragment)
        if not match:
            return ContentChecker()
        return cls(**match.groupdict())

    def feed(self, block):
        self.hash.update(block)

    def is_valid(self):
        return self.hash.hexdigest() == self.expected

    def report(self, reporter, template):
        msg = template % self.hash_name
        return reporter(msg)
299 class PackageIndex(Environment
):
300 """A distribution index that scans web pages for download URLs"""
304 index_url
="https://pypi.org/simple/",
311 super().__init
__(*args
, **kw
)
312 self
.index_url
= index_url
+ "/"[: not index_url
.endswith('/')]
313 self
.scanned_urls
= {}
314 self
.fetched_urls
= {}
315 self
.package_pages
= {}
316 self
.allows
= re
.compile('|'.join(map(translate
, hosts
))).match
318 self
.opener
= urllib
.request
.urlopen
321 # ignore invalid versions
323 parse_version(dist
.version
)
326 return super().add(dist
)
328 # FIXME: 'PackageIndex.process_url' is too complex (14)
329 def process_url(self
, url
, retrieve
=False): # noqa: C901
330 """Evaluate a URL as a possible download, and maybe retrieve it"""
331 if url
in self
.scanned_urls
and not retrieve
:
333 self
.scanned_urls
[url
] = True
334 if not URL_SCHEME(url
):
335 self
.process_filename(url
)
338 dists
= list(distros_for_url(url
))
340 if not self
.url_ok(url
):
342 self
.debug("Found link: %s", url
)
344 if dists
or not retrieve
or url
in self
.fetched_urls
:
345 list(map(self
.add
, dists
))
346 return # don't need the actual page
348 if not self
.url_ok(url
):
349 self
.fetched_urls
[url
] = True
352 self
.info("Reading %s", url
)
353 self
.fetched_urls
[url
] = True # prevent multiple fetch attempts
354 tmpl
= "Download error on %s: %%s -- Some packages may not be found!"
355 f
= self
.open_url(url
, tmpl
% url
)
358 if isinstance(f
, urllib
.error
.HTTPError
) and f
.code
== 401:
359 self
.info("Authentication error: %s" % f
.msg
)
360 self
.fetched_urls
[f
.url
] = True
361 if 'html' not in f
.headers
.get('content-type', '').lower():
362 f
.close() # not html, we can't process it
365 base
= f
.url
# handle redirects
367 if not isinstance(page
, str):
368 # In Python 3 and got bytes but want str.
369 if isinstance(f
, urllib
.error
.HTTPError
):
370 # Errors have no charset, assume latin1:
373 charset
= f
.headers
.get_param('charset') or 'latin-1'
374 page
= page
.decode(charset
, "ignore")
376 for match
in HREF
.finditer(page
):
377 link
= urllib
.parse
.urljoin(base
, htmldecode(match
.group(1)))
378 self
.process_url(link
)
379 if url
.startswith(self
.index_url
) and getattr(f
, 'code', None) != 404:
380 page
= self
.process_index(url
, page
)
382 def process_filename(self
, fn
, nested
=False):
383 # process filenames or directories
384 if not os
.path
.exists(fn
):
385 self
.warn("Not found: %s", fn
)
388 if os
.path
.isdir(fn
) and not nested
:
389 path
= os
.path
.realpath(fn
)
390 for item
in os
.listdir(path
):
391 self
.process_filename(os
.path
.join(path
, item
), True)
393 dists
= distros_for_filename(fn
)
395 self
.debug("Found: %s", fn
)
396 list(map(self
.add
, dists
))
398 def url_ok(self
, url
, fatal
=False):
400 is_file
= s
and s
.group(1).lower() == 'file'
401 if is_file
or self
.allows(urllib
.parse
.urlparse(url
)[1]):
404 "\nNote: Bypassing %s (disallowed host; see "
405 "http://bit.ly/2hrImnY for details).\n"
408 raise DistutilsError(msg
% url
)
412 def scan_egg_links(self
, search_path
):
413 dirs
= filter(os
.path
.isdir
, search_path
)
417 for entry
in os
.listdir(path
)
418 if entry
.endswith('.egg-link')
420 list(itertools
.starmap(self
.scan_egg_link
, egg_links
))
422 def scan_egg_link(self
, path
, entry
):
423 with
open(os
.path
.join(path
, entry
)) as raw_lines
:
424 # filter non-empty lines
425 lines
= list(filter(None, map(str.strip
, raw_lines
)))
428 # format is not recognized; punt
431 egg_path
, setup_path
= lines
433 for dist
in find_distributions(os
.path
.join(path
, egg_path
)):
434 dist
.location
= os
.path
.join(path
, *lines
)
435 dist
.precedence
= SOURCE_DIST
438 def _scan(self
, link
):
439 # Process a URL to see if it's for a package page
440 NO_MATCH_SENTINEL
= None, None
441 if not link
.startswith(self
.index_url
):
442 return NO_MATCH_SENTINEL
444 parts
= list(map(urllib
.parse
.unquote
, link
[len(self
.index_url
) :].split('/')))
445 if len(parts
) != 2 or '#' in parts
[1]:
446 return NO_MATCH_SENTINEL
448 # it's a package page, sanitize and index it
449 pkg
= safe_name(parts
[0])
450 ver
= safe_version(parts
[1])
451 self
.package_pages
.setdefault(pkg
.lower(), {})[link
] = True
452 return to_filename(pkg
), to_filename(ver
)
454 def process_index(self
, url
, page
):
455 """Process the contents of a PyPI page"""
457 # process an index page into the package-page index
458 for match
in HREF
.finditer(page
):
460 self
._scan
(urllib
.parse
.urljoin(url
, htmldecode(match
.group(1))))
464 pkg
, ver
= self
._scan
(url
) # ensure this page is in the page index
466 return "" # no sense double-scanning non-package pages
468 # process individual package page
469 for new_url
in find_external_links(url
, page
):
470 # Process the found URL
471 base
, frag
= egg_info_for_url(new_url
)
472 if base
.endswith('.py') and not frag
:
474 new_url
+= '#egg=%s-%s' % (pkg
, ver
)
476 self
.need_version_info(url
)
477 self
.scan_url(new_url
)
480 lambda m
: '<a href="%s#md5=%s">%s</a>' % m
.group(1, 3, 2), page
483 def need_version_info(self
, url
):
485 "Page at %s links to .py file(s) without version info; an index "
490 def scan_all(self
, msg
=None, *args
):
491 if self
.index_url
not in self
.fetched_urls
:
493 self
.warn(msg
, *args
)
494 self
.info("Scanning index of all packages (this may take a while)")
495 self
.scan_url(self
.index_url
)
497 def find_packages(self
, requirement
):
498 self
.scan_url(self
.index_url
+ requirement
.unsafe_name
+ '/')
500 if not self
.package_pages
.get(requirement
.key
):
501 # Fall back to safe version of the name
502 self
.scan_url(self
.index_url
+ requirement
.project_name
+ '/')
504 if not self
.package_pages
.get(requirement
.key
):
505 # We couldn't find the target package, so search the index page too
506 self
.not_found_in_index(requirement
)
508 for url
in list(self
.package_pages
.get(requirement
.key
, ())):
509 # scan each page that might be related to the desired package
512 def obtain(self
, requirement
, installer
=None):
514 self
.find_packages(requirement
)
515 for dist
in self
[requirement
.key
]:
516 if dist
in requirement
:
518 self
.debug("%s does not match %s", requirement
, dist
)
519 return super(PackageIndex
, self
).obtain(requirement
, installer
)
521 def check_hash(self
, checker
, filename
, tfp
):
523 checker is a ContentChecker
525 checker
.report(self
.debug
, "Validating %%s checksum for %s" % filename
)
526 if not checker
.is_valid():
529 raise DistutilsError(
530 "%s validation failed for %s; "
531 "possible download problem?"
532 % (checker
.hash.name
, os
.path
.basename(filename
))
535 def add_find_links(self
, urls
):
536 """Add `urls` to the list that will be prescanned for searches"""
539 self
.to_scan
is None # if we have already "gone online"
540 or not URL_SCHEME(url
) # or it's a local file/directory
541 or url
.startswith('file:')
542 or list(distros_for_url(url
)) # or a direct package link
544 # then go ahead and process it now
547 # otherwise, defer retrieval till later
548 self
.to_scan
.append(url
)
551 """Scan urls scheduled for prescanning (e.g. --find-links)"""
553 list(map(self
.scan_url
, self
.to_scan
))
554 self
.to_scan
= None # from now on, go ahead and process immediately
556 def not_found_in_index(self
, requirement
):
557 if self
[requirement
.key
]: # we've seen at least one distro
558 meth
, msg
= self
.info
, "Couldn't retrieve index page for %r"
559 else: # no distros seen for this name, might be misspelled
562 "Couldn't find index page for %r (maybe misspelled?)",
564 meth(msg
, requirement
.unsafe_name
)
567 def download(self
, spec
, tmpdir
):
568 """Locate and/or download `spec` to `tmpdir`, returning a local path
570 `spec` may be a ``Requirement`` object, or a string containing a URL,
571 an existing local filename, or a project/version requirement spec
572 (i.e. the string form of a ``Requirement`` object). If it is the URL
573 of a .py file with an unambiguous ``#egg=name-version`` tag (i.e., one
574 that escapes ``-`` as ``_`` throughout), a trivial ``setup.py`` is
575 automatically created alongside the downloaded file.
577 If `spec` is a ``Requirement`` object or a string containing a
578 project/version requirement spec, this method returns the location of
579 a matching distribution (possibly after downloading it to `tmpdir`).
580 If `spec` is a locally existing file or directory name, it is simply
581 returned unchanged. If `spec` is a URL, it is downloaded to a subpath
582 of `tmpdir`, and the local filename is returned. Various errors may be
583 raised if a problem occurs during downloading.
585 if not isinstance(spec
, Requirement
):
586 scheme
= URL_SCHEME(spec
)
588 # It's a url, download it to tmpdir
589 found
= self
._download
_url
(scheme
.group(1), spec
, tmpdir
)
590 base
, fragment
= egg_info_for_url(spec
)
591 if base
.endswith('.py'):
592 found
= self
.gen_setup(found
, fragment
, tmpdir
)
594 elif os
.path
.exists(spec
):
595 # Existing file or directory, just return it
598 spec
= parse_requirement_arg(spec
)
599 return getattr(self
.fetch_distribution(spec
, tmpdir
), 'location', None)
601 def fetch_distribution( # noqa: C901 # is too complex (14) # FIXME
610 """Obtain a distribution suitable for fulfilling `requirement`
612 `requirement` must be a ``pkg_resources.Requirement`` instance.
613 If necessary, or if the `force_scan` flag is set, the requirement is
614 searched for in the (online) package index as well as the locally
615 installed packages. If a distribution matching `requirement` is found,
616 the returned distribution's ``location`` is the value you would have
617 gotten from calling the ``download()`` method with the matching
618 distribution's URL or filename. If no matching distribution is found,
619 ``None`` is returned.
621 If the `source` flag is set, only source distributions and source
622 checkout links will be considered. Unless the `develop_ok` flag is
623 set, development and system eggs (i.e., those using the ``.egg-info``
624 format) will be ignored.
626 # process a Requirement
627 self
.info("Searching for %s", requirement
)
631 def find(req
, env
=None):
634 # Find a matching distribution; may be called more than once
636 for dist
in env
[req
.key
]:
638 if dist
.precedence
== DEVELOP_DIST
and not develop_ok
:
639 if dist
not in skipped
:
641 "Skipping development or system egg: %s",
647 test
= dist
in req
and (dist
.precedence
<= SOURCE_DIST
or not source
)
649 loc
= self
.download(dist
.location
, tmpdir
)
650 dist
.download_location
= loc
651 if os
.path
.exists(dist
.download_location
):
656 self
.find_packages(requirement
)
657 dist
= find(requirement
)
659 if not dist
and local_index
is not None:
660 dist
= find(requirement
, local_index
)
663 if self
.to_scan
is not None:
665 dist
= find(requirement
)
667 if dist
is None and not force_scan
:
668 self
.find_packages(requirement
)
669 dist
= find(requirement
)
673 "No local packages or working download links found for %s%s",
674 (source
and "a source distribution of " or ""),
678 self
.info("Best match: %s", dist
)
679 return dist
.clone(location
=dist
.download_location
)
681 def fetch(self
, requirement
, tmpdir
, force_scan
=False, source
=False):
682 """Obtain a file suitable for fulfilling `requirement`
684 DEPRECATED; use the ``fetch_distribution()`` method now instead. For
685 backward compatibility, this routine is identical but returns the
686 ``location`` of the downloaded distribution instead of a distribution
689 dist
= self
.fetch_distribution(requirement
, tmpdir
, force_scan
, source
)
694 def gen_setup(self
, filename
, fragment
, tmpdir
):
695 match
= EGG_FRAGMENT
.match(fragment
)
700 for d
in interpret_distro_name(filename
, match
.group(1), None)
706 if len(dists
) == 1: # unambiguous ``#egg`` fragment
707 basename
= os
.path
.basename(filename
)
709 # Make sure the file has been downloaded to the temp dir.
710 if os
.path
.dirname(filename
) != tmpdir
:
711 dst
= os
.path
.join(tmpdir
, basename
)
712 if not (os
.path
.exists(dst
) and os
.path
.samefile(filename
, dst
)):
713 shutil
.copy2(filename
, dst
)
716 with
open(os
.path
.join(tmpdir
, 'setup.py'), 'w') as file:
718 "from setuptools import setup\n"
719 "setup(name=%r, version=%r, py_modules=[%r])\n"
721 dists
[0].project_name
,
723 os
.path
.splitext(basename
)[0],
729 raise DistutilsError(
730 "Can't unambiguously interpret project/version identifier %r; "
731 "any dashes in the name or version should be escaped using "
732 "underscores. %r" % (fragment
, dists
)
735 raise DistutilsError(
736 "Can't process plain .py files without an '#egg=name-version'"
737 " suffix to enable automatic setup script generation."
742 def _download_to(self
, url
, filename
):
743 self
.info("Downloading %s", url
)
747 checker
= HashChecker
.from_url(url
)
748 fp
= self
.open_url(url
)
749 if isinstance(fp
, urllib
.error
.HTTPError
):
750 raise DistutilsError(
751 "Can't download %s: %s %s" % (url
, fp
.code
, fp
.msg
)
755 bs
= self
.dl_blocksize
757 if "content-length" in headers
:
758 # Some servers return multiple Content-Length headers :(
759 sizes
= headers
.get_all('Content-Length')
760 size
= max(map(int, sizes
))
761 self
.reporthook(url
, filename
, blocknum
, bs
, size
)
762 with
open(filename
, 'wb') as tfp
:
769 self
.reporthook(url
, filename
, blocknum
, bs
, size
)
772 self
.check_hash(checker
, filename
, tfp
)
778 def reporthook(self
, url
, filename
, blocknum
, blksize
, size
):
782 def open_url(self
, url
, warning
=None): # noqa: C901 # is too complex (12)
783 if url
.startswith('file:'):
784 return local_open(url
)
786 return open_with_auth(url
, self
.opener
)
787 except (ValueError, http
.client
.InvalidURL
) as v
:
788 msg
= ' '.join([str(arg
) for arg
in v
.args
])
790 self
.warn(warning
, msg
)
792 raise DistutilsError('%s %s' % (url
, msg
)) from v
793 except urllib
.error
.HTTPError
as v
:
795 except urllib
.error
.URLError
as v
:
797 self
.warn(warning
, v
.reason
)
799 raise DistutilsError(
800 "Download error for %s: %s" % (url
, v
.reason
)
802 except http
.client
.BadStatusLine
as v
:
804 self
.warn(warning
, v
.line
)
806 raise DistutilsError(
807 '%s returned a bad status line. The server might be '
808 'down, %s' % (url
, v
.line
)
810 except (http
.client
.HTTPException
, socket
.error
) as v
:
812 self
.warn(warning
, v
)
814 raise DistutilsError("Download error for %s: %s" % (url
, v
)) from v
816 def _download_url(self
, scheme
, url
, tmpdir
):
817 # Determine download filename
819 name
, fragment
= egg_info_for_url(url
)
822 name
= name
.replace('..', '.').replace('\\', '_')
824 name
= "__downloaded__" # default if URL has no path contents
826 if name
.endswith('.egg.zip'):
827 name
= name
[:-4] # strip the extra .zip before download
829 filename
= os
.path
.join(tmpdir
, name
)
833 if scheme
== 'svn' or scheme
.startswith('svn+'):
834 return self
._download
_svn
(url
, filename
)
835 elif scheme
== 'git' or scheme
.startswith('git+'):
836 return self
._download
_git
(url
, filename
)
837 elif scheme
.startswith('hg+'):
838 return self
._download
_hg
(url
, filename
)
839 elif scheme
== 'file':
840 return urllib
.request
.url2pathname(urllib
.parse
.urlparse(url
)[2])
842 self
.url_ok(url
, True) # raises error if not allowed
843 return self
._attempt
_download
(url
, filename
)
845 def scan_url(self
, url
):
846 self
.process_url(url
, True)
848 def _attempt_download(self
, url
, filename
):
849 headers
= self
._download
_to
(url
, filename
)
850 if 'html' in headers
.get('content-type', '').lower():
851 return self
._download
_html
(url
, headers
, filename
)
855 def _download_html(self
, url
, headers
, filename
):
856 file = open(filename
)
859 # Check for a subversion index page
860 if re
.search(r
'<title>([^- ]+ - )?Revision \d+:', line
):
861 # it's a subversion index page:
864 return self
._download
_svn
(url
, filename
)
865 break # not an index page
868 raise DistutilsError("Unexpected HTML page found at " + url
)
870 def _download_svn(self
, url
, filename
):
871 warnings
.warn("SVN download support is deprecated", UserWarning)
872 url
= url
.split('#', 1)[0] # remove any fragment for svn's sake
874 if url
.lower().startswith('svn:') and '@' in url
:
875 scheme
, netloc
, path
, p
, q
, f
= urllib
.parse
.urlparse(url
)
876 if not netloc
and path
.startswith('//') and '/' in path
[2:]:
877 netloc
, path
= path
[2:].split('/', 1)
878 auth
, host
= _splituser(netloc
)
881 user
, pw
= auth
.split(':', 1)
882 creds
= " --username=%s --password=%s" % (user
, pw
)
884 creds
= " --username=" + auth
886 parts
= scheme
, netloc
, url
, p
, q
, f
887 url
= urllib
.parse
.urlunparse(parts
)
888 self
.info("Doing subversion checkout from %s to %s", url
, filename
)
889 os
.system("svn checkout%s -q %s %s" % (creds
, url
, filename
))
893 def _vcs_split_rev_from_url(url
, pop_prefix
=False):
894 scheme
, netloc
, path
, query
, frag
= urllib
.parse
.urlsplit(url
)
896 scheme
= scheme
.split('+', 1)[-1]
898 # Some fragment identification fails
899 path
= path
.split('#', 1)[0]
903 path
, rev
= path
.rsplit('@', 1)
905 # Also, discard fragment
906 url
= urllib
.parse
.urlunsplit((scheme
, netloc
, path
, query
, ''))
910 def _download_git(self
, url
, filename
):
911 filename
= filename
.split('#', 1)[0]
912 url
, rev
= self
._vcs
_split
_rev
_from
_url
(url
, pop_prefix
=True)
914 self
.info("Doing git clone from %s to %s", url
, filename
)
915 os
.system("git clone --quiet %s %s" % (url
, filename
))
918 self
.info("Checking out %s", rev
)
920 "git -C %s checkout --quiet %s"
929 def _download_hg(self
, url
, filename
):
930 filename
= filename
.split('#', 1)[0]
931 url
, rev
= self
._vcs
_split
_rev
_from
_url
(url
, pop_prefix
=True)
933 self
.info("Doing hg clone from %s to %s", url
, filename
)
934 os
.system("hg clone --quiet %s %s" % (url
, filename
))
937 self
.info("Updating to %s", rev
)
939 "hg --cwd %s up -C -r %s -q"
948 def debug(self
, msg
, *args
):
949 log
.debug(msg
, *args
)
951 def info(self
, msg
, *args
):
954 def warn(self
, msg
, *args
):
958 # This pattern matches a character entity reference (a decimal numeric
959 # references, a hexadecimal numeric reference, or a named reference).
960 entity_sub
= re
.compile(r
'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub
def decode_entity(match):
    """Decode a single HTML entity regex match to its character(s)."""
    what = match.group(0)
    return html.unescape(what)
def htmldecode(text):
    """
    Decode HTML entities in the given text.

    >>> htmldecode(
    ...     'https://../package_name-0.1.2.tar.gz'
    ...     '?tokena=A&amp;tokenb=B">package_name-0.1.2.tar.gz')
    'https://../package_name-0.1.2.tar.gz?tokena=A&tokenb=B">package_name-0.1.2.tar.gz'
    """
    return entity_sub(decode_entity, text)
def socket_timeout(timeout=15):
    """Decorator factory: run the wrapped callable with the global socket
    default timeout set to `timeout`, restoring the previous value after."""

    def _socket_timeout(func):
        @wraps(func)
        def _wrapped(*args, **kwargs):
            old_timeout = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
            try:
                return func(*args, **kwargs)
            finally:
                # always restore, even if func raises
                socket.setdefaulttimeout(old_timeout)

        return _wrapped

    return _socket_timeout
def _encode_auth(auth):
    """
    Encode auth from a URL suitable for an HTTP header.
    >>> str(_encode_auth('username%3Apassword'))
    'dXNlcm5hbWU6cGFzc3dvcmQ='

    Long auth strings should not cause a newline to be inserted.
    >>> long_auth = 'username:' + 'password'*10
    >>> chr(10) in str(_encode_auth(long_auth))
    False
    """
    auth_s = urllib.parse.unquote(auth)
    # convert to bytes
    auth_bytes = auth_s.encode()
    encoded_bytes = base64.b64encode(auth_bytes)
    # convert back to a string
    encoded = encoded_bytes.decode()
    # strip the trailing carriage return
    return encoded.replace('\n', '')
class Credential:
    """
    A username/password pair. Use like a namedtuple.
    """

    def __init__(self, username, password):
        self.username = username
        self.password = password

    def __iter__(self):
        yield self.username
        yield self.password

    def __str__(self):
        return '%(username)s:%(password)s' % vars(self)
class PyPIConfig(configparser.RawConfigParser):
    """Reader for the user's ``~/.pypirc`` repository credentials."""

    def __init__(self):
        """
        Load from ~/.pypirc
        """
        defaults = dict.fromkeys(['username', 'password', 'repository'], '')
        super().__init__(defaults)

        rc = os.path.join(os.path.expanduser('~'), '.pypirc')
        if os.path.exists(rc):
            self.read(rc)

    @property
    def creds_by_repository(self):
        # only sections that declare a repository URL are credential sources
        sections_with_repositories = [
            section
            for section in self.sections()
            if self.get(section, 'repository').strip()
        ]

        return dict(map(self._get_repo_cred, sections_with_repositories))

    def _get_repo_cred(self, section):
        repo = self.get(section, 'repository').strip()
        return repo, Credential(
            self.get(section, 'username').strip(),
            self.get(section, 'password').strip(),
        )

    def find_credential(self, url):
        """
        If the URL indicated appears to be a repository defined in this
        config, return the credential for that repository.
        """
        for repository, cred in self.creds_by_repository.items():
            if url.startswith(repository):
                return cred
def open_with_auth(url, opener=urllib.request.urlopen):
    """Open a urllib2 request, handling HTTP authentication"""

    parsed = urllib.parse.urlparse(url)
    scheme, netloc, path, params, query, frag = parsed

    # Double scheme does not raise on macOS as revealed by a
    # failing test. We would expect "nonnumeric port". Refs #20.
    if netloc.endswith(':'):
        raise http.client.InvalidURL("nonnumeric port: ''")

    if scheme in ('http', 'https'):
        auth, address = _splituser(netloc)
    else:
        auth = None

    if not auth:
        # no credentials embedded in the URL; try ~/.pypirc
        cred = PyPIConfig().find_credential(url)
        if cred:
            auth = str(cred)
            info = cred.username, url
            log.info('Authenticating as %s for %s (from .pypirc)', *info)

    if auth:
        auth = "Basic " + _encode_auth(auth)
        # strip user:pass from the URL we actually request
        parts = scheme, address, path, params, query, frag
        new_url = urllib.parse.urlunparse(parts)
        request = urllib.request.Request(new_url)
        request.add_header("Authorization", auth)
    else:
        request = urllib.request.Request(url)

    request.add_header('User-Agent', user_agent)
    fp = opener(request)

    if auth:
        # Put authentication info back into request URL if same host,
        # so that links found on the page will work
        s2, h2, path2, param2, query2, frag2 = urllib.parse.urlparse(fp.url)
        if s2 == scheme and h2 == address:
            parts = s2, netloc, path2, param2, query2, frag2
            fp.url = urllib.parse.urlunparse(parts)

    return fp
# copy of urllib.parse._splituser from Python 3.8
def _splituser(host):
    """splituser('user[:passwd]@host[:port]')
    --> 'user[:passwd]', 'host[:port]'."""
    user, delim, host = host.rpartition('@')
    return (user if delim else None), host
1126 # adding a timeout to avoid freezing package_index
1127 open_with_auth
= socket_timeout(_SOCKET_TIMEOUT
)(open_with_auth
)
def fix_sf_url(url):
    """Identity function kept for backward compatibility."""
    return url  # backward compatibility
def local_open(url):
    """Read a local path, with special support for directories"""
    scheme, server, path, param, query, frag = urllib.parse.urlparse(url)
    filename = urllib.request.url2pathname(path)
    if os.path.isfile(filename):
        return urllib.request.urlopen(url)
    elif path.endswith('/') and os.path.isdir(filename):
        files = []
        for f in os.listdir(filename):
            filepath = os.path.join(filename, f)
            if f == 'index.html':
                # serve an existing index verbatim
                with open(filepath, 'r') as fp:
                    body = fp.read()
                break
            elif os.path.isdir(filepath):
                f += '/'
            files.append('<a href="{name}">{name}</a>'.format(name=f))
        else:
            # no index.html found; synthesize a listing page
            tmpl = (
                "<html><head><title>{url}</title>" "</head><body>{files}</body></html>"
            )
            body = tmpl.format(url=url, files='\n'.join(files))
        status, message = 200, "OK"
    else:
        status, message, body = 404, "Path not found", "Not found"

    headers = {'content-type': 'text/html'}
    body_stream = io.StringIO(body)
    # an HTTPError doubles as a response object with .read()
    return urllib.error.HTTPError(url, status, message, headers, body_stream)