]> crepu.dev Git - config.git/blob - djavu-asus/emacs/elpy/rpc-venv/lib/python3.11/site-packages/setuptools/package_index.py
Reorganización de directorios
[config.git] / djavu-asus / emacs / elpy / rpc-venv / lib / python3.11 / site-packages / setuptools / package_index.py
1 """PyPI and direct package downloading."""
2
3 import sys
4 import os
5 import re
6 import io
7 import shutil
8 import socket
9 import base64
10 import hashlib
11 import itertools
12 import warnings
13 import configparser
14 import html
15 import http.client
16 import urllib.parse
17 import urllib.request
18 import urllib.error
19 from functools import wraps
20
21 import setuptools
22 from pkg_resources import (
23 CHECKOUT_DIST,
24 Distribution,
25 BINARY_DIST,
26 normalize_path,
27 SOURCE_DIST,
28 Environment,
29 find_distributions,
30 safe_name,
31 safe_version,
32 to_filename,
33 Requirement,
34 DEVELOP_DIST,
35 EGG_DIST,
36 parse_version,
37 )
38 from distutils import log
39 from distutils.errors import DistutilsError
40 from fnmatch import translate
41 from setuptools.wheel import Wheel
42 from setuptools.extern.more_itertools import unique_everseen
43
44
45 EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.+!]+)$')
46 HREF = re.compile(r"""href\s*=\s*['"]?([^'"> ]+)""", re.I)
47 PYPI_MD5 = re.compile(
48 r'<a href="([^"#]+)">([^<]+)</a>\n\s+\(<a (?:title="MD5 hash"\n\s+)'
49 r'href="[^?]+\?:action=show_md5&amp;digest=([0-9a-f]{32})">md5</a>\)'
50 )
51 URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):', re.I).match
52 EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()
53
54 __all__ = [
55 'PackageIndex',
56 'distros_for_url',
57 'parse_bdist_wininst',
58 'interpret_distro_name',
59 ]
60
61 _SOCKET_TIMEOUT = 15
62
63 _tmpl = "setuptools/{setuptools.__version__} Python-urllib/{py_major}"
64 user_agent = _tmpl.format(
65 py_major='{}.{}'.format(*sys.version_info), setuptools=setuptools
66 )
67
68
def parse_requirement_arg(spec):
    """Parse `spec` as a Requirement, raising DistutilsError when invalid."""
    try:
        req = Requirement.parse(spec)
    except ValueError as e:
        raise DistutilsError(
            "Not a URL, existing file, or requirement spec: %r" % (spec,)
        ) from e
    return req
76
77
def parse_bdist_wininst(name):
    """Return (base,pyversion) or (None,None) for possible .exe name"""
    lower = name.lower()
    base = py_ver = plat = None

    if not lower.endswith('.exe'):
        # not a bdist_wininst installer name
        return base, py_ver, plat

    # note: startswith with a negative offset anchors the suffix match
    if lower.endswith('.win32.exe'):
        base, plat = name[:-10], 'win32'
    elif lower.startswith('.win32-py', -16):
        base, py_ver, plat = name[:-16], name[-7:-4], 'win32'
    elif lower.endswith('.win-amd64.exe'):
        base, plat = name[:-14], 'win-amd64'
    elif lower.startswith('.win-amd64-py', -20):
        base, py_ver, plat = name[:-20], name[-7:-4], 'win-amd64'

    return base, py_ver, plat
100
101
def egg_info_for_url(url):
    """Return (basename, fragment) extracted from a download URL."""
    parts = urllib.parse.urlparse(url)
    fragment = parts.fragment
    segments = parts.path.split('/')
    base = urllib.parse.unquote(segments[-1])
    if parts.netloc == 'sourceforge.net' and base == 'download':  # XXX Yuck
        base = urllib.parse.unquote(segments[-2])
    if '#' in base:
        # fragment glued to the basename (not parsed by urlparse)
        base, fragment = base.split('#', 1)
    return base, fragment
111
112
def distros_for_url(url, metadata=None):
    """Yield egg or source distribution objects that might be found at a URL"""
    base, fragment = egg_info_for_url(url)
    yield from distros_for_location(url, base, metadata)
    if not fragment:
        return
    match = EGG_FRAGMENT.match(fragment)
    if match:
        # an explicit #egg=name-version fragment names a checkout
        yield from interpret_distro_name(
            url, match.group(1), metadata, precedence=CHECKOUT_DIST
        )
125
126
def distros_for_location(location, basename, metadata=None):
    """Yield egg or source distribution objects based on basename"""
    if basename.endswith('.egg.zip'):
        basename = basename[:-4]  # strip the .zip

    # an egg name has exactly one, unambiguous interpretation
    if basename.endswith('.egg') and '-' in basename:
        return [Distribution.from_location(location, basename, metadata)]

    # wheels: only compatible ones, ranked above eggs
    if basename.endswith('.whl') and '-' in basename:
        wheel = Wheel(basename)
        if not wheel.is_compatible():
            return []
        dist = Distribution(
            location=location,
            project_name=wheel.project_name,
            version=wheel.version,
            # Increase priority over eggs.
            precedence=EGG_DIST + 1,
        )
        return [dist]

    # bdist_wininst installers
    if basename.endswith('.exe'):
        win_base, py_ver, platform = parse_bdist_wininst(basename)
        if win_base is not None:
            return interpret_distro_name(
                location, win_base, metadata, py_ver, BINARY_DIST, platform
            )

    # Try source distro extensions (.zip, .tgz, etc.)
    for ext in EXTENSIONS:
        if basename.endswith(ext):
            stripped = basename[: -len(ext)]
            return interpret_distro_name(location, stripped, metadata)

    return []  # no extension matched
160
161
def distros_for_filename(filename, metadata=None):
    """Yield possible egg or source distribution objects based on a filename"""
    location = normalize_path(filename)
    basename = os.path.basename(filename)
    return distros_for_location(location, basename, metadata)
167
168
def interpret_distro_name(
    location, basename, metadata, py_version=None, precedence=SOURCE_DIST, platform=None
):
    """Generate the interpretation of a source distro name

    Note: if `location` is a filesystem filename, you should call
    ``pkg_resources.normalize_path()`` on it before passing it to this
    routine!
    """

    parts = basename.split('-')
    # a "pyX.Y" component after the first two parts marks a bdist_dumb
    # archive, which has a platform suffix we can't interpret here
    if not py_version and any(re.match(r'py\d\.\d$', p) for p in parts[2:]):
        # it is a bdist_dumb, not an sdist -- bail out
        return

    # find the pivot (p) that splits the name from the version.
    # infer the version as the first item that has a digit.
    for p in range(len(parts)):
        if parts[p][:1].isdigit():
            break
    else:
        # no digit-leading part: the whole basename is the project name
        p = len(parts)

    # everything before the pivot is the project name, the rest the version
    yield Distribution(
        location,
        metadata,
        '-'.join(parts[:p]),
        '-'.join(parts[p:]),
        py_version=py_version,
        precedence=precedence,
        platform=platform
    )
201
202
def unique_values(func):
    """
    Decorator: filter the iterable returned by ``func`` so each distinct
    item is yielded only once.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        result = func(*args, **kwargs)
        return unique_everseen(result)

    return wrapper
214
215
216 REL = re.compile(r"""<([^>]*\srel\s{0,10}=\s{0,10}['"]?([^'" >]+)[^>]*)>""", re.I)
217 """
218 Regex for an HTML tag with 'rel="val"' attributes.
219 """
220
221
@unique_values
def find_external_links(url, page):
    """Find rel="homepage" and rel="download" links in `page`, yielding URLs"""

    # links declared via rel= attributes
    for rel_match in REL.finditer(page):
        tag, rel = rel_match.groups()
        rels = {value.strip() for value in rel.lower().split(',')}
        if rels & {'homepage', 'download'}:
            for href in HREF.finditer(tag):
                yield urllib.parse.urljoin(url, htmldecode(href.group(1)))

    # links that follow the old PyPI "Home Page" / "Download URL" headings
    for heading in ("<th>Home Page", "<th>Download URL"):
        pos = page.find(heading)
        if pos == -1:
            continue
        href = HREF.search(page, pos)
        if href:
            yield urllib.parse.urljoin(url, htmldecode(href.group(1)))
239
240
class ContentChecker:
    """
    A no-op content checker; defines the interface for validating
    downloaded content.
    """

    def feed(self, block):
        """
        Accept a block of data; the null checker ignores it.
        """
        return

    def is_valid(self):
        """
        The null checker accepts any content.
        """
        return True

    def report(self, reporter, template):
        """
        The null checker has no hash name to report.
        """
        return


class HashChecker(ContentChecker):
    # URL-fragment syntax, e.g. "md5=<hexdigest>"
    pattern = re.compile(
        r'(?P<hash_name>sha1|sha224|sha384|sha256|sha512|md5)='
        r'(?P<expected>[a-f0-9]+)'
    )

    def __init__(self, hash_name, expected):
        self.hash_name = hash_name
        self.hash = hashlib.new(hash_name)
        self.expected = expected

    @classmethod
    def from_url(cls, url):
        "Construct a (possibly null) ContentChecker from a URL"
        fragment = urllib.parse.urlparse(url)[-1]
        match = fragment and cls.pattern.search(fragment)
        if match:
            return cls(**match.groupdict())
        # no recognizable checksum fragment: validate nothing
        return ContentChecker()

    def feed(self, block):
        self.hash.update(block)

    def is_valid(self):
        return self.hash.hexdigest() == self.expected

    def report(self, reporter, template):
        return reporter(template % self.hash_name)
297
298
299 class PackageIndex(Environment):
300 """A distribution index that scans web pages for download URLs"""
301
    def __init__(
        self,
        index_url="https://pypi.org/simple/",
        hosts=('*',),
        ca_bundle=None,
        verify_ssl=True,
        *args,
        **kw
    ):
        """Create an index scanner rooted at `index_url`.

        `hosts` is a sequence of fnmatch-style patterns naming hosts that
        downloads are allowed from.  `ca_bundle` and `verify_ssl` are
        accepted for backward compatibility but are not referenced in this
        class's visible code.  Remaining arguments go to ``Environment``.
        """
        super().__init__(*args, **kw)
        # normalize: make sure the index URL ends with exactly one '/'
        self.index_url = index_url + "/"[: not index_url.endswith('/')]
        self.scanned_urls = {}   # URLs already examined
        self.fetched_urls = {}   # URLs whose pages have been retrieved
        self.package_pages = {}  # project key -> {page URL: True}
        # host allow-list: one regex built from the fnmatch patterns
        self.allows = re.compile('|'.join(map(translate, hosts))).match
        self.to_scan = []        # find-links URLs deferred until prescan()
        self.opener = urllib.request.urlopen
319
320 def add(self, dist):
321 # ignore invalid versions
322 try:
323 parse_version(dist.version)
324 except Exception:
325 return
326 return super().add(dist)
327
    # FIXME: 'PackageIndex.process_url' is too complex (14)
    def process_url(self, url, retrieve=False):  # noqa: C901
        """Evaluate a URL as a possible download, and maybe retrieve it"""
        # each URL is evaluated once; a later retrieve=True call may fetch it
        if url in self.scanned_urls and not retrieve:
            return
        self.scanned_urls[url] = True
        if not URL_SCHEME(url):
            # no scheme: treat as a local file or directory
            self.process_filename(url)
            return
        else:
            dists = list(distros_for_url(url))
            if dists:
                if not self.url_ok(url):
                    return
                self.debug("Found link: %s", url)

        # a direct distribution link, a scan-only pass, or an already
        # fetched page means we don't need to download the page body
        if dists or not retrieve or url in self.fetched_urls:
            list(map(self.add, dists))
            return  # don't need the actual page

        if not self.url_ok(url):
            self.fetched_urls[url] = True
            return

        self.info("Reading %s", url)
        self.fetched_urls[url] = True  # prevent multiple fetch attempts
        tmpl = "Download error on %s: %%s -- Some packages may not be found!"
        f = self.open_url(url, tmpl % url)
        if f is None:
            return
        if isinstance(f, urllib.error.HTTPError) and f.code == 401:
            self.info("Authentication error: %s" % f.msg)
        # also record the final (possibly redirected) URL as fetched
        self.fetched_urls[f.url] = True
        if 'html' not in f.headers.get('content-type', '').lower():
            f.close()  # not html, we can't process it
            return

        base = f.url  # handle redirects
        page = f.read()
        if not isinstance(page, str):
            # In Python 3 and got bytes but want str.
            if isinstance(f, urllib.error.HTTPError):
                # Errors have no charset, assume latin1:
                charset = 'latin-1'
            else:
                charset = f.headers.get_param('charset') or 'latin-1'
            page = page.decode(charset, "ignore")
        f.close()
        # recursively evaluate every link on the page
        for match in HREF.finditer(page):
            link = urllib.parse.urljoin(base, htmldecode(match.group(1)))
            self.process_url(link)
        # index pages get additional package-page processing
        if url.startswith(self.index_url) and getattr(f, 'code', None) != 404:
            page = self.process_index(url, page)
381
382 def process_filename(self, fn, nested=False):
383 # process filenames or directories
384 if not os.path.exists(fn):
385 self.warn("Not found: %s", fn)
386 return
387
388 if os.path.isdir(fn) and not nested:
389 path = os.path.realpath(fn)
390 for item in os.listdir(path):
391 self.process_filename(os.path.join(path, item), True)
392
393 dists = distros_for_filename(fn)
394 if dists:
395 self.debug("Found: %s", fn)
396 list(map(self.add, dists))
397
398 def url_ok(self, url, fatal=False):
399 s = URL_SCHEME(url)
400 is_file = s and s.group(1).lower() == 'file'
401 if is_file or self.allows(urllib.parse.urlparse(url)[1]):
402 return True
403 msg = (
404 "\nNote: Bypassing %s (disallowed host; see "
405 "http://bit.ly/2hrImnY for details).\n"
406 )
407 if fatal:
408 raise DistutilsError(msg % url)
409 else:
410 self.warn(msg, url)
411
412 def scan_egg_links(self, search_path):
413 dirs = filter(os.path.isdir, search_path)
414 egg_links = (
415 (path, entry)
416 for path in dirs
417 for entry in os.listdir(path)
418 if entry.endswith('.egg-link')
419 )
420 list(itertools.starmap(self.scan_egg_link, egg_links))
421
422 def scan_egg_link(self, path, entry):
423 with open(os.path.join(path, entry)) as raw_lines:
424 # filter non-empty lines
425 lines = list(filter(None, map(str.strip, raw_lines)))
426
427 if len(lines) != 2:
428 # format is not recognized; punt
429 return
430
431 egg_path, setup_path = lines
432
433 for dist in find_distributions(os.path.join(path, egg_path)):
434 dist.location = os.path.join(path, *lines)
435 dist.precedence = SOURCE_DIST
436 self.add(dist)
437
438 def _scan(self, link):
439 # Process a URL to see if it's for a package page
440 NO_MATCH_SENTINEL = None, None
441 if not link.startswith(self.index_url):
442 return NO_MATCH_SENTINEL
443
444 parts = list(map(urllib.parse.unquote, link[len(self.index_url) :].split('/')))
445 if len(parts) != 2 or '#' in parts[1]:
446 return NO_MATCH_SENTINEL
447
448 # it's a package page, sanitize and index it
449 pkg = safe_name(parts[0])
450 ver = safe_version(parts[1])
451 self.package_pages.setdefault(pkg.lower(), {})[link] = True
452 return to_filename(pkg), to_filename(ver)
453
    def process_index(self, url, page):
        """Process the contents of a PyPI page"""

        # process an index page into the package-page index
        for match in HREF.finditer(page):
            try:
                self._scan(urllib.parse.urljoin(url, htmldecode(match.group(1))))
            except ValueError:
                pass

        pkg, ver = self._scan(url)  # ensure this page is in the page index
        if not pkg:
            return ""  # no sense double-scanning non-package pages

        # process individual package page
        for new_url in find_external_links(url, page):
            # Process the found URL
            base, frag = egg_info_for_url(new_url)
            if base.endswith('.py') and not frag:
                # bare .py link: attach an #egg fragment if the version is known
                if ver:
                    new_url += '#egg=%s-%s' % (pkg, ver)
                else:
                    self.need_version_info(url)
            self.scan_url(new_url)

        # rewrite old-style PyPI md5 annotation links into #md5= fragments;
        # m.group(1, 3, 2) reorders (href, digest, text) into the template
        return PYPI_MD5.sub(
            lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page
        )
482
    def need_version_info(self, url):
        # a .py download link carried no #egg fragment, so only a full
        # index scan can determine which version it belongs to
        self.scan_all(
            "Page at %s links to .py file(s) without version info; an index "
            "scan is required.",
            url,
        )
489
490 def scan_all(self, msg=None, *args):
491 if self.index_url not in self.fetched_urls:
492 if msg:
493 self.warn(msg, *args)
494 self.info("Scanning index of all packages (this may take a while)")
495 self.scan_url(self.index_url)
496
497 def find_packages(self, requirement):
498 self.scan_url(self.index_url + requirement.unsafe_name + '/')
499
500 if not self.package_pages.get(requirement.key):
501 # Fall back to safe version of the name
502 self.scan_url(self.index_url + requirement.project_name + '/')
503
504 if not self.package_pages.get(requirement.key):
505 # We couldn't find the target package, so search the index page too
506 self.not_found_in_index(requirement)
507
508 for url in list(self.package_pages.get(requirement.key, ())):
509 # scan each page that might be related to the desired package
510 self.scan_url(url)
511
512 def obtain(self, requirement, installer=None):
513 self.prescan()
514 self.find_packages(requirement)
515 for dist in self[requirement.key]:
516 if dist in requirement:
517 return dist
518 self.debug("%s does not match %s", requirement, dist)
519 return super(PackageIndex, self).obtain(requirement, installer)
520
521 def check_hash(self, checker, filename, tfp):
522 """
523 checker is a ContentChecker
524 """
525 checker.report(self.debug, "Validating %%s checksum for %s" % filename)
526 if not checker.is_valid():
527 tfp.close()
528 os.unlink(filename)
529 raise DistutilsError(
530 "%s validation failed for %s; "
531 "possible download problem?"
532 % (checker.hash.name, os.path.basename(filename))
533 )
534
535 def add_find_links(self, urls):
536 """Add `urls` to the list that will be prescanned for searches"""
537 for url in urls:
538 if (
539 self.to_scan is None # if we have already "gone online"
540 or not URL_SCHEME(url) # or it's a local file/directory
541 or url.startswith('file:')
542 or list(distros_for_url(url)) # or a direct package link
543 ):
544 # then go ahead and process it now
545 self.scan_url(url)
546 else:
547 # otherwise, defer retrieval till later
548 self.to_scan.append(url)
549
550 def prescan(self):
551 """Scan urls scheduled for prescanning (e.g. --find-links)"""
552 if self.to_scan:
553 list(map(self.scan_url, self.to_scan))
554 self.to_scan = None # from now on, go ahead and process immediately
555
556 def not_found_in_index(self, requirement):
557 if self[requirement.key]: # we've seen at least one distro
558 meth, msg = self.info, "Couldn't retrieve index page for %r"
559 else: # no distros seen for this name, might be misspelled
560 meth, msg = (
561 self.warn,
562 "Couldn't find index page for %r (maybe misspelled?)",
563 )
564 meth(msg, requirement.unsafe_name)
565 self.scan_all()
566
567 def download(self, spec, tmpdir):
568 """Locate and/or download `spec` to `tmpdir`, returning a local path
569
570 `spec` may be a ``Requirement`` object, or a string containing a URL,
571 an existing local filename, or a project/version requirement spec
572 (i.e. the string form of a ``Requirement`` object). If it is the URL
573 of a .py file with an unambiguous ``#egg=name-version`` tag (i.e., one
574 that escapes ``-`` as ``_`` throughout), a trivial ``setup.py`` is
575 automatically created alongside the downloaded file.
576
577 If `spec` is a ``Requirement`` object or a string containing a
578 project/version requirement spec, this method returns the location of
579 a matching distribution (possibly after downloading it to `tmpdir`).
580 If `spec` is a locally existing file or directory name, it is simply
581 returned unchanged. If `spec` is a URL, it is downloaded to a subpath
582 of `tmpdir`, and the local filename is returned. Various errors may be
583 raised if a problem occurs during downloading.
584 """
585 if not isinstance(spec, Requirement):
586 scheme = URL_SCHEME(spec)
587 if scheme:
588 # It's a url, download it to tmpdir
589 found = self._download_url(scheme.group(1), spec, tmpdir)
590 base, fragment = egg_info_for_url(spec)
591 if base.endswith('.py'):
592 found = self.gen_setup(found, fragment, tmpdir)
593 return found
594 elif os.path.exists(spec):
595 # Existing file or directory, just return it
596 return spec
597 else:
598 spec = parse_requirement_arg(spec)
599 return getattr(self.fetch_distribution(spec, tmpdir), 'location', None)
600
    def fetch_distribution(  # noqa: C901 # is too complex (14) # FIXME
        self,
        requirement,
        tmpdir,
        force_scan=False,
        source=False,
        develop_ok=False,
        local_index=None,
    ):
        """Obtain a distribution suitable for fulfilling `requirement`

        `requirement` must be a ``pkg_resources.Requirement`` instance.
        If necessary, or if the `force_scan` flag is set, the requirement is
        searched for in the (online) package index as well as the locally
        installed packages. If a distribution matching `requirement` is found,
        the returned distribution's ``location`` is the value you would have
        gotten from calling the ``download()`` method with the matching
        distribution's URL or filename. If no matching distribution is found,
        ``None`` is returned.

        If the `source` flag is set, only source distributions and source
        checkout links will be considered. Unless the `develop_ok` flag is
        set, development and system eggs (i.e., those using the ``.egg-info``
        format) will be ignored.
        """
        # process a Requirement
        self.info("Searching for %s", requirement)
        skipped = {}  # development eggs already warned about
        dist = None

        def find(req, env=None):
            if env is None:
                env = self
            # Find a matching distribution; may be called more than once

            for dist in env[req.key]:

                if dist.precedence == DEVELOP_DIST and not develop_ok:
                    # warn once per development/system egg, then skip it
                    if dist not in skipped:
                        self.warn(
                            "Skipping development or system egg: %s",
                            dist,
                        )
                        skipped[dist] = 1
                    continue

                test = dist in req and (dist.precedence <= SOURCE_DIST or not source)
                if test:
                    # candidate matches; only accept it if the download
                    # actually materialized on disk
                    loc = self.download(dist.location, tmpdir)
                    dist.download_location = loc
                    if os.path.exists(dist.download_location):
                        return dist

        # progressively widen the search: forced scan, local index,
        # deferred find-links, and finally an on-demand package scan
        if force_scan:
            self.prescan()
            self.find_packages(requirement)
            dist = find(requirement)

        if not dist and local_index is not None:
            dist = find(requirement, local_index)

        if dist is None:
            if self.to_scan is not None:
                self.prescan()
            dist = find(requirement)

        if dist is None and not force_scan:
            self.find_packages(requirement)
            dist = find(requirement)

        if dist is None:
            self.warn(
                "No local packages or working download links found for %s%s",
                (source and "a source distribution of " or ""),
                requirement,
            )
        else:
            self.info("Best match: %s", dist)
            # hand back a clone pointing at the downloaded copy
            return dist.clone(location=dist.download_location)
680
681 def fetch(self, requirement, tmpdir, force_scan=False, source=False):
682 """Obtain a file suitable for fulfilling `requirement`
683
684 DEPRECATED; use the ``fetch_distribution()`` method now instead. For
685 backward compatibility, this routine is identical but returns the
686 ``location`` of the downloaded distribution instead of a distribution
687 object.
688 """
689 dist = self.fetch_distribution(requirement, tmpdir, force_scan, source)
690 if dist is not None:
691 return dist.location
692 return None
693
694 def gen_setup(self, filename, fragment, tmpdir):
695 match = EGG_FRAGMENT.match(fragment)
696 dists = (
697 match
698 and [
699 d
700 for d in interpret_distro_name(filename, match.group(1), None)
701 if d.version
702 ]
703 or []
704 )
705
706 if len(dists) == 1: # unambiguous ``#egg`` fragment
707 basename = os.path.basename(filename)
708
709 # Make sure the file has been downloaded to the temp dir.
710 if os.path.dirname(filename) != tmpdir:
711 dst = os.path.join(tmpdir, basename)
712 if not (os.path.exists(dst) and os.path.samefile(filename, dst)):
713 shutil.copy2(filename, dst)
714 filename = dst
715
716 with open(os.path.join(tmpdir, 'setup.py'), 'w') as file:
717 file.write(
718 "from setuptools import setup\n"
719 "setup(name=%r, version=%r, py_modules=[%r])\n"
720 % (
721 dists[0].project_name,
722 dists[0].version,
723 os.path.splitext(basename)[0],
724 )
725 )
726 return filename
727
728 elif match:
729 raise DistutilsError(
730 "Can't unambiguously interpret project/version identifier %r; "
731 "any dashes in the name or version should be escaped using "
732 "underscores. %r" % (fragment, dists)
733 )
734 else:
735 raise DistutilsError(
736 "Can't process plain .py files without an '#egg=name-version'"
737 " suffix to enable automatic setup script generation."
738 )
739
740 dl_blocksize = 8192
741
    def _download_to(self, url, filename):
        """Stream `url` into the local file `filename`, validating checksums.

        Returns the response headers.  Raises DistutilsError on an HTTP
        error response or a failed checksum validation.
        """
        self.info("Downloading %s", url)
        # Download the file
        fp = None
        try:
            # checksum requested via a #md5=/#sha...= URL fragment, if any
            checker = HashChecker.from_url(url)
            fp = self.open_url(url)
            if isinstance(fp, urllib.error.HTTPError):
                raise DistutilsError(
                    "Can't download %s: %s %s" % (url, fp.code, fp.msg)
                )
            headers = fp.info()
            blocknum = 0
            bs = self.dl_blocksize
            size = -1
            if "content-length" in headers:
                # Some servers return multiple Content-Length headers :(
                sizes = headers.get_all('Content-Length')
                size = max(map(int, sizes))
            # initial progress callback before the first block
            self.reporthook(url, filename, blocknum, bs, size)
            with open(filename, 'wb') as tfp:
                while True:
                    block = fp.read(bs)
                    if block:
                        checker.feed(block)
                        tfp.write(block)
                        blocknum += 1
                        self.reporthook(url, filename, blocknum, bs, size)
                    else:
                        break
                # validate (and possibly delete) the file while tfp is open
                self.check_hash(checker, filename, tfp)
                return headers
        finally:
            if fp:
                fp.close()
777
    def reporthook(self, url, filename, blocknum, blksize, size):
        # download-progress hook invoked by _download_to; subclasses may
        # override it to display progress
        pass  # no-op
780
    # FIXME:
    def open_url(self, url, warning=None):  # noqa: C901  # is too complex (12)
        """Open `url`, returning a file-like response object.

        On HTTP errors the ``HTTPError`` instance itself is returned for the
        caller to inspect.  For other failures: if `warning` is given it is
        logged (with the error message as argument) and None is implicitly
        returned; otherwise a DistutilsError is raised.
        """
        if url.startswith('file:'):
            return local_open(url)
        try:
            return open_with_auth(url, self.opener)
        except (ValueError, http.client.InvalidURL) as v:
            msg = ' '.join([str(arg) for arg in v.args])
            if warning:
                self.warn(warning, msg)
            else:
                raise DistutilsError('%s %s' % (url, msg)) from v
        except urllib.error.HTTPError as v:
            # HTTP errors are returned, not raised; callers check for them
            return v
        except urllib.error.URLError as v:
            if warning:
                self.warn(warning, v.reason)
            else:
                raise DistutilsError(
                    "Download error for %s: %s" % (url, v.reason)
                ) from v
        except http.client.BadStatusLine as v:
            if warning:
                self.warn(warning, v.line)
            else:
                raise DistutilsError(
                    '%s returned a bad status line. The server might be '
                    'down, %s' % (url, v.line)
                ) from v
        except (http.client.HTTPException, socket.error) as v:
            if warning:
                self.warn(warning, v)
            else:
                raise DistutilsError("Download error for %s: %s" % (url, v)) from v
815
816 def _download_url(self, scheme, url, tmpdir):
817 # Determine download filename
818 #
819 name, fragment = egg_info_for_url(url)
820 if name:
821 while '..' in name:
822 name = name.replace('..', '.').replace('\\', '_')
823 else:
824 name = "__downloaded__" # default if URL has no path contents
825
826 if name.endswith('.egg.zip'):
827 name = name[:-4] # strip the extra .zip before download
828
829 filename = os.path.join(tmpdir, name)
830
831 # Download the file
832 #
833 if scheme == 'svn' or scheme.startswith('svn+'):
834 return self._download_svn(url, filename)
835 elif scheme == 'git' or scheme.startswith('git+'):
836 return self._download_git(url, filename)
837 elif scheme.startswith('hg+'):
838 return self._download_hg(url, filename)
839 elif scheme == 'file':
840 return urllib.request.url2pathname(urllib.parse.urlparse(url)[2])
841 else:
842 self.url_ok(url, True) # raises error if not allowed
843 return self._attempt_download(url, filename)
844
    def scan_url(self, url):
        # scan `url` for download links, retrieving the page if necessary
        self.process_url(url, True)
847
848 def _attempt_download(self, url, filename):
849 headers = self._download_to(url, filename)
850 if 'html' in headers.get('content-type', '').lower():
851 return self._download_html(url, headers, filename)
852 else:
853 return filename
854
    def _download_html(self, url, headers, filename):
        # a download URL unexpectedly returned HTML; the only recognized
        # case is a Subversion repository index page
        file = open(filename)
        for line in file:
            if line.strip():
                # Check for a subversion index page
                if re.search(r'<title>([^- ]+ - )?Revision \d+:', line):
                    # it's a subversion index page:
                    file.close()
                    os.unlink(filename)
                    return self._download_svn(url, filename)
                break  # not an index page
        file.close()
        os.unlink(filename)
        raise DistutilsError("Unexpected HTML page found at " + url)
869
870 def _download_svn(self, url, filename):
871 warnings.warn("SVN download support is deprecated", UserWarning)
872 url = url.split('#', 1)[0] # remove any fragment for svn's sake
873 creds = ''
874 if url.lower().startswith('svn:') and '@' in url:
875 scheme, netloc, path, p, q, f = urllib.parse.urlparse(url)
876 if not netloc and path.startswith('//') and '/' in path[2:]:
877 netloc, path = path[2:].split('/', 1)
878 auth, host = _splituser(netloc)
879 if auth:
880 if ':' in auth:
881 user, pw = auth.split(':', 1)
882 creds = " --username=%s --password=%s" % (user, pw)
883 else:
884 creds = " --username=" + auth
885 netloc = host
886 parts = scheme, netloc, url, p, q, f
887 url = urllib.parse.urlunparse(parts)
888 self.info("Doing subversion checkout from %s to %s", url, filename)
889 os.system("svn checkout%s -q %s %s" % (creds, url, filename))
890 return filename
891
892 @staticmethod
893 def _vcs_split_rev_from_url(url, pop_prefix=False):
894 scheme, netloc, path, query, frag = urllib.parse.urlsplit(url)
895
896 scheme = scheme.split('+', 1)[-1]
897
898 # Some fragment identification fails
899 path = path.split('#', 1)[0]
900
901 rev = None
902 if '@' in path:
903 path, rev = path.rsplit('@', 1)
904
905 # Also, discard fragment
906 url = urllib.parse.urlunsplit((scheme, netloc, path, query, ''))
907
908 return url, rev
909
910 def _download_git(self, url, filename):
911 filename = filename.split('#', 1)[0]
912 url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)
913
914 self.info("Doing git clone from %s to %s", url, filename)
915 os.system("git clone --quiet %s %s" % (url, filename))
916
917 if rev is not None:
918 self.info("Checking out %s", rev)
919 os.system(
920 "git -C %s checkout --quiet %s"
921 % (
922 filename,
923 rev,
924 )
925 )
926
927 return filename
928
929 def _download_hg(self, url, filename):
930 filename = filename.split('#', 1)[0]
931 url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)
932
933 self.info("Doing hg clone from %s to %s", url, filename)
934 os.system("hg clone --quiet %s %s" % (url, filename))
935
936 if rev is not None:
937 self.info("Updating to %s", rev)
938 os.system(
939 "hg --cwd %s up -C -r %s -q"
940 % (
941 filename,
942 rev,
943 )
944 )
945
946 return filename
947
    def debug(self, msg, *args):
        # logging helpers delegating to distutils.log; subclasses may
        # override these to redirect output
        log.debug(msg, *args)

    def info(self, msg, *args):
        log.info(msg, *args)

    def warn(self, msg, *args):
        log.warn(msg, *args)
956
957
958 # This pattern matches a character entity reference (a decimal numeric
959 # references, a hexadecimal numeric reference, or a named reference).
960 entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub
961
962
def decode_entity(match):
    """Return the decoded text for one matched HTML entity reference."""
    return html.unescape(match.group(0))
966
967
def htmldecode(text):
    """
    Decode HTML entities in the given text.

    >>> htmldecode(
    ...     'https://../package_name-0.1.2.tar.gz'
    ...     '?tokena=A&amp;tokenb=B">package_name-0.1.2.tar.gz')
    'https://../package_name-0.1.2.tar.gz?tokena=A&tokenb=B">package_name-0.1.2.tar.gz'
    """
    # substitute each entity reference match via decode_entity/html.unescape
    return entity_sub(decode_entity, text)
978
979
def socket_timeout(timeout=15):
    """Decorator factory: run the wrapped callable with the socket default
    timeout set to `timeout` seconds, restoring the old value afterwards."""

    def decorator(func):
        def wrapped(*args, **kwargs):
            saved = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
            try:
                return func(*args, **kwargs)
            finally:
                # restore no matter how func exits
                socket.setdefaulttimeout(saved)

        return wrapped

    return decorator
993
994
995 def _encode_auth(auth):
996 """
997 Encode auth from a URL suitable for an HTTP header.
998 >>> str(_encode_auth('username%3Apassword'))
999 'dXNlcm5hbWU6cGFzc3dvcmQ='
1000
1001 Long auth strings should not cause a newline to be inserted.
1002 >>> long_auth = 'username:' + 'password'*10
1003 >>> chr(10) in str(_encode_auth(long_auth))
1004 False
1005 """
1006 auth_s = urllib.parse.unquote(auth)
1007 # convert to bytes
1008 auth_bytes = auth_s.encode()
1009 encoded_bytes = base64.b64encode(auth_bytes)
1010 # convert back to a string
1011 encoded = encoded_bytes.decode()
1012 # strip the trailing carriage return
1013 return encoded.replace('\n', '')
1014
1015
class Credential:
    """
    A username/password pair. Use like a namedtuple.
    """

    def __init__(self, username, password):
        self.username = username
        self.password = password

    def __iter__(self):
        # allow tuple-style unpacking: user, pw = cred
        return iter((self.username, self.password))

    def __str__(self):
        return '%(username)s:%(password)s' % vars(self)
1031
1032
class PyPIConfig(configparser.RawConfigParser):
    """Repository credentials loaded from the user's ``.pypirc`` file."""

    def __init__(self):
        """
        Load from ~/.pypirc
        """
        defaults = dict.fromkeys(['username', 'password', 'repository'], '')
        super().__init__(defaults)

        rc = os.path.join(os.path.expanduser('~'), '.pypirc')
        if os.path.exists(rc):
            self.read(rc)

    @property
    def creds_by_repository(self):
        # only sections that actually name a repository participate
        with_repos = (
            section
            for section in self.sections()
            if self.get(section, 'repository').strip()
        )
        return dict(self._get_repo_cred(section) for section in with_repos)

    def _get_repo_cred(self, section):
        repo = self.get(section, 'repository').strip()
        cred = Credential(
            self.get(section, 'username').strip(),
            self.get(section, 'password').strip(),
        )
        return repo, cred

    def find_credential(self, url):
        """
        If the URL indicated appears to be a repository defined in this
        config, return the credential for that repository.
        """
        for repository, cred in self.creds_by_repository.items():
            if url.startswith(repository):
                return cred
1070
1071
def open_with_auth(url, opener=urllib.request.urlopen):
    """Open a urllib2 request, handling HTTP authentication"""

    parsed = urllib.parse.urlparse(url)
    scheme, netloc, path, params, query, frag = parsed

    # Double scheme does not raise on macOS as revealed by a
    # failing test. We would expect "nonnumeric port". Refs #20.
    if netloc.endswith(':'):
        raise http.client.InvalidURL("nonnumeric port: ''")

    if scheme in ('http', 'https'):
        # split "user:pass@host" credentials out of the netloc
        auth, address = _splituser(netloc)
    else:
        auth = None

    if not auth:
        # no credentials in the URL; try the user's .pypirc
        # NOTE(review): if a non-http(s) URL matches a .pypirc credential,
        # `address` is unbound below -- presumably unreachable in practice;
        # confirm before relying on that path.
        cred = PyPIConfig().find_credential(url)
        if cred:
            auth = str(cred)
            info = cred.username, url
            log.info('Authenticating as %s for %s (from .pypirc)', *info)

    if auth:
        auth = "Basic " + _encode_auth(auth)
        # request with the credential-free URL plus an Authorization header
        parts = scheme, address, path, params, query, frag
        new_url = urllib.parse.urlunparse(parts)
        request = urllib.request.Request(new_url)
        request.add_header("Authorization", auth)
    else:
        request = urllib.request.Request(url)

    request.add_header('User-Agent', user_agent)
    fp = opener(request)

    if auth:
        # Put authentication info back into request URL if same host,
        # so that links found on the page will work
        s2, h2, path2, param2, query2, frag2 = urllib.parse.urlparse(fp.url)
        if s2 == scheme and h2 == address:
            parts = s2, netloc, path2, param2, query2, frag2
            fp.url = urllib.parse.urlunparse(parts)

    return fp
1116
1117
1118 # copy of urllib.parse._splituser from Python 3.8
1119 def _splituser(host):
1120 """splituser('user[:passwd]@host[:port]')
1121 --> 'user[:passwd]', 'host[:port]'."""
1122 user, delim, host = host.rpartition('@')
1123 return (user if delim else None), host
1124
1125
1126 # adding a timeout to avoid freezing package_index
1127 open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth)
1128
1129
def fix_sf_url(url):
    """Identity function kept for backward compatibility."""
    return url
1132
1133
def local_open(url):
    """Read a local path, with special support for directories"""
    scheme, server, path, param, query, frag = urllib.parse.urlparse(url)
    filename = urllib.request.url2pathname(path)
    if os.path.isfile(filename):
        # plain files are served directly
        return urllib.request.urlopen(url)
    elif path.endswith('/') and os.path.isdir(filename):
        files = []
        for f in os.listdir(filename):
            filepath = os.path.join(filename, f)
            if f == 'index.html':
                # an existing index.html is served verbatim (break skips
                # the else clause below)
                with open(filepath, 'r') as fp:
                    body = fp.read()
                break
            elif os.path.isdir(filepath):
                f += '/'
            files.append('<a href="{name}">{name}</a>'.format(name=f))
        else:
            # no index.html found: synthesize a directory listing
            tmpl = (
                "<html><head><title>{url}</title>" "</head><body>{files}</body></html>"
            )
            body = tmpl.format(url=url, files='\n'.join(files))
        status, message = 200, "OK"
    else:
        status, message, body = 404, "Path not found", "Not found"

    # return an HTTPError carrying the body; callers treat it as a response
    headers = {'content-type': 'text/html'}
    body_stream = io.StringIO(body)
    return urllib.error.HTTPError(url, status, message, headers, body_stream)