crepu.dev Git - config.git/blame_incremental - djavu-asus/elpy/rpc-venv/lib/python3.11/site-packages/setuptools/package

... / ...

Commit	Line	Data
	1	"""PyPI and direct package downloading."""
	2
import sys
import os
import re
import io
import shutil
import socket
import subprocess
import base64
import hashlib
import itertools
import warnings
import configparser
import html
import http.client
import urllib.parse
import urllib.request
import urllib.error
from functools import wraps

import setuptools
from pkg_resources import (
    CHECKOUT_DIST,
    Distribution,
    BINARY_DIST,
    normalize_path,
    SOURCE_DIST,
    Environment,
    find_distributions,
    safe_name,
    safe_version,
    to_filename,
    Requirement,
    DEVELOP_DIST,
    EGG_DIST,
    parse_version,
)
from distutils import log
from distutils.errors import DistutilsError
from fnmatch import translate
from setuptools.wheel import Wheel
from setuptools.extern.more_itertools import unique_everseen
	43
	44
	45	EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.+!]+)$')
	46	HREF = re.compile(r"""href\s=\s['"]?([^'"> ]+)""", re.I)
	47	PYPI_MD5 = re.compile(
	48	r'<a href="([^"#]+)">([^<]+)</a>\n\s+\(<a (?:title="MD5 hash"\n\s+)'
	49	r'href="[^?]+\?:action=show_md5&digest=([0-9a-f]{32})">md5</a>\)'
	50	)
	51	URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):', re.I).match
	52	EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()
	53
	54	__all__ = [
	55	'PackageIndex',
	56	'distros_for_url',
	57	'parse_bdist_wininst',
	58	'interpret_distro_name',
	59	]
	60
	61	_SOCKET_TIMEOUT = 15
	62
	63	_tmpl = "setuptools/{setuptools.__version__} Python-urllib/{py_major}"
	64	user_agent = _tmpl.format(
	65	py_major='{}.{}'.format(*sys.version_info), setuptools=setuptools
	66	)
	67
	68
	69	def parse_requirement_arg(spec):
	70	try:
	71	return Requirement.parse(spec)
	72	except ValueError as e:
	73	raise DistutilsError(
	74	"Not a URL, existing file, or requirement spec: %r" % (spec,)
	75	) from e
	76
	77
	78	def parse_bdist_wininst(name):
	79	"""Return (base,pyversion) or (None,None) for possible .exe name"""
	80
	81	lower = name.lower()
	82	base, py_ver, plat = None, None, None
	83
	84	if lower.endswith('.exe'):
	85	if lower.endswith('.win32.exe'):
	86	base = name[:-10]
	87	plat = 'win32'
	88	elif lower.startswith('.win32-py', -16):
	89	py_ver = name[-7:-4]
	90	base = name[:-16]
	91	plat = 'win32'
	92	elif lower.endswith('.win-amd64.exe'):
	93	base = name[:-14]
	94	plat = 'win-amd64'
	95	elif lower.startswith('.win-amd64-py', -20):
	96	py_ver = name[-7:-4]
	97	base = name[:-20]
	98	plat = 'win-amd64'
	99	return base, py_ver, plat
	100
	101
	102	def egg_info_for_url(url):
	103	parts = urllib.parse.urlparse(url)
	104	scheme, server, path, parameters, query, fragment = parts
	105	base = urllib.parse.unquote(path.split('/')[-1])
	106	if server == 'sourceforge.net' and base == 'download': # XXX Yuck
	107	base = urllib.parse.unquote(path.split('/')[-2])
	108	if '#' in base:
	109	base, fragment = base.split('#', 1)
	110	return base, fragment
	111
	112
	113	def distros_for_url(url, metadata=None):
	114	"""Yield egg or source distribution objects that might be found at a URL"""
	115	base, fragment = egg_info_for_url(url)
	116	for dist in distros_for_location(url, base, metadata):
	117	yield dist
	118	if fragment:
	119	match = EGG_FRAGMENT.match(fragment)
	120	if match:
	121	for dist in interpret_distro_name(
	122	url, match.group(1), metadata, precedence=CHECKOUT_DIST
	123	):
	124	yield dist
	125
	126
	127	def distros_for_location(location, basename, metadata=None):
	128	"""Yield egg or source distribution objects based on basename"""
	129	if basename.endswith('.egg.zip'):
	130	basename = basename[:-4] # strip the .zip
	131	if basename.endswith('.egg') and '-' in basename:
	132	# only one, unambiguous interpretation
	133	return [Distribution.from_location(location, basename, metadata)]
	134	if basename.endswith('.whl') and '-' in basename:
	135	wheel = Wheel(basename)
	136	if not wheel.is_compatible():
	137	return []
	138	return [
	139	Distribution(
	140	location=location,
	141	project_name=wheel.project_name,
	142	version=wheel.version,
	143	# Increase priority over eggs.
	144	precedence=EGG_DIST + 1,
	145	)
	146	]
	147	if basename.endswith('.exe'):
	148	win_base, py_ver, platform = parse_bdist_wininst(basename)
	149	if win_base is not None:
	150	return interpret_distro_name(
	151	location, win_base, metadata, py_ver, BINARY_DIST, platform
	152	)
	153	# Try source distro extensions (.zip, .tgz, etc.)
	154	#
	155	for ext in EXTENSIONS:
	156	if basename.endswith(ext):
	157	basename = basename[: -len(ext)]
	158	return interpret_distro_name(location, basename, metadata)
	159	return [] # no extension matched
	160
	161
	162	def distros_for_filename(filename, metadata=None):
	163	"""Yield possible egg or source distribution objects based on a filename"""
	164	return distros_for_location(
	165	normalize_path(filename), os.path.basename(filename), metadata
	166	)
	167
	168
	169	def interpret_distro_name(
	170	location, basename, metadata, py_version=None, precedence=SOURCE_DIST, platform=None
	171	):
	172	"""Generate the interpretation of a source distro name
	173
	174	Note: if `location` is a filesystem filename, you should call
	175	``pkg_resources.normalize_path()`` on it before passing it to this
	176	routine!
	177	"""
	178
	179	parts = basename.split('-')
	180	if not py_version and any(re.match(r'py\d\.\d$', p) for p in parts[2:]):
	181	# it is a bdist_dumb, not an sdist -- bail out
	182	return
	183
	184	# find the pivot (p) that splits the name from the version.
	185	# infer the version as the first item that has a digit.
	186	for p in range(len(parts)):
	187	if parts[p][:1].isdigit():
	188	break
	189	else:
	190	p = len(parts)
	191
	192	yield Distribution(
	193	location,
	194	metadata,
	195	'-'.join(parts[:p]),
	196	'-'.join(parts[p:]),
	197	py_version=py_version,
	198	precedence=precedence,
	199	platform=platform
	200	)
	201
	202
	203	def unique_values(func):
	204	"""
	205	Wrap a function returning an iterable such that the resulting iterable
	206	only ever yields unique items.
	207	"""
	208
	209	@wraps(func)
	210	def wrapper(args, *kwargs):
	211	return unique_everseen(func(args, *kwargs))
	212
	213	return wrapper
	214
	215
	216	REL = re.compile(r"""<([^>]\srel\s{0,10}=\s{0,10}['"]?([^'" >]+)[^>])>""", re.I)
	217	"""
	218	Regex for an HTML tag with 'rel="val"' attributes.
	219	"""
	220
	221
	222	@unique_values
	223	def find_external_links(url, page):
	224	"""Find rel="homepage" and rel="download" links in `page`, yielding URLs"""
	225
	226	for match in REL.finditer(page):
	227	tag, rel = match.groups()
	228	rels = set(map(str.strip, rel.lower().split(',')))
	229	if 'homepage' in rels or 'download' in rels:
	230	for match in HREF.finditer(tag):
	231	yield urllib.parse.urljoin(url, htmldecode(match.group(1)))
	232
	233	for tag in ("<th>Home Page", "<th>Download URL"):
	234	pos = page.find(tag)
	235	if pos != -1:
	236	match = HREF.search(page, pos)
	237	if match:
	238	yield urllib.parse.urljoin(url, htmldecode(match.group(1)))
	239
	240
	241	class ContentChecker:
	242	"""
	243	A null content checker that defines the interface for checking content
	244	"""
	245
	246	def feed(self, block):
	247	"""
	248	Feed a block of data to the hash.
	249	"""
	250	return
	251
	252	def is_valid(self):
	253	"""
	254	Check the hash. Return False if validation fails.
	255	"""
	256	return True
	257
	258	def report(self, reporter, template):
	259	"""
	260	Call reporter with information about the checker (hash name)
	261	substituted into the template.
	262	"""
	263	return
	264
	265
	266	class HashChecker(ContentChecker):
	267	pattern = re.compile(
	268	r'(?P<hash_name>sha1\|sha224\|sha384\|sha256\|sha512\|md5)='
	269	r'(?P<expected>[a-f0-9]+)'
	270	)
	271
	272	def __init__(self, hash_name, expected):
	273	self.hash_name = hash_name
	274	self.hash = hashlib.new(hash_name)
	275	self.expected = expected
	276
	277	@classmethod
	278	def from_url(cls, url):
	279	"Construct a (possibly null) ContentChecker from a URL"
	280	fragment = urllib.parse.urlparse(url)[-1]
	281	if not fragment:
	282	return ContentChecker()
	283	match = cls.pattern.search(fragment)
	284	if not match:
	285	return ContentChecker()
	286	return cls(**match.groupdict())
	287
	288	def feed(self, block):
	289	self.hash.update(block)
	290
	291	def is_valid(self):
	292	return self.hash.hexdigest() == self.expected
	293
	294	def report(self, reporter, template):
	295	msg = template % self.hash_name
	296	return reporter(msg)
	297
	298
	299	class PackageIndex(Environment):
	300	"""A distribution index that scans web pages for download URLs"""
	301
	302	def __init__(
	303	self,
	304	index_url="https://pypi.org/simple/",
	305	hosts=('*',),
	306	ca_bundle=None,
	307	verify_ssl=True,
	308	*args,
	309	**kw
	310	):
	311	super().__init__(args, *kw)
	312	self.index_url = index_url + "/"[: not index_url.endswith('/')]
	313	self.scanned_urls = {}
	314	self.fetched_urls = {}
	315	self.package_pages = {}
	316	self.allows = re.compile('\|'.join(map(translate, hosts))).match
	317	self.to_scan = []
	318	self.opener = urllib.request.urlopen
	319
	320	def add(self, dist):
	321	# ignore invalid versions
	322	try:
	323	parse_version(dist.version)
	324	except Exception:
	325	return
	326	return super().add(dist)
	327
	328	# FIXME: 'PackageIndex.process_url' is too complex (14)
	329	def process_url(self, url, retrieve=False): # noqa: C901
	330	"""Evaluate a URL as a possible download, and maybe retrieve it"""
	331	if url in self.scanned_urls and not retrieve:
	332	return
	333	self.scanned_urls[url] = True
	334	if not URL_SCHEME(url):
	335	self.process_filename(url)
	336	return
	337	else:
	338	dists = list(distros_for_url(url))
	339	if dists:
	340	if not self.url_ok(url):
	341	return
	342	self.debug("Found link: %s", url)
	343
	344	if dists or not retrieve or url in self.fetched_urls:
	345	list(map(self.add, dists))
	346	return # don't need the actual page
	347
	348	if not self.url_ok(url):
	349	self.fetched_urls[url] = True
	350	return
	351
	352	self.info("Reading %s", url)
	353	self.fetched_urls[url] = True # prevent multiple fetch attempts
	354	tmpl = "Download error on %s: %%s -- Some packages may not be found!"
	355	f = self.open_url(url, tmpl % url)
	356	if f is None:
	357	return
	358	if isinstance(f, urllib.error.HTTPError) and f.code == 401:
	359	self.info("Authentication error: %s" % f.msg)
	360	self.fetched_urls[f.url] = True
	361	if 'html' not in f.headers.get('content-type', '').lower():
	362	f.close() # not html, we can't process it
	363	return
	364
	365	base = f.url # handle redirects
	366	page = f.read()
	367	if not isinstance(page, str):
	368	# In Python 3 and got bytes but want str.
	369	if isinstance(f, urllib.error.HTTPError):
	370	# Errors have no charset, assume latin1:
	371	charset = 'latin-1'
	372	else:
	373	charset = f.headers.get_param('charset') or 'latin-1'
	374	page = page.decode(charset, "ignore")
	375	f.close()
	376	for match in HREF.finditer(page):
	377	link = urllib.parse.urljoin(base, htmldecode(match.group(1)))
	378	self.process_url(link)
	379	if url.startswith(self.index_url) and getattr(f, 'code', None) != 404:
	380	page = self.process_index(url, page)
	381
	382	def process_filename(self, fn, nested=False):
	383	# process filenames or directories
	384	if not os.path.exists(fn):
	385	self.warn("Not found: %s", fn)
	386	return
	387
	388	if os.path.isdir(fn) and not nested:
	389	path = os.path.realpath(fn)
	390	for item in os.listdir(path):
	391	self.process_filename(os.path.join(path, item), True)
	392
	393	dists = distros_for_filename(fn)
	394	if dists:
	395	self.debug("Found: %s", fn)
	396	list(map(self.add, dists))
	397
	398	def url_ok(self, url, fatal=False):
	399	s = URL_SCHEME(url)
	400	is_file = s and s.group(1).lower() == 'file'
	401	if is_file or self.allows(urllib.parse.urlparse(url)[1]):
	402	return True
	403	msg = (
	404	"\nNote: Bypassing %s (disallowed host; see "
	405	"http://bit.ly/2hrImnY for details).\n"
	406	)
	407	if fatal:
	408	raise DistutilsError(msg % url)
	409	else:
	410	self.warn(msg, url)
	411
	412	def scan_egg_links(self, search_path):
	413	dirs = filter(os.path.isdir, search_path)
	414	egg_links = (
	415	(path, entry)
	416	for path in dirs
	417	for entry in os.listdir(path)
	418	if entry.endswith('.egg-link')
	419	)
	420	list(itertools.starmap(self.scan_egg_link, egg_links))
	421
	422	def scan_egg_link(self, path, entry):
	423	with open(os.path.join(path, entry)) as raw_lines:
	424	# filter non-empty lines
	425	lines = list(filter(None, map(str.strip, raw_lines)))
	426
	427	if len(lines) != 2:
	428	# format is not recognized; punt
	429	return
	430
	431	egg_path, setup_path = lines
	432
	433	for dist in find_distributions(os.path.join(path, egg_path)):
	434	dist.location = os.path.join(path, *lines)
	435	dist.precedence = SOURCE_DIST
	436	self.add(dist)
	437
	438	def _scan(self, link):
	439	# Process a URL to see if it's for a package page
	440	NO_MATCH_SENTINEL = None, None
	441	if not link.startswith(self.index_url):
	442	return NO_MATCH_SENTINEL
	443
	444	parts = list(map(urllib.parse.unquote, link[len(self.index_url) :].split('/')))
	445	if len(parts) != 2 or '#' in parts[1]:
	446	return NO_MATCH_SENTINEL
	447
	448	# it's a package page, sanitize and index it
	449	pkg = safe_name(parts[0])
	450	ver = safe_version(parts[1])
	451	self.package_pages.setdefault(pkg.lower(), {})[link] = True
	452	return to_filename(pkg), to_filename(ver)
	453
	454	def process_index(self, url, page):
	455	"""Process the contents of a PyPI page"""
	456
	457	# process an index page into the package-page index
	458	for match in HREF.finditer(page):
	459	try:
	460	self._scan(urllib.parse.urljoin(url, htmldecode(match.group(1))))
	461	except ValueError:
	462	pass
	463
	464	pkg, ver = self._scan(url) # ensure this page is in the page index
	465	if not pkg:
	466	return "" # no sense double-scanning non-package pages
	467
	468	# process individual package page
	469	for new_url in find_external_links(url, page):
	470	# Process the found URL
	471	base, frag = egg_info_for_url(new_url)
	472	if base.endswith('.py') and not frag:
	473	if ver:
	474	new_url += '#egg=%s-%s' % (pkg, ver)
	475	else:
	476	self.need_version_info(url)
	477	self.scan_url(new_url)
	478
	479	return PYPI_MD5.sub(
	480	lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page
	481	)
	482
	483	def need_version_info(self, url):
	484	self.scan_all(
	485	"Page at %s links to .py file(s) without version info; an index "
	486	"scan is required.",
	487	url,
	488	)
	489
	490	def scan_all(self, msg=None, *args):
	491	if self.index_url not in self.fetched_urls:
	492	if msg:
	493	self.warn(msg, *args)
	494	self.info("Scanning index of all packages (this may take a while)")
	495	self.scan_url(self.index_url)
	496
	497	def find_packages(self, requirement):
	498	self.scan_url(self.index_url + requirement.unsafe_name + '/')
	499
	500	if not self.package_pages.get(requirement.key):
	501	# Fall back to safe version of the name
	502	self.scan_url(self.index_url + requirement.project_name + '/')
	503
	504	if not self.package_pages.get(requirement.key):
	505	# We couldn't find the target package, so search the index page too
	506	self.not_found_in_index(requirement)
	507
	508	for url in list(self.package_pages.get(requirement.key, ())):
	509	# scan each page that might be related to the desired package
	510	self.scan_url(url)
	511
	512	def obtain(self, requirement, installer=None):
	513	self.prescan()
	514	self.find_packages(requirement)
	515	for dist in self[requirement.key]:
	516	if dist in requirement:
	517	return dist
	518	self.debug("%s does not match %s", requirement, dist)
	519	return super(PackageIndex, self).obtain(requirement, installer)
	520
	521	def check_hash(self, checker, filename, tfp):
	522	"""
	523	checker is a ContentChecker
	524	"""
	525	checker.report(self.debug, "Validating %%s checksum for %s" % filename)
	526	if not checker.is_valid():
	527	tfp.close()
	528	os.unlink(filename)
	529	raise DistutilsError(
	530	"%s validation failed for %s; "
	531	"possible download problem?"
	532	% (checker.hash.name, os.path.basename(filename))
	533	)
	534
	535	def add_find_links(self, urls):
	536	"""Add `urls` to the list that will be prescanned for searches"""
	537	for url in urls:
	538	if (
	539	self.to_scan is None # if we have already "gone online"
	540	or not URL_SCHEME(url) # or it's a local file/directory
	541	or url.startswith('file:')
	542	or list(distros_for_url(url)) # or a direct package link
	543	):
	544	# then go ahead and process it now
	545	self.scan_url(url)
	546	else:
	547	# otherwise, defer retrieval till later
	548	self.to_scan.append(url)
	549
	550	def prescan(self):
	551	"""Scan urls scheduled for prescanning (e.g. --find-links)"""
	552	if self.to_scan:
	553	list(map(self.scan_url, self.to_scan))
	554	self.to_scan = None # from now on, go ahead and process immediately
	555
	556	def not_found_in_index(self, requirement):
	557	if self[requirement.key]: # we've seen at least one distro
	558	meth, msg = self.info, "Couldn't retrieve index page for %r"
	559	else: # no distros seen for this name, might be misspelled
	560	meth, msg = (
	561	self.warn,
	562	"Couldn't find index page for %r (maybe misspelled?)",
	563	)
	564	meth(msg, requirement.unsafe_name)
	565	self.scan_all()
	566
	567	def download(self, spec, tmpdir):
	568	"""Locate and/or download `spec` to `tmpdir`, returning a local path
	569
	570	`spec` may be a ``Requirement`` object, or a string containing a URL,
	571	an existing local filename, or a project/version requirement spec
	572	(i.e. the string form of a ``Requirement`` object). If it is the URL
	573	of a .py file with an unambiguous ``#egg=name-version`` tag (i.e., one
	574	that escapes ``-`` as ``_`` throughout), a trivial ``setup.py`` is
	575	automatically created alongside the downloaded file.
	576
	577	If `spec` is a ``Requirement`` object or a string containing a
	578	project/version requirement spec, this method returns the location of
	579	a matching distribution (possibly after downloading it to `tmpdir`).
	580	If `spec` is a locally existing file or directory name, it is simply
	581	returned unchanged. If `spec` is a URL, it is downloaded to a subpath
	582	of `tmpdir`, and the local filename is returned. Various errors may be
	583	raised if a problem occurs during downloading.
	584	"""
	585	if not isinstance(spec, Requirement):
	586	scheme = URL_SCHEME(spec)
	587	if scheme:
	588	# It's a url, download it to tmpdir
	589	found = self._download_url(scheme.group(1), spec, tmpdir)
	590	base, fragment = egg_info_for_url(spec)
	591	if base.endswith('.py'):
	592	found = self.gen_setup(found, fragment, tmpdir)
	593	return found
	594	elif os.path.exists(spec):
	595	# Existing file or directory, just return it
	596	return spec
	597	else:
	598	spec = parse_requirement_arg(spec)
	599	return getattr(self.fetch_distribution(spec, tmpdir), 'location', None)
	600
	601	def fetch_distribution( # noqa: C901 # is too complex (14) # FIXME
	602	self,
	603	requirement,
	604	tmpdir,
	605	force_scan=False,
	606	source=False,
	607	develop_ok=False,
	608	local_index=None,
	609	):
	610	"""Obtain a distribution suitable for fulfilling `requirement`
	611
	612	`requirement` must be a ``pkg_resources.Requirement`` instance.
	613	If necessary, or if the `force_scan` flag is set, the requirement is
	614	searched for in the (online) package index as well as the locally
	615	installed packages. If a distribution matching `requirement` is found,
	616	the returned distribution's ``location`` is the value you would have
	617	gotten from calling the ``download()`` method with the matching
	618	distribution's URL or filename. If no matching distribution is found,
	619	``None`` is returned.
	620
	621	If the `source` flag is set, only source distributions and source
	622	checkout links will be considered. Unless the `develop_ok` flag is
	623	set, development and system eggs (i.e., those using the ``.egg-info``
	624	format) will be ignored.
	625	"""
	626	# process a Requirement
	627	self.info("Searching for %s", requirement)
	628	skipped = {}
	629	dist = None
	630
	631	def find(req, env=None):
	632	if env is None:
	633	env = self
	634	# Find a matching distribution; may be called more than once
	635
	636	for dist in env[req.key]:
	637
	638	if dist.precedence == DEVELOP_DIST and not develop_ok:
	639	if dist not in skipped:
	640	self.warn(
	641	"Skipping development or system egg: %s",
	642	dist,
	643	)
	644	skipped[dist] = 1
	645	continue
	646
	647	test = dist in req and (dist.precedence <= SOURCE_DIST or not source)
	648	if test:
	649	loc = self.download(dist.location, tmpdir)
	650	dist.download_location = loc
	651	if os.path.exists(dist.download_location):
	652	return dist
	653
	654	if force_scan:
	655	self.prescan()
	656	self.find_packages(requirement)
	657	dist = find(requirement)
	658
	659	if not dist and local_index is not None:
	660	dist = find(requirement, local_index)
	661
	662	if dist is None:
	663	if self.to_scan is not None:
	664	self.prescan()
	665	dist = find(requirement)
	666
	667	if dist is None and not force_scan:
	668	self.find_packages(requirement)
	669	dist = find(requirement)
	670
	671	if dist is None:
	672	self.warn(
	673	"No local packages or working download links found for %s%s",
	674	(source and "a source distribution of " or ""),
	675	requirement,
	676	)
	677	else:
	678	self.info("Best match: %s", dist)
	679	return dist.clone(location=dist.download_location)
	680
	681	def fetch(self, requirement, tmpdir, force_scan=False, source=False):
	682	"""Obtain a file suitable for fulfilling `requirement`
	683
	684	DEPRECATED; use the ``fetch_distribution()`` method now instead. For
	685	backward compatibility, this routine is identical but returns the
	686	``location`` of the downloaded distribution instead of a distribution
	687	object.
	688	"""
	689	dist = self.fetch_distribution(requirement, tmpdir, force_scan, source)
	690	if dist is not None:
	691	return dist.location
	692	return None
	693
	694	def gen_setup(self, filename, fragment, tmpdir):
	695	match = EGG_FRAGMENT.match(fragment)
	696	dists = (
	697	match
	698	and [
	699	d
	700	for d in interpret_distro_name(filename, match.group(1), None)
	701	if d.version
	702	]
	703	or []
	704	)
	705
	706	if len(dists) == 1: # unambiguous ``#egg`` fragment
	707	basename = os.path.basename(filename)
	708
	709	# Make sure the file has been downloaded to the temp dir.
	710	if os.path.dirname(filename) != tmpdir:
	711	dst = os.path.join(tmpdir, basename)
	712	if not (os.path.exists(dst) and os.path.samefile(filename, dst)):
	713	shutil.copy2(filename, dst)
	714	filename = dst
	715
	716	with open(os.path.join(tmpdir, 'setup.py'), 'w') as file:
	717	file.write(
	718	"from setuptools import setup\n"
	719	"setup(name=%r, version=%r, py_modules=[%r])\n"
	720	% (
	721	dists[0].project_name,
	722	dists[0].version,
	723	os.path.splitext(basename)[0],
	724	)
	725	)
	726	return filename
	727
	728	elif match:
	729	raise DistutilsError(
	730	"Can't unambiguously interpret project/version identifier %r; "
	731	"any dashes in the name or version should be escaped using "
	732	"underscores. %r" % (fragment, dists)
	733	)
	734	else:
	735	raise DistutilsError(
	736	"Can't process plain .py files without an '#egg=name-version'"
	737	" suffix to enable automatic setup script generation."
	738	)
	739
	740	dl_blocksize = 8192
	741
	742	def _download_to(self, url, filename):
	743	self.info("Downloading %s", url)
	744	# Download the file
	745	fp = None
	746	try:
	747	checker = HashChecker.from_url(url)
	748	fp = self.open_url(url)
	749	if isinstance(fp, urllib.error.HTTPError):
	750	raise DistutilsError(
	751	"Can't download %s: %s %s" % (url, fp.code, fp.msg)
	752	)
	753	headers = fp.info()
	754	blocknum = 0
	755	bs = self.dl_blocksize
	756	size = -1
	757	if "content-length" in headers:
	758	# Some servers return multiple Content-Length headers :(
	759	sizes = headers.get_all('Content-Length')
	760	size = max(map(int, sizes))
	761	self.reporthook(url, filename, blocknum, bs, size)
	762	with open(filename, 'wb') as tfp:
	763	while True:
	764	block = fp.read(bs)
	765	if block:
	766	checker.feed(block)
	767	tfp.write(block)
	768	blocknum += 1
	769	self.reporthook(url, filename, blocknum, bs, size)
	770	else:
	771	break
	772	self.check_hash(checker, filename, tfp)
	773	return headers
	774	finally:
	775	if fp:
	776	fp.close()
	777
	778	def reporthook(self, url, filename, blocknum, blksize, size):
	779	pass # no-op
	780
	781	# FIXME:
	782	def open_url(self, url, warning=None): # noqa: C901 # is too complex (12)
	783	if url.startswith('file:'):
	784	return local_open(url)
	785	try:
	786	return open_with_auth(url, self.opener)
	787	except (ValueError, http.client.InvalidURL) as v:
	788	msg = ' '.join([str(arg) for arg in v.args])
	789	if warning:
	790	self.warn(warning, msg)
	791	else:
	792	raise DistutilsError('%s %s' % (url, msg)) from v
	793	except urllib.error.HTTPError as v:
	794	return v
	795	except urllib.error.URLError as v:
	796	if warning:
	797	self.warn(warning, v.reason)
	798	else:
	799	raise DistutilsError(
	800	"Download error for %s: %s" % (url, v.reason)
	801	) from v
	802	except http.client.BadStatusLine as v:
	803	if warning:
	804	self.warn(warning, v.line)
	805	else:
	806	raise DistutilsError(
	807	'%s returned a bad status line. The server might be '
	808	'down, %s' % (url, v.line)
	809	) from v
	810	except (http.client.HTTPException, socket.error) as v:
	811	if warning:
	812	self.warn(warning, v)
	813	else:
	814	raise DistutilsError("Download error for %s: %s" % (url, v)) from v
	815
	816	def _download_url(self, scheme, url, tmpdir):
	817	# Determine download filename
	818	#
	819	name, fragment = egg_info_for_url(url)
	820	if name:
	821	while '..' in name:
	822	name = name.replace('..', '.').replace('\\', '_')
	823	else:
	824	name = "__downloaded__" # default if URL has no path contents
	825
	826	if name.endswith('.egg.zip'):
	827	name = name[:-4] # strip the extra .zip before download
	828
	829	filename = os.path.join(tmpdir, name)
	830
	831	# Download the file
	832	#
	833	if scheme == 'svn' or scheme.startswith('svn+'):
	834	return self._download_svn(url, filename)
	835	elif scheme == 'git' or scheme.startswith('git+'):
	836	return self._download_git(url, filename)
	837	elif scheme.startswith('hg+'):
	838	return self._download_hg(url, filename)
	839	elif scheme == 'file':
	840	return urllib.request.url2pathname(urllib.parse.urlparse(url)[2])
	841	else:
	842	self.url_ok(url, True) # raises error if not allowed
	843	return self._attempt_download(url, filename)
	844
	845	def scan_url(self, url):
	846	self.process_url(url, True)
	847
	848	def _attempt_download(self, url, filename):
	849	headers = self._download_to(url, filename)
	850	if 'html' in headers.get('content-type', '').lower():
	851	return self._download_html(url, headers, filename)
	852	else:
	853	return filename
	854
	855	def _download_html(self, url, headers, filename):
	856	file = open(filename)
	857	for line in file:
	858	if line.strip():
	859	# Check for a subversion index page
	860	if re.search(r'<title>([^- ]+ - )?Revision \d+:', line):
	861	# it's a subversion index page:
	862	file.close()
	863	os.unlink(filename)
	864	return self._download_svn(url, filename)
	865	break # not an index page
	866	file.close()
	867	os.unlink(filename)
	868	raise DistutilsError("Unexpected HTML page found at " + url)
	869
	870	def _download_svn(self, url, filename):
	871	warnings.warn("SVN download support is deprecated", UserWarning)
	872	url = url.split('#', 1)[0] # remove any fragment for svn's sake
	873	creds = ''
	874	if url.lower().startswith('svn:') and '@' in url:
	875	scheme, netloc, path, p, q, f = urllib.parse.urlparse(url)
	876	if not netloc and path.startswith('//') and '/' in path[2:]:
	877	netloc, path = path[2:].split('/', 1)
	878	auth, host = _splituser(netloc)
	879	if auth:
	880	if ':' in auth:
	881	user, pw = auth.split(':', 1)
	882	creds = " --username=%s --password=%s" % (user, pw)
	883	else:
	884	creds = " --username=" + auth
	885	netloc = host
	886	parts = scheme, netloc, url, p, q, f
	887	url = urllib.parse.urlunparse(parts)
	888	self.info("Doing subversion checkout from %s to %s", url, filename)
	889	os.system("svn checkout%s -q %s %s" % (creds, url, filename))
	890	return filename
	891
	892	@staticmethod
	893	def _vcs_split_rev_from_url(url, pop_prefix=False):
	894	scheme, netloc, path, query, frag = urllib.parse.urlsplit(url)
	895
	896	scheme = scheme.split('+', 1)[-1]
	897
	898	# Some fragment identification fails
	899	path = path.split('#', 1)[0]
	900
	901	rev = None
	902	if '@' in path:
	903	path, rev = path.rsplit('@', 1)
	904
	905	# Also, discard fragment
	906	url = urllib.parse.urlunsplit((scheme, netloc, path, query, ''))
	907
	908	return url, rev
	909
	910	def _download_git(self, url, filename):
	911	filename = filename.split('#', 1)[0]
	912	url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)
	913
	914	self.info("Doing git clone from %s to %s", url, filename)
	915	os.system("git clone --quiet %s %s" % (url, filename))
	916
	917	if rev is not None:
	918	self.info("Checking out %s", rev)
	919	os.system(
	920	"git -C %s checkout --quiet %s"
	921	% (
	922	filename,
	923	rev,
	924	)
	925	)
	926
	927	return filename
	928
	929	def _download_hg(self, url, filename):
	930	filename = filename.split('#', 1)[0]
	931	url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)
	932
	933	self.info("Doing hg clone from %s to %s", url, filename)
	934	os.system("hg clone --quiet %s %s" % (url, filename))
	935
	936	if rev is not None:
	937	self.info("Updating to %s", rev)
	938	os.system(
	939	"hg --cwd %s up -C -r %s -q"
	940	% (
	941	filename,
	942	rev,
	943	)
	944	)
	945
	946	return filename
	947
	948	def debug(self, msg, *args):
	949	log.debug(msg, *args)
	950
	951	def info(self, msg, *args):
	952	log.info(msg, *args)
	953
	954	def warn(self, msg, *args):
	955	log.warn(msg, *args)
	956
	957
	958	# This pattern matches a character entity reference (a decimal numeric
	959	# references, a hexadecimal numeric reference, or a named reference).
	960	entity_sub = re.compile(r'&(#(\d+\|x[\da-fA-F]+)\|[\w.:-]+);?').sub
	961
	962
	963	def decode_entity(match):
	964	what = match.group(0)
	965	return html.unescape(what)
	966
	967
	968	def htmldecode(text):
	969	"""
	970	Decode HTML entities in the given text.
	971
	972	>>> htmldecode(
	973	... 'https://../package_name-0.1.2.tar.gz'
	974	... '?tokena=A&tokenb=B">package_name-0.1.2.tar.gz')
	975	'https://../package_name-0.1.2.tar.gz?tokena=A&tokenb=B">package_name-0.1.2.tar.gz'
	976	"""
	977	return entity_sub(decode_entity, text)
	978
	979
	980	def socket_timeout(timeout=15):
	981	def _socket_timeout(func):
	982	def _socket_timeout(args, *kwargs):
	983	old_timeout = socket.getdefaulttimeout()
	984	socket.setdefaulttimeout(timeout)
	985	try:
	986	return func(args, *kwargs)
	987	finally:
	988	socket.setdefaulttimeout(old_timeout)
	989
	990	return _socket_timeout
	991
	992	return _socket_timeout
	993
	994
	995	def _encode_auth(auth):
	996	"""
	997	Encode auth from a URL suitable for an HTTP header.
	998	>>> str(_encode_auth('username%3Apassword'))
	999	'dXNlcm5hbWU6cGFzc3dvcmQ='
	1000
	1001	Long auth strings should not cause a newline to be inserted.
	1002	>>> long_auth = 'username:' + 'password'*10
	1003	>>> chr(10) in str(_encode_auth(long_auth))
	1004	False
	1005	"""
	1006	auth_s = urllib.parse.unquote(auth)
	1007	# convert to bytes
	1008	auth_bytes = auth_s.encode()
	1009	encoded_bytes = base64.b64encode(auth_bytes)
	1010	# convert back to a string
	1011	encoded = encoded_bytes.decode()
	1012	# strip the trailing carriage return
	1013	return encoded.replace('\n', '')
	1014
	1015
	1016	class Credential:
	1017	"""
	1018	A username/password pair. Use like a namedtuple.
	1019	"""
	1020
	1021	def __init__(self, username, password):
	1022	self.username = username
	1023	self.password = password
	1024
	1025	def __iter__(self):
	1026	yield self.username
	1027	yield self.password
	1028
	1029	def __str__(self):
	1030	return '%(username)s:%(password)s' % vars(self)
	1031
	1032
	1033	class PyPIConfig(configparser.RawConfigParser):
	1034	def __init__(self):
	1035	"""
	1036	Load from ~/.pypirc
	1037	"""
	1038	defaults = dict.fromkeys(['username', 'password', 'repository'], '')
	1039	super().__init__(defaults)
	1040
	1041	rc = os.path.join(os.path.expanduser('~'), '.pypirc')
	1042	if os.path.exists(rc):
	1043	self.read(rc)
	1044
	1045	@property
	1046	def creds_by_repository(self):
	1047	sections_with_repositories = [
	1048	section
	1049	for section in self.sections()
	1050	if self.get(section, 'repository').strip()
	1051	]
	1052
	1053	return dict(map(self._get_repo_cred, sections_with_repositories))
	1054
	1055	def _get_repo_cred(self, section):
	1056	repo = self.get(section, 'repository').strip()
	1057	return repo, Credential(
	1058	self.get(section, 'username').strip(),
	1059	self.get(section, 'password').strip(),
	1060	)
	1061
	1062	def find_credential(self, url):
	1063	"""
	1064	If the URL indicated appears to be a repository defined in this
	1065	config, return the credential for that repository.
	1066	"""
	1067	for repository, cred in self.creds_by_repository.items():
	1068	if url.startswith(repository):
	1069	return cred
	1070
	1071
	1072	def open_with_auth(url, opener=urllib.request.urlopen):
	1073	"""Open a urllib2 request, handling HTTP authentication"""
	1074
	1075	parsed = urllib.parse.urlparse(url)
	1076	scheme, netloc, path, params, query, frag = parsed
	1077
	1078	# Double scheme does not raise on macOS as revealed by a
	1079	# failing test. We would expect "nonnumeric port". Refs #20.
	1080	if netloc.endswith(':'):
	1081	raise http.client.InvalidURL("nonnumeric port: ''")
	1082
	1083	if scheme in ('http', 'https'):
	1084	auth, address = _splituser(netloc)
	1085	else:
	1086	auth = None
	1087
	1088	if not auth:
	1089	cred = PyPIConfig().find_credential(url)
	1090	if cred:
	1091	auth = str(cred)
	1092	info = cred.username, url
	1093	log.info('Authenticating as %s for %s (from .pypirc)', *info)
	1094
	1095	if auth:
	1096	auth = "Basic " + _encode_auth(auth)
	1097	parts = scheme, address, path, params, query, frag
	1098	new_url = urllib.parse.urlunparse(parts)
	1099	request = urllib.request.Request(new_url)
	1100	request.add_header("Authorization", auth)
	1101	else:
	1102	request = urllib.request.Request(url)
	1103
	1104	request.add_header('User-Agent', user_agent)
	1105	fp = opener(request)
	1106
	1107	if auth:
	1108	# Put authentication info back into request URL if same host,
	1109	# so that links found on the page will work
	1110	s2, h2, path2, param2, query2, frag2 = urllib.parse.urlparse(fp.url)
	1111	if s2 == scheme and h2 == address:
	1112	parts = s2, netloc, path2, param2, query2, frag2
	1113	fp.url = urllib.parse.urlunparse(parts)
	1114
	1115	return fp
	1116
	1117
	1118	# copy of urllib.parse._splituser from Python 3.8
	1119	def _splituser(host):
	1120	"""splituser('user[:passwd]@host[:port]')
	1121	--> 'user[:passwd]', 'host[:port]'."""
	1122	user, delim, host = host.rpartition('@')
	1123	return (user if delim else None), host
	1124
	1125
# Rebind open_with_auth to a wrapped version that runs under a default
# socket timeout of _SOCKET_TIMEOUT seconds; adding a timeout avoids
# freezing package_index.
open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth)
	1128
	1129
def fix_sf_url(url):
    """Return ``url`` unchanged; kept only for backward compatibility."""
    return url  # backward compatibility
	1132
	1133
	1134	def local_open(url):
	1135	"""Read a local path, with special support for directories"""
	1136	scheme, server, path, param, query, frag = urllib.parse.urlparse(url)
	1137	filename = urllib.request.url2pathname(path)
	1138	if os.path.isfile(filename):
	1139	return urllib.request.urlopen(url)
	1140	elif path.endswith('/') and os.path.isdir(filename):
	1141	files = []
	1142	for f in os.listdir(filename):
	1143	filepath = os.path.join(filename, f)
	1144	if f == 'index.html':
	1145	with open(filepath, 'r') as fp:
	1146	body = fp.read()
	1147	break
	1148	elif os.path.isdir(filepath):
	1149	f += '/'
	1150	files.append('<a href="{name}">{name}</a>'.format(name=f))
	1151	else:
	1152	tmpl = (
	1153	"<html><head><title>{url}</title>" "</head><body>{files}</body></html>"
	1154	)
	1155	body = tmpl.format(url=url, files='\n'.join(files))
	1156	status, message = 200, "OK"
	1157	else:
	1158	status, message, body = 404, "Path not found", "Not found"
	1159
	1160	headers = {'content-type': 'text/html'}
	1161	body_stream = io.StringIO(body)
	1162	return urllib.error.HTTPError(url, status, message, headers, body_stream)