]>
Commit | Line | Data |
---|---|---|
1 | import email.feedparser | |
2 | import email.header | |
3 | import email.message | |
4 | import email.parser | |
5 | import email.policy | |
6 | import sys | |
7 | import typing | |
8 | from typing import ( | |
9 | Any, | |
10 | Callable, | |
11 | Dict, | |
12 | Generic, | |
13 | List, | |
14 | Optional, | |
15 | Tuple, | |
16 | Type, | |
17 | Union, | |
18 | cast, | |
19 | ) | |
20 | ||
21 | from . import requirements, specifiers, utils, version as version_module | |
22 | ||
T = typing.TypeVar("T")

# ``Literal`` and ``TypedDict`` only live in the stdlib ``typing`` module on
# Python 3.8+; on older versions fall back to ``typing_extensions``.
if sys.version_info[:2] >= (3, 8):  # pragma: no cover
    from typing import Literal, TypedDict
else:  # pragma: no cover
    if typing.TYPE_CHECKING:
        # Static type checkers can always resolve typing_extensions.
        from typing_extensions import Literal, TypedDict
    else:
        try:
            from typing_extensions import Literal, TypedDict
        except ImportError:
            # Minimal runtime stand-ins so the class definitions below still
            # execute when typing_extensions is not installed.  They carry no
            # typing semantics; they only need to be subclassable.

            class Literal:
                def __init_subclass__(*_args, **_kwargs):
                    pass

            class TypedDict:
                def __init_subclass__(*_args, **_kwargs):
                    pass
41 | ||
42 | ||
try:
    # NOTE: ``__builtins__`` is a dict in imported modules (it is only a
    # module in ``__main__``), so ``__builtins__.ExceptionGroup`` raised
    # AttributeError even on Python 3.11+ and the fallback class below was
    # always used.  Referencing the name directly works in both cases: on
    # 3.11+ the builtin resolves, on older versions NameError is raised.
    ExceptionGroup
except NameError:  # pragma: no cover

    class ExceptionGroup(Exception):  # noqa: N818
        """A minimal implementation of :external:exc:`ExceptionGroup` from Python 3.11.

        If :external:exc:`ExceptionGroup` is already defined by Python itself,
        that version is used instead.
        """

        message: str
        exceptions: List[Exception]

        def __init__(self, message: str, exceptions: List[Exception]) -> None:
            self.message = message
            self.exceptions = exceptions

        def __repr__(self) -> str:
            return f"{self.__class__.__name__}({self.message!r}, {self.exceptions!r})"
63 | ||
64 | ||
class InvalidMetadata(ValueError):
    """Raised when a metadata field holds invalid data.

    The offending core metadata field name is recorded on :attr:`field`,
    while the human-readable explanation travels as the exception message.
    """

    field: str
    """The name of the field that contains invalid data."""

    def __init__(self, field: str, message: str) -> None:
        super().__init__(message)
        self.field = field
74 | ||
75 | ||
# The RawMetadata class attempts to make as few assumptions about the underlying
# serialization formats as possible. The idea is that as long as a serialization
# format offers some very basic primitives in *some* way then we can support
# serializing to and from that format.
class RawMetadata(TypedDict, total=False):
    """A dictionary of raw core metadata.

    Each field in core metadata maps to a key of this dictionary (when data is
    provided). The key is lower-case and underscores are used instead of dashes
    compared to the equivalent core metadata field. Any core metadata field that
    can be specified multiple times or can hold multiple values in a single
    field has a key with a plural name. See :class:`Metadata` whose attributes
    match the keys of this dictionary.

    Core metadata fields that can be specified multiple times are stored as a
    list or dict depending on which is appropriate for the field. Any fields
    which hold multiple values in a single field are stored as a list.

    """

    # Metadata 1.0 - PEP 241
    metadata_version: str
    name: str
    version: str
    platforms: List[str]
    summary: str
    description: str
    keywords: List[str]
    home_page: str
    author: str
    author_email: str
    license: str

    # Metadata 1.1 - PEP 314
    supported_platforms: List[str]
    download_url: str
    classifiers: List[str]
    requires: List[str]
    provides: List[str]
    obsoletes: List[str]

    # Metadata 1.2 - PEP 345
    maintainer: str
    maintainer_email: str
    requires_dist: List[str]
    provides_dist: List[str]
    obsoletes_dist: List[str]
    requires_python: str
    requires_external: List[str]
    project_urls: Dict[str, str]

    # Metadata 2.0
    # PEP 426 attempted to completely revamp the metadata format
    # but got stuck without ever being able to build consensus on
    # it and ultimately ended up withdrawn.
    #
    # However, a number of tools had started emitting METADATA with
    # `2.0` Metadata-Version, so for historical reasons, this version
    # was skipped.

    # Metadata 2.1 - PEP 566
    description_content_type: str
    provides_extra: List[str]

    # Metadata 2.2 - PEP 643
    dynamic: List[str]

    # Metadata 2.3 - PEP 685
    # No new fields were added in PEP 685, just some edge cases were
    # tightened up to provide better interoperability.
146 | ||
147 | ||
# RawMetadata keys whose value is a single string.
_STRING_FIELDS = {
    "author",
    "author_email",
    "description",
    "description_content_type",
    "download_url",
    "home_page",
    "license",
    "maintainer",
    "maintainer_email",
    "metadata_version",
    "name",
    "requires_python",
    "summary",
    "version",
}

# RawMetadata keys whose value is a list of strings (fields that may be
# specified multiple times).
_LIST_FIELDS = {
    "classifiers",
    "dynamic",
    "obsoletes",
    "obsoletes_dist",
    "platforms",
    "provides",
    "provides_dist",
    "provides_extra",
    "requires",
    "requires_dist",
    "requires_external",
    "supported_platforms",
}

# RawMetadata keys whose value is a mapping (label -> value pairs).
_DICT_FIELDS = {
    "project_urls",
}
183 | ||
184 | ||
185 | def _parse_keywords(data: str) -> List[str]: | |
186 | """Split a string of comma-separate keyboards into a list of keywords.""" | |
187 | return [k.strip() for k in data.split(",")] | |
188 | ||
189 | ||
190 | def _parse_project_urls(data: List[str]) -> Dict[str, str]: | |
191 | """Parse a list of label/URL string pairings separated by a comma.""" | |
192 | urls = {} | |
193 | for pair in data: | |
194 | # Our logic is slightly tricky here as we want to try and do | |
195 | # *something* reasonable with malformed data. | |
196 | # | |
197 | # The main thing that we have to worry about, is data that does | |
198 | # not have a ',' at all to split the label from the Value. There | |
199 | # isn't a singular right answer here, and we will fail validation | |
200 | # later on (if the caller is validating) so it doesn't *really* | |
201 | # matter, but since the missing value has to be an empty str | |
202 | # and our return value is dict[str, str], if we let the key | |
203 | # be the missing value, then they'd have multiple '' values that | |
204 | # overwrite each other in a accumulating dict. | |
205 | # | |
206 | # The other potentional issue is that it's possible to have the | |
207 | # same label multiple times in the metadata, with no solid "right" | |
208 | # answer with what to do in that case. As such, we'll do the only | |
209 | # thing we can, which is treat the field as unparseable and add it | |
210 | # to our list of unparsed fields. | |
211 | parts = [p.strip() for p in pair.split(",", 1)] | |
212 | parts.extend([""] * (max(0, 2 - len(parts)))) # Ensure 2 items | |
213 | ||
214 | # TODO: The spec doesn't say anything about if the keys should be | |
215 | # considered case sensitive or not... logically they should | |
216 | # be case-preserving and case-insensitive, but doing that | |
217 | # would open up more cases where we might have duplicate | |
218 | # entries. | |
219 | label, url = parts | |
220 | if label in urls: | |
221 | # The label already exists in our set of urls, so this field | |
222 | # is unparseable, and we can just add the whole thing to our | |
223 | # unparseable data and stop processing it. | |
224 | raise KeyError("duplicate labels in project urls") | |
225 | urls[label] = url | |
226 | ||
227 | return urls | |
228 | ||
229 | ||
230 | def _get_payload(msg: email.message.Message, source: Union[bytes, str]) -> str: | |
231 | """Get the body of the message.""" | |
232 | # If our source is a str, then our caller has managed encodings for us, | |
233 | # and we don't need to deal with it. | |
234 | if isinstance(source, str): | |
235 | payload: str = msg.get_payload() | |
236 | return payload | |
237 | # If our source is a bytes, then we're managing the encoding and we need | |
238 | # to deal with it. | |
239 | else: | |
240 | bpayload: bytes = msg.get_payload(decode=True) | |
241 | try: | |
242 | return bpayload.decode("utf8", "strict") | |
243 | except UnicodeDecodeError: | |
244 | raise ValueError("payload in an invalid encoding") | |
245 | ||
246 | ||
# The various parse_FORMAT functions here are intended to be as lenient as
# possible in their parsing, while still returning a correctly typed
# RawMetadata.
#
# To aid in this, we also generally want to do as little touching of the
# data as possible, except where there are possibly some historic holdovers
# that make valid data awkward to work with.
#
# While this is a lower level, intermediate format than our ``Metadata``
# class, some light touch ups can make a massive difference in usability.

# Map METADATA (email header) field names to RawMetadata keys.  Header names
# are compared lower-cased; note that multi-valued headers map to the plural
# RawMetadata key (e.g. "classifier" -> "classifiers").
_EMAIL_TO_RAW_MAPPING = {
    "author": "author",
    "author-email": "author_email",
    "classifier": "classifiers",
    "description": "description",
    "description-content-type": "description_content_type",
    "download-url": "download_url",
    "dynamic": "dynamic",
    "home-page": "home_page",
    "keywords": "keywords",
    "license": "license",
    "maintainer": "maintainer",
    "maintainer-email": "maintainer_email",
    "metadata-version": "metadata_version",
    "name": "name",
    "obsoletes": "obsoletes",
    "obsoletes-dist": "obsoletes_dist",
    "platform": "platforms",
    "project-url": "project_urls",
    "provides": "provides",
    "provides-dist": "provides_dist",
    "provides-extra": "provides_extra",
    "requires": "requires",
    "requires-dist": "requires_dist",
    "requires-external": "requires_external",
    "requires-python": "requires_python",
    "summary": "summary",
    "supported-platform": "supported_platforms",
    "version": "version",
}
# Inverse mapping: RawMetadata key -> METADATA field name.
_RAW_TO_EMAIL_MAPPING = {raw: email for email, raw in _EMAIL_TO_RAW_MAPPING.items()}
290 | ||
291 | ||
def parse_email(data: Union[bytes, str]) -> Tuple[RawMetadata, Dict[str, List[str]]]:
    """Parse a distribution's metadata stored as email headers (e.g. from ``METADATA``).

    This function returns a two-item tuple of dicts. The first dict is of
    recognized fields from the core metadata specification. Fields that can be
    parsed and translated into Python's built-in types are converted
    appropriately. All other fields are left as-is. Fields that are allowed to
    appear multiple times are stored as lists.

    The second dict contains all other fields from the metadata. This includes
    any unrecognized fields. It also includes any fields which are expected to
    be parsed into a built-in type but were not formatted appropriately. Finally,
    any fields that are expected to appear only once but are repeated are
    included in this dict.

    """
    raw: Dict[str, Union[str, List[str], Dict[str, str]]] = {}
    unparsed: Dict[str, List[str]] = {}

    # compat32 is deliberate: it leaves header values untouched so that we can
    # do our own encoding handling below.
    if isinstance(data, str):
        parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data)
    else:
        parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data)

    # We have to wrap parsed.keys() in a set, because in the case of multiple
    # values for a key (a list), the key will appear multiple times in the
    # list of keys, but we're avoiding that by using get_all().
    for name in frozenset(parsed.keys()):
        # Header names in RFC are case insensitive, so we'll normalize to all
        # lower case to make comparisons easier.
        name = name.lower()

        # We use get_all() here, even for fields that aren't multiple use,
        # because otherwise someone could have e.g. two Name fields, and we
        # would just silently ignore it rather than doing something about it.
        headers = parsed.get_all(name) or []

        # The way the email module works when parsing bytes is that it
        # unconditionally decodes the bytes as ascii using the surrogateescape
        # handler. When you pull that data back out (such as with get_all() ),
        # it looks to see if the str has any surrogate escapes, and if it does
        # it wraps it in a Header object instead of returning the string.
        #
        # As such, we'll look for those Header objects, and fix up the encoding.
        value = []
        # Flag if we have run into any issues processing the headers, thus
        # signalling that the data belongs in 'unparsed'.
        valid_encoding = True
        for h in headers:
            # It's unclear if this can return more types than just a Header or
            # a str, so we'll just assert here to make sure.
            assert isinstance(h, (email.header.Header, str))

            # If it's a header object, we need to do our little dance to get
            # the real data out of it. In cases where there is invalid data
            # we're going to end up with mojibake, but there's no obvious, good
            # way around that without reimplementing parts of the Header object
            # ourselves.
            #
            # That should be fine since, if mojibake happens, this key is
            # going into the unparsed dict anyways.
            if isinstance(h, email.header.Header):
                # The Header object stores its data as chunks, and each chunk
                # can be independently encoded, so we'll need to check each
                # of them.
                chunks: List[Tuple[bytes, Optional[str]]] = []
                for bin, encoding in email.header.decode_header(h):
                    try:
                        bin.decode("utf8", "strict")
                    except UnicodeDecodeError:
                        # Enable mojibake.
                        encoding = "latin1"
                        valid_encoding = False
                    else:
                        encoding = "utf8"
                    chunks.append((bin, encoding))

                # Turn our chunks back into a Header object, then let that
                # Header object do the right thing to turn them into a
                # string for us.
                value.append(str(email.header.make_header(chunks)))
            # This is already a string, so just add it.
            else:
                value.append(h)

        # We've processed all of our values to get them into a list of str,
        # but we may have mojibake data, in which case this is an unparsed
        # field.
        if not valid_encoding:
            unparsed[name] = value
            continue

        raw_name = _EMAIL_TO_RAW_MAPPING.get(name)
        if raw_name is None:
            # This is a bit of a weird situation, we've encountered a key that
            # we don't know what it means, so we don't know whether it's meant
            # to be a list or not.
            #
            # Since we can't really tell one way or another, we'll just leave it
            # as a list, even though it may be a single item list, because that's
            # what makes the most sense for email headers.
            unparsed[name] = value
            continue

        # If this is one of our string fields, then we'll check to see if our
        # value is a list of a single item. If it is then we'll assume that
        # it was emitted as a single string, and unwrap the str from inside
        # the list.
        #
        # If it's any other kind of data, then we haven't the faintest clue
        # what we should parse it as, and we have to just add it to our list
        # of unparsed stuff.
        if raw_name in _STRING_FIELDS and len(value) == 1:
            raw[raw_name] = value[0]
        # If this is one of our list of string fields, then we can just assign
        # the value, since email *only* has strings, and our get_all() call
        # above ensures that this is a list.
        elif raw_name in _LIST_FIELDS:
            raw[raw_name] = value
        # Special Case: Keywords
        # The keywords field is implemented in the metadata spec as a str,
        # but it conceptually is a list of strings, and is serialized using
        # ", ".join(keywords), so we'll do some light data massaging to turn
        # this into what it logically is.
        elif raw_name == "keywords" and len(value) == 1:
            raw[raw_name] = _parse_keywords(value[0])
        # Special Case: Project-URL
        # The project urls is implemented in the metadata spec as a list of
        # specially-formatted strings that represent a key and a value, which
        # is fundamentally a mapping, however the email format doesn't support
        # mappings in a sane way, so it was crammed into a list of strings
        # instead.
        #
        # We will do a little light data massaging to turn this into a map as
        # it logically should be.
        elif raw_name == "project_urls":
            try:
                raw[raw_name] = _parse_project_urls(value)
            except KeyError:
                unparsed[name] = value
        # Nothing that we've done has managed to parse this, so it'll just
        # throw it in our unparseable data and move on.
        else:
            unparsed[name] = value

    # We need to support getting the Description from the message payload in
    # addition to getting it from the headers. This does mean, though, there
    # is the possibility of it being set both ways, in which case we put both
    # in 'unparsed' since we don't know which is right.
    try:
        payload = _get_payload(parsed, data)
    except ValueError:
        unparsed.setdefault("description", []).append(
            parsed.get_payload(decode=isinstance(data, bytes))
        )
    else:
        if payload:
            # Check to see if we've already got a description, if so then both
            # it, and this body move to unparseable.
            if "description" in raw:
                description_header = cast(str, raw.pop("description"))
                unparsed.setdefault("description", []).extend(
                    [description_header, payload]
                )
            elif "description" in unparsed:
                unparsed["description"].append(payload)
            else:
                raw["description"] = payload

    # We need to cast our `raw` to a metadata, because a TypedDict only support
    # literal key names, but we're computing our key names on purpose, but the
    # way this function is implemented, our `TypedDict` can only have valid key
    # names.
    return cast(RawMetadata, raw), unparsed
466 | ||
467 | ||
# Sentinel distinct from any real value; used for "no value present" checks.
_NOT_FOUND = object()


# Keep the two values in sync.
_VALID_METADATA_VERSIONS = ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"]
_MetadataVersion = Literal["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"]

# Fields that must be present for the metadata to be considered valid at all.
_REQUIRED_ATTRS = frozenset(["metadata_version", "name", "version"])
476 | ||
477 | ||
class _Validator(Generic[T]):
    """Validate a metadata field.

    All _process_*() methods correspond to a core metadata field. The method is
    called with the field's raw value. If the raw value is valid it is returned
    in its "enriched" form (e.g. ``version.Version`` for the ``Version`` field).
    If the raw value is invalid, :exc:`InvalidMetadata` is raised (with a cause
    as appropriate).
    """

    # Attribute name on Metadata (e.g. "requires_dist"); set by __set_name__.
    name: str
    # Equivalent core metadata field name (e.g. "requires-dist").
    raw_name: str
    # Metadata version in which this field was introduced.
    added: _MetadataVersion

    def __init__(
        self,
        *,
        added: _MetadataVersion = "1.0",
    ) -> None:
        self.added = added

    def __set_name__(self, _owner: "Metadata", name: str) -> None:
        self.name = name
        self.raw_name = _RAW_TO_EMAIL_MAPPING[name]

    def __get__(self, instance: "Metadata", _owner: Type["Metadata"]) -> T:
        # With Python 3.8, the caching can be replaced with functools.cached_property().
        # No need to check the cache as attribute lookup will resolve into the
        # instance's __dict__ before __get__ is called.
        cache = instance.__dict__
        try:
            value = instance._raw[self.name]  # type: ignore[literal-required]
        except KeyError:
            # Substitute the appropriate "empty" value for an absent field.
            if self.name in _STRING_FIELDS:
                value = ""
            elif self.name in _LIST_FIELDS:
                value = []
            elif self.name in _DICT_FIELDS:
                value = {}
            else:  # pragma: no cover
                assert False

        try:
            converter: Callable[[Any], T] = getattr(self, f"_process_{self.name}")
        except AttributeError:
            pass
        else:
            value = converter(value)

        cache[self.name] = value
        # Drop the raw value: the enriched value cached on the instance is the
        # single source of truth from here on.
        try:
            del instance._raw[self.name]  # type: ignore[misc]
        except KeyError:
            pass

        return cast(T, value)

    def _invalid_metadata(
        self, msg: str, cause: Optional[Exception] = None
    ) -> InvalidMetadata:
        """Build an :exc:`InvalidMetadata` with ``{field}`` interpolated in *msg*."""
        exc = InvalidMetadata(
            self.raw_name, msg.format_map({"field": repr(self.raw_name)})
        )
        exc.__cause__ = cause
        return exc

    def _process_metadata_version(self, value: str) -> _MetadataVersion:
        # Implicitly makes Metadata-Version required.
        if value not in _VALID_METADATA_VERSIONS:
            raise self._invalid_metadata(f"{value!r} is not a valid metadata version")
        return cast(_MetadataVersion, value)

    def _process_name(self, value: str) -> str:
        if not value:
            raise self._invalid_metadata("{field} is a required field")
        # Validate the name as a side-effect.
        try:
            utils.canonicalize_name(value, validate=True)
        except utils.InvalidName as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            )
        else:
            return value

    def _process_version(self, value: str) -> version_module.Version:
        if not value:
            raise self._invalid_metadata("{field} is a required field")
        try:
            return version_module.parse(value)
        except version_module.InvalidVersion as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            )

    def _process_summary(self, value: str) -> str:
        """Check the field contains no newlines."""
        if "\n" in value:
            raise self._invalid_metadata("{field} must be a single line")
        return value

    def _process_description_content_type(self, value: str) -> str:
        content_types = {"text/plain", "text/x-rst", "text/markdown"}
        message = email.message.EmailMessage()
        message["content-type"] = value

        content_type, parameters = (
            # Defaults to `text/plain` if parsing failed.
            message.get_content_type().lower(),
            message["content-type"].params,
        )
        # Check if content-type is valid or defaulted to `text/plain` and thus was
        # not parseable.
        if content_type not in content_types or content_type not in value.lower():
            raise self._invalid_metadata(
                f"{{field}} must be one of {list(content_types)}, not {value!r}"
            )

        charset = parameters.get("charset", "UTF-8")
        if charset != "UTF-8":
            # BUGFIX: previously used list(charset), which exploded the
            # charset string into a list of characters in the message.
            raise self._invalid_metadata(
                f"{{field}} can only specify the UTF-8 charset, not {charset!r}"
            )

        markdown_variants = {"GFM", "CommonMark"}
        variant = parameters.get("variant", "GFM")  # Use an acceptable default.
        if content_type == "text/markdown" and variant not in markdown_variants:
            raise self._invalid_metadata(
                f"valid Markdown variants for {{field}} are {list(markdown_variants)}, "
                f"not {variant!r}",
            )
        return value

    def _process_dynamic(self, value: List[str]) -> List[str]:
        # Lower-case once; the lowered list is both validated and returned.
        lowered = [dynamic_field.lower() for dynamic_field in value]
        for dynamic_field in lowered:
            if dynamic_field in {"name", "version", "metadata-version"}:
                # BUGFIX: report the offending field, not the whole list.
                raise self._invalid_metadata(
                    f"{dynamic_field!r} is not allowed as a dynamic field"
                )
            elif dynamic_field not in _EMAIL_TO_RAW_MAPPING:
                raise self._invalid_metadata(
                    f"{dynamic_field!r} is not a valid dynamic field"
                )
        return lowered

    def _process_provides_extra(
        self,
        value: List[str],
    ) -> List[utils.NormalizedName]:
        normalized_names = []
        try:
            for name in value:
                normalized_names.append(utils.canonicalize_name(name, validate=True))
        except utils.InvalidName as exc:
            raise self._invalid_metadata(
                f"{name!r} is invalid for {{field}}", cause=exc
            )
        else:
            return normalized_names

    def _process_requires_python(self, value: str) -> specifiers.SpecifierSet:
        try:
            return specifiers.SpecifierSet(value)
        except specifiers.InvalidSpecifier as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            )

    def _process_requires_dist(
        self,
        value: List[str],
    ) -> List[requirements.Requirement]:
        reqs = []
        try:
            for req in value:
                reqs.append(requirements.Requirement(req))
        except requirements.InvalidRequirement as exc:
            raise self._invalid_metadata(f"{req!r} is invalid for {{field}}", cause=exc)
        else:
            return reqs
656 | ||
657 | ||
class Metadata:
    """Representation of distribution metadata.

    Compared to :class:`RawMetadata`, this class provides objects representing
    metadata fields instead of only using built-in types. Any invalid metadata
    will cause :exc:`InvalidMetadata` to be raised (with a
    :py:attr:`~BaseException.__cause__` attribute as appropriate).
    """

    _raw: RawMetadata

    @classmethod
    def from_raw(cls, data: RawMetadata, *, validate: bool = True) -> "Metadata":
        """Create an instance from :class:`RawMetadata`.

        If *validate* is true, all metadata will be validated. All exceptions
        related to validation will be gathered and raised as an :class:`ExceptionGroup`.
        """
        ins = cls()
        ins._raw = data.copy()  # Mutations occur due to caching enriched values.

        if validate:
            exceptions: List[InvalidMetadata] = []
            try:
                metadata_version = ins.metadata_version
                metadata_age = _VALID_METADATA_VERSIONS.index(metadata_version)
            except InvalidMetadata as metadata_version_exc:
                exceptions.append(metadata_version_exc)
                metadata_version = None

            # Make sure to check for the fields that are present, the required
            # fields (so their absence can be reported).
            fields_to_check = frozenset(ins._raw) | _REQUIRED_ATTRS
            # Remove fields that have already been checked.
            fields_to_check -= {"metadata_version"}

            for key in fields_to_check:
                try:
                    if metadata_version:
                        # Can't use getattr() as that triggers descriptor protocol which
                        # will fail due to no value for the instance argument.
                        try:
                            field_metadata_version = cls.__dict__[key].added
                        except KeyError:
                            exc = InvalidMetadata(key, f"unrecognized field: {key!r}")
                            exceptions.append(exc)
                            continue
                        field_age = _VALID_METADATA_VERSIONS.index(
                            field_metadata_version
                        )
                        if field_age > metadata_age:
                            field = _RAW_TO_EMAIL_MAPPING[key]
                            # BUGFIX: these were plain strings, so the error
                            # was raised with literal "{field}" placeholders;
                            # f-strings interpolate the actual values.
                            exc = InvalidMetadata(
                                field,
                                f"{field} introduced in metadata version "
                                f"{field_metadata_version}, not {metadata_version}",
                            )
                            exceptions.append(exc)
                            continue
                    getattr(ins, key)
                except InvalidMetadata as exc:
                    exceptions.append(exc)

            if exceptions:
                raise ExceptionGroup("invalid metadata", exceptions)

        return ins

    @classmethod
    def from_email(
        cls, data: Union[bytes, str], *, validate: bool = True
    ) -> "Metadata":
        """Parse metadata from email headers.

        If *validate* is true, the metadata will be validated. All exceptions
        related to validation will be gathered and raised as an :class:`ExceptionGroup`.
        """
        # Use typing.List for consistency with the rest of this module (and
        # type-checking support on Python < 3.9).
        exceptions: List[InvalidMetadata] = []
        raw, unparsed = parse_email(data)

        if validate:
            for unparsed_key in unparsed:
                if unparsed_key in _EMAIL_TO_RAW_MAPPING:
                    message = f"{unparsed_key!r} has invalid data"
                else:
                    message = f"unrecognized field: {unparsed_key!r}"
                exceptions.append(InvalidMetadata(unparsed_key, message))

            if exceptions:
                raise ExceptionGroup("unparsed", exceptions)

        try:
            return cls.from_raw(raw, validate=validate)
        except ExceptionGroup as exc_group:
            exceptions.extend(exc_group.exceptions)
            raise ExceptionGroup("invalid or unparsed metadata", exceptions) from None

    metadata_version: _Validator[_MetadataVersion] = _Validator()
    """:external:ref:`core-metadata-metadata-version`
    (required; validated to be a valid metadata version)"""
    name: _Validator[str] = _Validator()
    """:external:ref:`core-metadata-name`
    (required; validated using :func:`~packaging.utils.canonicalize_name` and its
    *validate* parameter)"""
    version: _Validator[version_module.Version] = _Validator()
    """:external:ref:`core-metadata-version` (required)"""
    dynamic: _Validator[List[str]] = _Validator(
        added="2.2",
    )
    """:external:ref:`core-metadata-dynamic`
    (validated against core metadata field names and lowercased)"""
    platforms: _Validator[List[str]] = _Validator()
    """:external:ref:`core-metadata-platform`"""
    supported_platforms: _Validator[List[str]] = _Validator(added="1.1")
    """:external:ref:`core-metadata-supported-platform`"""
    summary: _Validator[str] = _Validator()
    """:external:ref:`core-metadata-summary` (validated to contain no newlines)"""
    description: _Validator[str] = _Validator()  # TODO 2.1: can be in body
    """:external:ref:`core-metadata-description`"""
    description_content_type: _Validator[str] = _Validator(added="2.1")
    """:external:ref:`core-metadata-description-content-type` (validated)"""
    keywords: _Validator[List[str]] = _Validator()
    """:external:ref:`core-metadata-keywords`"""
    home_page: _Validator[str] = _Validator()
    """:external:ref:`core-metadata-home-page`"""
    download_url: _Validator[str] = _Validator(added="1.1")
    """:external:ref:`core-metadata-download-url`"""
    author: _Validator[str] = _Validator()
    """:external:ref:`core-metadata-author`"""
    author_email: _Validator[str] = _Validator()
    """:external:ref:`core-metadata-author-email`"""
    maintainer: _Validator[str] = _Validator(added="1.2")
    """:external:ref:`core-metadata-maintainer`"""
    maintainer_email: _Validator[str] = _Validator(added="1.2")
    """:external:ref:`core-metadata-maintainer-email`"""
    license: _Validator[str] = _Validator()
    """:external:ref:`core-metadata-license`"""
    classifiers: _Validator[List[str]] = _Validator(added="1.1")
    """:external:ref:`core-metadata-classifier`"""
    requires_dist: _Validator[List[requirements.Requirement]] = _Validator(added="1.2")
    """:external:ref:`core-metadata-requires-dist`"""
    requires_python: _Validator[specifiers.SpecifierSet] = _Validator(added="1.2")
    """:external:ref:`core-metadata-requires-python`"""
    # Because `Requires-External` allows for non-PEP 440 version specifiers, we
    # don't do any processing on the values.
    requires_external: _Validator[List[str]] = _Validator(added="1.2")
    """:external:ref:`core-metadata-requires-external`"""
    project_urls: _Validator[Dict[str, str]] = _Validator(added="1.2")
    """:external:ref:`core-metadata-project-url`"""
    # PEP 685 lets us raise an error if an extra doesn't pass `Name` validation
    # regardless of metadata version.
    provides_extra: _Validator[List[utils.NormalizedName]] = _Validator(
        added="2.1",
    )
    """:external:ref:`core-metadata-provides-extra`"""
    provides_dist: _Validator[List[str]] = _Validator(added="1.2")
    """:external:ref:`core-metadata-provides-dist`"""
    obsoletes_dist: _Validator[List[str]] = _Validator(added="1.2")
    """:external:ref:`core-metadata-obsoletes-dist`"""
    requires: _Validator[List[str]] = _Validator(added="1.1")
    """``Requires`` (deprecated)"""
    provides: _Validator[List[str]] = _Validator(added="1.1")
    """``Provides`` (deprecated)"""
    obsoletes: _Validator[List[str]] = _Validator(added="1.1")
    """``Obsoletes`` (deprecated)"""