]> crepu.dev Git - config.git/blame_incremental - djavu-asus/emacs/elpy/rpc-venv/lib/python3.11/site-packages/pathspec/patterns/gitwildmatch.py
Reorganización de directorios
[config.git] / djavu-asus / emacs / elpy / rpc-venv / lib / python3.11 / site-packages / pathspec / patterns / gitwildmatch.py
... / ...
CommitLineData
1"""
2This module implements Git's wildmatch pattern matching which itself is
3derived from Rsync's wildmatch. Git uses wildmatch for its ".gitignore"
4files.
5"""
6
7import re
8import warnings
9from typing import (
10 AnyStr,
11 Optional,
12 Tuple)
13
14from .. import util
15from ..pattern import RegexPattern
16
_BYTES_ENCODING = 'latin1'
"""
The encoding applied to a byte string pattern so it can be parsed as text
(and applied again to turn the result back into bytes).
"""

_DIR_MARK = 'ps_d'
"""
The name of the regex group that marks a directory match. It is only
consumed by :class:`GitIgnoreSpec`.
"""
27
28
class GitWildMatchPatternError(ValueError):
    """
    The :class:`GitWildMatchPatternError` is raised for a git wild match
    pattern that is invalid. It is a :class:`ValueError` subclass.
    """
35
36
class GitWildMatchPattern(RegexPattern):
    """
    The :class:`GitWildMatchPattern` class represents a single Git wildmatch
    pattern (one ".gitignore"-style line) compiled to a regular expression.
    """

    # This class adds no instance attributes; the empty slots keep the class
    # hierarchy dict-less.
    __slots__ = ()

    @classmethod
    def pattern_to_regex(
        cls,
        pattern: AnyStr,
    ) -> Tuple[Optional[AnyStr], Optional[bool]]:
        """
        Translate a Git wildmatch pattern into a regular expression.

        *pattern* (:class:`str` or :class:`bytes`) is the pattern to
        translate. A byte string is decoded for processing and the resulting
        regex is encoded back to bytes with the same encoding.

        Returns a 2-tuple: the uncompiled regular expression (:class:`str`,
        :class:`bytes`, or :data:`None`); and whether matched files should
        be included (:data:`True`), excluded (:data:`False`), or neither
        because the pattern is a null-operation (:data:`None`).

        Raises :class:`TypeError` if *pattern* is neither :class:`str` nor
        :class:`bytes`. Raises :class:`GitWildMatchPatternError` if nothing
        is left of the pattern after normalization, or if a segment ends
        with an escape character that has nothing to escape.
        """
        if isinstance(pattern, str):
            as_bytes = False
        elif isinstance(pattern, bytes):
            as_bytes = True
            pattern = pattern.decode(_BYTES_ENCODING)
        else:
            raise TypeError(f"pattern:{pattern!r} is not a unicode or byte string.")

        # Kept untouched for error messages.
        raw_pattern = pattern

        # EDGE CASE: A space can be escaped with a backslash. When the
        # pattern ends with a backslash followed by a space, that trailing
        # space is significant, so only the left side is stripped.
        if pattern.endswith('\\ '):
            pattern = pattern.lstrip()
        else:
            pattern = pattern.strip()

        # Null-operations (neither include nor exclude files):
        # - a blank pattern;
        # - a comment, i.e. a pattern starting with a hash ('#'); a literal
        #   leading hash has to be written as '\#';
        # - EDGE CASE: a lone '/', which according to `git check-ignore`
        #   (v2.4.1) does not match any file.
        if not pattern or pattern.startswith('#') or pattern == '/':
            return None, None

        # A leading exclamation mark ('!') negates the pattern (exclude
        # instead of include); a literal one has to be written as '\!'.
        include = not pattern.startswith('!')
        if not include:
            # Drop the exclamation mark itself.
            pattern = pattern[1:]

        # Regex used verbatim for edge cases that normalization cannot
        # express.
        override_regex = None

        segs = pattern.split('/')

        # Normalization starts here.

        # EDGE CASE: Collapse runs of consecutive double-asterisk segments
        # into a single one. Walking backwards means a deletion never shifts
        # a segment that is still to be examined.
        for idx in range(len(segs) - 1, 0, -1):
            if segs[idx - 1] == '**' and segs[idx] == '**':
                del segs[idx]

        if segs == ['**', '']:
            # EDGE CASE: '**/' should match everything except individual
            # files in the root directory. Normalization cannot express
            # that, so use the override.
            override_regex = f'^.+(?P<{_DIR_MARK}>/).*$'

        if not segs[0]:
            # A leading slash ('/') anchors the pattern to the root
            # directory. Dropping the empty first segment makes the pattern
            # relative to root.
            del segs[0]

        elif len(segs) == 1 or (len(segs) == 2 and not segs[1]):
            # A single segment without a leading slash matches at any depth,
            # i.e. it is equivalent to "**/{pattern}".
            # EDGE CASE: The same holds for a single segment with a trailing
            # slash (e.g. dir/).
            if segs[0] != '**':
                segs.insert(0, '**')

        # Otherwise: EDGE CASE: a pattern without a leading slash but with
        # at least one directory in front (e.g. "dir/{pattern}") should not
        # match "**/dir/{pattern}", according to `git check-ignore`
        # (v2.4.1). Nothing to do.

        if not segs:
            # Nothing survived normalization, so the pattern is invalid.
            raise GitWildMatchPatternError(f"Invalid git pattern: {raw_pattern!r}")

        if not segs[-1] and len(segs) > 1:
            # A trailing slash ('/') matches all descendants of a directory
            # but not a regular file. That is equivalent to "{pattern}/**",
            # so turn the empty last segment into a double-asterisk.
            segs[-1] = '**'

        if override_regex is not None:
            regex = override_regex
        else:
            # A final segment that is not a double-asterisk matches a file
            # or a directory with paths underneath it. E.g., "foo" matches
            # "foo", "foo/bar", "foo/bar/baz", etc.
            dir_tail = f'(?:(?P<{_DIR_MARK}>/).*)?'

            parts = ['^']
            need_slash = False
            last = len(segs) - 1
            for idx, seg in enumerate(segs):
                is_first = idx == 0
                is_last = idx == last

                if seg == '**':
                    if is_first and is_last:
                        # The pattern is only a double-asterisk: match every
                        # path.
                        parts.append(f'[^/]+(?:(?P<{_DIR_MARK}>/).*)?')
                    elif is_first:
                        # Leading double-asterisk: match any leading path
                        # segments.
                        parts.append('(?:.+/)?')
                        need_slash = False
                    elif is_last:
                        # Trailing double-asterisk: match any trailing path
                        # segments.
                        parts.append(f'(?P<{_DIR_MARK}>/).*')
                    else:
                        # Inner double-asterisk: match multiple (or zero)
                        # inner path segments.
                        parts.append('(?:/.+)?')
                        need_slash = True
                    continue

                if need_slash:
                    parts.append('/')

                if seg == '*':
                    # Exactly one path segment.
                    parts.append('[^/]+')
                else:
                    # An ordinary segment glob.
                    try:
                        parts.append(cls._translate_segment_glob(seg))
                    except ValueError as e:
                        raise GitWildMatchPatternError(f"Invalid git pattern: {raw_pattern!r}") from e

                if is_last:
                    parts.append(dir_tail)

                need_slash = True

            parts.append('$')
            regex = ''.join(parts)

        if as_bytes:
            regex = regex.encode(_BYTES_ENCODING)

        return regex, include

    @staticmethod
    def _translate_segment_glob(pattern: str) -> str:
        """
        Translate the glob of one path segment into a regular expression.

        *pattern* (:class:`str`) is the glob pattern of the segment.

        Returns the regular expression (:class:`str`).

        Raises :class:`ValueError` if *pattern* ends with an escape
        character that has no following character to escape.
        """
        # NOTE: This is derived from `fnmatch.translate()` and is similar to
        # the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.

        escaping = False
        pieces = []
        pos, length = 0, len(pattern)
        while pos < length:
            # Consume the next character.
            char = pattern[pos]
            pos += 1

            if escaping:
                # The previous character was a backslash: take this one
                # literally.
                escaping = False
                pieces.append(re.escape(char))

            elif char == '\\':
                # Escape character: the next character is taken literally.
                escaping = True

            elif char == '*':
                # Multi-character wildcard: any string without slashes,
                # including the empty string.
                pieces.append('[^/]*')

            elif char == '?':
                # Single-character wildcard: any one character except a
                # slash.
                pieces.append('[^/]')

            elif char == '[':
                # Bracket expression. Apart from a leading exclamation mark
                # it can be used as regex directly, but its end has to be
                # located first.
                # - "[][!]" matches ']', '[' and '!'.
                # - "[]-]" matches ']' and '-'.
                # - "[!]a-]" matches any character except ']', 'a' and '-'.
                scan = pos

                # Step over a negation marker.
                if scan < length and pattern[scan] in ('!', '^'):
                    scan += 1

                # A closing bracket in first position is a literal member of
                # the expression, so step over it as well.
                if scan < length and pattern[scan] == ']':
                    scan += 1

                # Locate the closing bracket (-1 when there is none).
                closing = pattern.find(']', scan)

                if closing == -1:
                    # No closing bracket: the opening bracket is a literal
                    # bracket instead of the start of an expression.
                    pieces.append('\\[')

                else:
                    # One past the closing bracket:
                    #
                    #   [...]
                    #    ^   ^
                    #   pos  stop
                    #
                    stop = closing + 1
                    expr = '['

                    # Negation. POSIX leaves "[^...]" undefined in a glob
                    # and Python's `fnmatch.translate()` escapes the caret
                    # ('^') as a literal, but Git supports a caret for
                    # negation, so both '!' and '^' negate here to stay
                    # consistent with Git.
                    if pattern[pos] in ('!', '^'):
                        expr += '^'
                        pos += 1

                    # Double the backslashes so regex treats them as literal
                    # backslashes, as defined by POSIX.
                    expr += pattern[pos:stop].replace('\\', '\\\\')

                    pieces.append(expr)

                    # Continue one past the closing bracket.
                    pos = stop

            else:
                # Regular character: escape it for regex.
                pieces.append(re.escape(char))

        if escaping:
            raise ValueError(f"Escape character found with no next character to escape: {pattern!r}")

        return ''.join(pieces)

    @staticmethod
    def escape(s: AnyStr) -> AnyStr:
        """
        Backslash-escape the pattern meta characters in the given string.

        *s* (:class:`str` or :class:`bytes`) is a filename or other string
        to escape, usually before it is added to a ".gitignore".

        Returns the escaped string, of the same type as *s* (:class:`str`
        or :class:`bytes`).

        Raises :class:`TypeError` if *s* is neither :class:`str` nor
        :class:`bytes`.
        """
        if isinstance(s, str):
            as_bytes = False
            text = s
        elif isinstance(s, bytes):
            as_bytes = True
            text = s.decode(_BYTES_ENCODING)
        else:
            raise TypeError(f"s:{s!r} is not a unicode or byte string.")

        # Reference: https://git-scm.com/docs/gitignore#_pattern_format
        meta_characters = r"[]!*#?"

        # Map every meta character to itself preceded by a backslash.
        table = str.maketrans({ch: "\\" + ch for ch in meta_characters})
        escaped = text.translate(table)

        return escaped.encode(_BYTES_ENCODING) if as_bytes else escaped
384
# Make `GitWildMatchPattern` available under the name "gitwildmatch".
util.register_pattern('gitwildmatch', GitWildMatchPattern)
386
387
class GitIgnorePattern(GitWildMatchPattern):
    """
    The :class:`GitIgnorePattern` class is the deprecated predecessor of
    :class:`GitWildMatchPattern`. It is a subclass that only adds a
    deprecation warning and exists to maintain compatibility with v0.4.
    """

    def __init__(self, *args, **kw) -> None:
        """
        Warn about deprecation, then initialize exactly like the parent
        class with the same arguments.
        """
        self._deprecated()
        super().__init__(*args, **kw)

    @staticmethod
    def _deprecated() -> None:
        """
        Issue the :class:`DeprecationWarning` for this class.
        """
        message = (
            "GitIgnorePattern ('gitignore') is deprecated. Use "
            "GitWildMatchPattern ('gitwildmatch') instead."
        )
        # This helper is called directly from the public methods of this
        # class, so a stack level of 3 attributes the warning to the code
        # calling those methods.
        warnings.warn(message, DeprecationWarning, stacklevel=3)

    @classmethod
    def pattern_to_regex(cls, *args, **kw):
        """
        Warn about deprecation, then delegate to the parent class and
        return its result unchanged.
        """
        cls._deprecated()
        return super().pattern_to_regex(*args, **kw)
418
# Backward compatibility with v0.4: keep the old "gitignore" name
# registered, backed by the deprecated `GitIgnorePattern` class.
util.register_pattern('gitignore', GitIgnorePattern)