]>
crepu.dev Git - config.git/blob - djavu-asus/elpy/rpc-venv/lib/python3.11/site-packages/pathspec/patterns/gitwildmatch.py
2 This module implements Git's wildmatch pattern matching which itself is
3 derived from Rsync's wildmatch. Git uses wildmatch for its ".gitignore"
15 from ..pattern
import RegexPattern
17 _BYTES_ENCODING
= 'latin1'
19 The encoding to use when parsing a byte string pattern.
24 The regex group name for the directory marker. This is only used by
25 :class:`GitIgnoreSpec`.
29 class GitWildMatchPatternError(ValueError):
31 The :class:`GitWildMatchPatternError` indicates an invalid git wild match
37 class GitWildMatchPattern(RegexPattern
):
39 The :class:`GitWildMatchPattern` class represents a compiled Git
43 # Keep the dict-less class hierarchy.
50 ) -> Tuple
[Optional
[AnyStr
], Optional
[bool]]:
52 Convert the pattern into a regular expression.
54 *pattern* (:class:`str` or :class:`bytes`) is the pattern to convert
55 into a regular expression.
57 Returns the uncompiled regular expression (:class:`str`, :class:`bytes`,
58 or :data:`None`); and whether matched files should be included
59 (:data:`True`), excluded (:data:`False`), or if it is a
60 null-operation (:data:`None`).
62 if isinstance(pattern
, str):
64 elif isinstance(pattern
, bytes
):
66 pattern
= pattern
.decode(_BYTES_ENCODING
)
68 raise TypeError(f
"pattern:{pattern!r} is not a unicode or byte string.")
70 original_pattern
= pattern
72 if pattern
.endswith('\\ '):
73 # EDGE CASE: Spaces can be escaped with backslash.
74 # If a pattern ends with a backslash followed by a space,
75 # only strip from left.
76 pattern
= pattern
.lstrip()
78 pattern
= pattern
.strip()
80 if pattern
.startswith('#'):
81 # A pattern starting with a hash ('#') serves as a comment
82 # (neither includes nor excludes files). Escape the hash with a
83 # back-slash to match a literal hash (i.e., '\#').
88 # EDGE CASE: According to `git check-ignore` (v2.4.1), a single
89 # '/' does not match any file.
94 if pattern
.startswith('!'):
95 # A pattern starting with an exclamation mark ('!') negates the
96 # pattern (exclude instead of include). Escape the exclamation
97 # mark with a back-slash to match a literal exclamation mark
100 # Remove leading exclamation mark.
101 pattern
= pattern
[1:]
105 # Allow a regex override for edge cases that cannot be handled
106 # through normalization.
107 override_regex
= None
109 # Split pattern into segments.
110 pattern_segs
= pattern
.split('/')
112 # Normalize pattern to make processing easier.
114 # EDGE CASE: Deal with duplicate double-asterisk sequences.
115 # Collapse each sequence down to one double-asterisk. Iterate over
116 # the segments in reverse and remove the duplicate double
117 # asterisks as we go.
118 for i
in range(len(pattern_segs
) - 1, 0, -1):
119 prev
= pattern_segs
[i
-1]
120 seg
= pattern_segs
[i
]
121 if prev
== '**' and seg
== '**':
124 if len(pattern_segs
) == 2 and pattern_segs
[0] == '**' and not pattern_segs
[1]:
125 # EDGE CASE: The '**/' pattern should match everything except
126 # individual files in the root directory. This case cannot be
127 # adequately handled through normalization. Use the override.
128 override_regex
= f
'^.+(?P<{_DIR_MARK}>/).*$'
130 if not pattern_segs
[0]:
131 # A pattern beginning with a slash ('/') will only match paths
132 # directly on the root directory instead of any descendant
133 # paths. So, remove empty first segment to make pattern relative
137 elif len(pattern_segs
) == 1 or (len(pattern_segs
) == 2 and not pattern_segs
[1]):
138 # A single pattern without a beginning slash ('/') will match
139 # any descendant path. This is equivalent to "**/{pattern}". So,
140 # prepend with double-asterisks to make pattern relative to
142 # EDGE CASE: This also holds for a single pattern with a
143 # trailing slash (e.g. dir/).
144 if pattern_segs
[0] != '**':
145 pattern_segs
.insert(0, '**')
148 # EDGE CASE: A pattern without a beginning slash ('/') that
149 # contains at least one prepended directory (e.g.
150 # "dir/{pattern}") should not match "**/dir/{pattern}",
151 # according to `git check-ignore` (v2.4.1).
155 # After resolving the edge cases, we end up with no pattern at
156 # all. This must be because the pattern is invalid.
157 raise GitWildMatchPatternError(f
"Invalid git pattern: {original_pattern!r}")
159 if not pattern_segs
[-1] and len(pattern_segs
) > 1:
160 # A pattern ending with a slash ('/') will match all descendant
161 # paths if it is a directory but not if it is a regular file.
162 # This is equivalent to "{pattern}/**". So, set last segment to
163 # a double-asterisk to include all descendants.
164 pattern_segs
[-1] = '**'
166 if override_regex
is None:
167 # Build regular expression from pattern.
170 end
= len(pattern_segs
) - 1
171 for i
, seg
in enumerate(pattern_segs
):
173 if i
== 0 and i
== end
:
174 # A pattern consisting solely of double-asterisks ('**')
175 # will match every path.
176 output
.append(f
'[^/]+(?:(?P<{_DIR_MARK}>/).*)?')
178 # A normalized pattern beginning with double-asterisks
179 # ('**') will match any leading path segments.
180 output
.append('(?:.+/)?')
183 # A normalized pattern ending with double-asterisks ('**')
184 # will match any trailing path segments.
185 output
.append(f
'(?P<{_DIR_MARK}>/).*')
187 # A pattern with inner double-asterisks ('**') will match
188 # multiple (or zero) inner path segments.
189 output
.append('(?:/.+)?')
193 # Match single path segment.
197 output
.append('[^/]+')
200 # A pattern ending without a slash ('/') will match a file
201 # or a directory (with paths underneath it). E.g., "foo"
202 # matches "foo", "foo/bar", "foo/bar/baz", etc.
203 output
.append(f
'(?:(?P<{_DIR_MARK}>/).*)?')
208 # Match segment glob pattern.
213 output
.append(cls
._translate
_segment
_glob
(seg
))
214 except ValueError as e
:
215 raise GitWildMatchPatternError(f
"Invalid git pattern: {original_pattern!r}") from e
218 # A pattern ending without a slash ('/') will match a file
219 # or a directory (with paths underneath it). E.g., "foo"
220 # matches "foo", "foo/bar", "foo/bar/baz", etc.
221 output
.append(f
'(?:(?P<{_DIR_MARK}>/).*)?')
226 regex
= ''.join(output
)
229 # Use regex override.
230 regex
= override_regex
233 # A blank pattern is a null-operation (neither includes nor
238 if regex
is not None and return_type
is bytes
:
239 regex
= regex
.encode(_BYTES_ENCODING
)
241 return regex
, include
244 def _translate_segment_glob(pattern
: str) -> str:
246 Translates the glob pattern to a regular expression. This is used in
247 the constructor to translate a path segment glob pattern to its
248 corresponding regular expression.
250 *pattern* (:class:`str`) is the glob pattern.
252 Returns the regular expression (:class:`str`).
254 # NOTE: This is derived from `fnmatch.translate()` and is similar to
255 # the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.
259 i
, end
= 0, len(pattern
)
261 # Get next character.
266 # Escape the character.
268 regex
+= re
.escape(char
)
271 # Escape character, escape next character.
275 # Multi-character wildcard. Match any string (except slashes),
276 # including an empty string.
280 # Single-character wildcard. Match any single character (except
285 # Bracket expression wildcard. Except for the beginning
286 # exclamation mark, the whole bracket expression can be used
287 # directly as regex but we have to find where the expression
289 # - "[][!]" matches ']', '[' and '!'.
290 # - "[]-]" matches ']' and '-'.
291 # - "[!]a-]" matches any character except ']', 'a' and '-'.
294 # Pass bracket expression negation.
295 if j
< end
and (pattern
[j
] == '!' or pattern
[j
] == '^'):
298 # Pass first closing bracket if it is at the beginning of the
300 if j
< end
and pattern
[j
] == ']':
303 # Find closing bracket. Stop once we reach the end or find it.
304 while j
< end
and pattern
[j
] != ']':
308 # Found end of bracket expression. Increment j to be one past
309 # the closing bracket:
318 if pattern
[i
] == '!':
319 # Bracket expression needs to be negated.
322 elif pattern
[i
] == '^':
323 # POSIX declares that the regex bracket expression negation
324 # "[^...]" is undefined in a glob pattern. Python's
325 # `fnmatch.translate()` escapes the caret ('^') as a
326 # literal. Git supports using a caret for negation.
327 # Maintain consistency with Git because that is the expected
332 # Build regex bracket expression. Escape backslashes so they are
333 # treated as literal backslashes by regex as defined by POSIX.
334 expr
+= pattern
[i
:j
].replace('\\', '\\\\')
336 # Add regex bracket expression to regex result.
339 # Set i to one past the closing bracket.
343 # Failed to find closing bracket, treat opening bracket as a
344 # bracket literal instead of as an expression.
348 # Regular character, escape it for regex.
349 regex
+= re
.escape(char
)
352 raise ValueError(f
"Escape character found with no next character to escape: {pattern!r}")
357 def escape(s
: AnyStr
) -> AnyStr
:
359 Escape special characters in the given string.
361 *s* (:class:`str` or :class:`bytes`) is a filename or a string that you
362 want to escape, usually before adding it to a ".gitignore".
364 Returns the escaped string (:class:`str` or :class:`bytes`).
366 if isinstance(s
, str):
369 elif isinstance(s
, bytes
):
371 string
= s
.decode(_BYTES_ENCODING
)
373 raise TypeError(f
"s:{s!r} is not a unicode or byte string.")
375 # Reference: https://git-scm.com/docs/gitignore#_pattern_format
376 meta_characters
= r
"[]!*#?"
378 out_string
= "".join("\\" + x
if x
in meta_characters
else x
for x
in string
)
380 if return_type
is bytes
:
381 return out_string
.encode(_BYTES_ENCODING
)
385 util
.register_pattern('gitwildmatch', GitWildMatchPattern
)
388 class GitIgnorePattern(GitWildMatchPattern
):
390 The :class:`GitIgnorePattern` class is deprecated in favor of :class:`GitWildMatchPattern`.
391 This class only exists to maintain compatibility with v0.4.
394 def __init__(self
, *args
, **kw
) -> None:
396 Warn about deprecation.
399 super(GitIgnorePattern
, self
).__init
__(*args
, **kw
)
402 def _deprecated() -> None:
404 Warn about deprecation.
407 "GitIgnorePattern ('gitignore') is deprecated. Use "
408 "GitWildMatchPattern ('gitwildmatch') instead."
409 ), DeprecationWarning, stacklevel
=3)
412 def pattern_to_regex(cls
, *args
, **kw
):
414 Warn about deprecation.
417 return super(GitIgnorePattern
, cls
).pattern_to_regex(*args
, **kw
)
419 # Register `GitIgnorePattern` as "gitignore" for backward compatibility
421 util
.register_pattern('gitignore', GitIgnorePattern
)