]>
Commit | Line | Data |
---|---|---|
53e6db90 DC |
1 | """ |
2 | Filename globbing utility. Mostly a copy of `glob` from Python 3.5. | |
3 | ||
4 | Changes include: | |
5 | * `yield from` and PEP3102 `*` removed. | |
6 | * Hidden files are not ignored. | |
7 | """ | |
8 | ||
9 | import os | |
10 | import re | |
11 | import fnmatch | |
12 | ||
13 | __all__ = ["glob", "iglob", "escape"] | |
14 | ||
15 | ||
def glob(pathname, recursive=False):
    """Return a list of paths matching a pathname pattern.

    The pattern may contain simple shell-style wildcards a la
    fnmatch.  Unlike the standard library's `glob`, filenames starting
    with a dot are *not* special-cased here: they are matched by '*'
    and '?' patterns like any other name.

    If recursive is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.

    This is simply the eager form of `iglob`: the iterator it returns
    is consumed into a list.
    """
    return list(iglob(pathname, recursive=recursive))
28 | ||
29 | ||
def iglob(pathname, recursive=False):
    """Return an iterator which yields the paths matching a pathname pattern.

    The pattern may contain simple shell-style wildcards a la
    fnmatch.  Unlike the standard library's `glob`, filenames starting
    with a dot are *not* special-cased here: they are matched by '*'
    and '?' patterns like any other name.

    If recursive is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.
    """
    it = _iglob(pathname, recursive)
    if recursive and _isrecursive(pathname):
        # For a bare '**' the recursive helper yields an empty string first
        # (standing for "zero directories"); that is not a real path, so
        # drop it before handing the iterator to the caller.
        s = next(it)  # skip empty string
        assert not s
    return it
46 | ||
47 | ||
def _iglob(pathname, recursive):
    """Yield every path matching `pathname`, one path component at a time.

    The pattern is split into a directory part and a final component.
    The directory part is expanded first (recursively, if it contains
    wildcards itself), then the final component is matched inside each
    resulting directory.  `recursive` enables the special meaning of a
    '**' component.
    """
    dirname, basename = os.path.split(pathname)
    glob_in_dir = glob2 if recursive and _isrecursive(basename) else glob1

    if not has_magic(pathname):
        # Literal path: nothing to expand, only check that it exists.
        if basename:
            if os.path.lexists(pathname):
                yield pathname
        else:
            # Patterns ending with a slash should match only directories
            if os.path.isdir(dirname):
                yield pathname
        return

    if not dirname:
        # Explicit loop rather than `yield from`, in line with the module's
        # stated goal of avoiding that syntax.
        for name in glob_in_dir(dirname, basename):
            yield name
        return

    # `os.path.split()` returns the argument itself as a dirname if it is a
    # drive or UNC path.  Prevent an infinite recursion if a drive or UNC path
    # contains magic characters (i.e. r'\\?\C:').
    if dirname != pathname and has_magic(dirname):
        dirs = _iglob(dirname, recursive)
    else:
        dirs = [dirname]
    if not has_magic(basename):
        # Literal last component: an existence check is enough.
        glob_in_dir = glob0
    for parent in dirs:
        for name in glob_in_dir(parent, basename):
            yield os.path.join(parent, name)
77 | ||
78 | ||
79 | # These 2 helper functions non-recursively glob inside a literal directory. | |
80 | # They return a list of basenames. `glob1` accepts a pattern while `glob0` | |
81 | # takes a literal basename (so it only has to check for its existence). | |
82 | ||
83 | ||
def glob1(dirname, pattern):
    """Return the basenames in `dirname` that match the fnmatch `pattern`.

    An empty `dirname` means the current directory; its type (bytes or
    str) is chosen to agree with the type of `pattern`.  A directory
    that cannot be listed produces an empty list.
    """
    if dirname:
        target = dirname
    elif isinstance(pattern, bytes):
        target = os.curdir.encode('ASCII')
    else:
        target = os.curdir

    try:
        entries = os.listdir(target)
    except OSError:
        return []
    return fnmatch.filter(entries, pattern)
95 | ||
96 | ||
def glob0(dirname, basename):
    """Return `[basename]` if the literal entry exists, else an empty list.

    `os.path.split()` returns an empty basename for paths ending with a
    directory separator, and such a pattern ('q*x/') should match only
    directories; in that case `dirname` itself must be a directory.
    Otherwise the joined path merely has to exist (a dangling symlink
    counts, since `lexists` is used).
    """
    if basename:
        found = os.path.lexists(os.path.join(dirname, basename))
    else:
        found = os.path.isdir(dirname)
    return [basename] if found else []
107 | ||
108 | ||
109 | # This helper function recursively yields relative pathnames inside a literal | |
110 | # directory. | |
111 | ||
112 | ||
def glob2(dirname, pattern):
    """Yield an empty string, then every relative path below `dirname`.

    `pattern` must be the recursive wildcard; it is only used to check
    that and to produce an empty string of the matching type.
    """
    assert _isrecursive(pattern)
    empty = pattern[:0]  # '' or b'', same type as the pattern
    yield empty
    for relpath in _rlistdir(dirname):
        yield relpath
118 | ||
119 | ||
120 | # Recursively yields relative pathnames inside a literal directory. | |
def _rlistdir(dirname):
    """Yield the relative path of every entry below `dirname`, depth first.

    Each entry is yielded before the entries found beneath it.  An
    empty `dirname` means the current directory.  Anything that cannot
    be listed (a plain file, a missing or unreadable directory) simply
    contributes no entries.
    """
    if not dirname:
        if isinstance(dirname, bytes):
            dirname = os.curdir.encode('ASCII')
        else:
            dirname = os.curdir
    try:
        names = os.listdir(dirname)
    except OSError:
        return
    for name in names:
        yield name
        # `dirname` is never empty at this point (it was defaulted above),
        # so the child path can always be built with a plain join.
        child = os.path.join(dirname, name)
        for sub in _rlistdir(child):
            yield os.path.join(name, sub)
136 | ||
137 | ||
# Matches any single glob metacharacter; one pattern per string type.
magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])')


def has_magic(s):
    """Return True if `s` (str or bytes) contains any of '*', '?' or '['."""
    checker = magic_check_bytes if isinstance(s, bytes) else magic_check
    return checker.search(s) is not None
148 | ||
149 | ||
def _isrecursive(pattern):
    """Return True if `pattern` is exactly the recursive wildcard '**'.

    The comparison is made against a literal of the same type (bytes or
    str) as `pattern`.
    """
    marker = b'**' if isinstance(pattern, bytes) else '**'
    return pattern == marker
155 | ||
156 | ||
def escape(pathname):
    """Escape all special characters.

    Each of "*?[" is wrapped in square brackets so that it matches
    itself literally.  Metacharacters do not work in the drive part, so
    the drive is split off first and left unescaped.
    """
    drive, rest = os.path.splitdrive(pathname)
    if isinstance(rest, bytes):
        escaped = magic_check_bytes.sub(br'[\1]', rest)
    else:
        escaped = magic_check.sub(r'[\1]', rest)
    return drive + escaped