fnmatch.py 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219
"""Filename matching with shell patterns.

fnmatch(FILENAME, PATTERN) matches according to the local convention.
fnmatchcase(FILENAME, PATTERN) always takes case into account.

The functions operate by translating the pattern into a regular
expression.  They cache the compiled regular expressions for speed.

The function translate(PATTERN) returns a regular expression string
corresponding to PATTERN, together with the list of numeric ranges found
in it.  (It does not compile it.)

Based on code from the fnmatch.py file distributed with Python 2.6.

Licensed under PSF License (see LICENSE.txt file).

Changes to original fnmatch module:
- translate function supports ``*`` and ``**`` similarly to fnmatch C library
"""
  13. import os
  14. import re
# Names exported by a star-import of this module.
__all__ = ["fnmatch", "fnmatchcase", "translate"]

# Memo used by cached_translate(): maps a pattern string to the pair
# (compiled regex, numeric ranges) produced from translate(pattern).
_cache = {}
  17. LEFT_BRACE = re.compile(
  18. r"""
  19. (?: ^ | [^\\] ) # Beginning of string or a character besides "\"
  20. \{ # "{"
  21. """, re.VERBOSE
  22. )
  23. RIGHT_BRACE = re.compile(
  24. r"""
  25. (?: ^ | [^\\] ) # Beginning of string or a character besides "\"
  26. \} # "}"
  27. """, re.VERBOSE
  28. )
# Recognizes the text of a numeric range such as "1..10" or "-3..+7" and
# captures both bounds (as strings, sign included) in groups 1 and 2.
# The pattern carries no end anchor, so with .match() only the beginning of
# the examined text has to look like a range.
NUMERIC_RANGE = re.compile(
    r"""
    ( # Capture a number
    [+-] ? # Zero or one "+" or "-" characters
    \d + # One or more digits
    )
    \.\. # ".."
    ( # Capture a number
    [+-] ? # Zero or one "+" or "-" characters
    \d + # One or more digits
    )
    """, re.VERBOSE
)
  42. def fnmatch(name, pat):
  43. """Test whether FILENAME matches PATTERN.
  44. Patterns are Unix shell style:
  45. - ``*`` matches everything except path separator
  46. - ``**`` matches everything
  47. - ``?`` matches any single character
  48. - ``[seq]`` matches any character in seq
  49. - ``[!seq]`` matches any char not in seq
  50. - ``{s1,s2,s3}`` matches any of the strings given (separated by commas)
  51. An initial period in FILENAME is not special.
  52. Both FILENAME and PATTERN are first case-normalized
  53. if the operating system requires it.
  54. If you don't want this, use fnmatchcase(FILENAME, PATTERN).
  55. """
  56. name = os.path.normpath(name).replace(os.sep, "/")
  57. return fnmatchcase(name, pat)
  58. def cached_translate(pat):
  59. if not pat in _cache:
  60. res, num_groups = translate(pat)
  61. regex = re.compile(res)
  62. _cache[pat] = regex, num_groups
  63. return _cache[pat]
  64. def fnmatchcase(name, pat):
  65. """Test whether FILENAME matches PATTERN, including case.
  66. This is a version of fnmatch() which doesn't case-normalize
  67. its arguments.
  68. """
  69. regex, num_groups = cached_translate(pat)
  70. match = regex.match(name)
  71. if not match:
  72. return False
  73. pattern_matched = True
  74. for (num, (min_num, max_num)) in zip(match.groups(), num_groups):
  75. if num[0] == '0' or not (min_num <= int(num) <= max_num):
  76. pattern_matched = False
  77. break
  78. return pattern_matched
  79. def translate(pat, nested=False):
  80. """Translate a shell PATTERN to a regular expression.
  81. There is no way to quote meta-characters.
  82. """
  83. index, length = 0, len(pat) # Current index and length of pattern
  84. brace_level = 0
  85. in_brackets = False
  86. result = ''
  87. is_escaped = False
  88. matching_braces = (len(LEFT_BRACE.findall(pat)) ==
  89. len(RIGHT_BRACE.findall(pat)))
  90. numeric_groups = []
  91. while index < length:
  92. current_char = pat[index]
  93. index += 1
  94. if current_char == '*':
  95. pos = index
  96. if pos < length and pat[pos] == '*':
  97. result += '.*'
  98. else:
  99. result += '[^/]*'
  100. elif current_char == '?':
  101. result += '.'
  102. elif current_char == '[':
  103. if in_brackets:
  104. result += '\\['
  105. else:
  106. pos = index
  107. has_slash = False
  108. while pos < length and pat[pos] != ']':
  109. if pat[pos] == '/' and pat[pos-1] != '\\':
  110. has_slash = True
  111. break
  112. pos += 1
  113. if has_slash:
  114. result += '\\[' + pat[index:(pos + 1)] + '\\]'
  115. index = pos + 2
  116. else:
  117. if index < length and pat[index] in '!^':
  118. index += 1
  119. result += '[^'
  120. else:
  121. result += '['
  122. in_brackets = True
  123. elif current_char == '-':
  124. if in_brackets:
  125. result += current_char
  126. else:
  127. result += '\\' + current_char
  128. elif current_char == ']':
  129. result += current_char
  130. in_brackets = False
  131. elif current_char == '{':
  132. pos = index
  133. has_comma = False
  134. while pos < length and (pat[pos] != '}' or is_escaped):
  135. if pat[pos] == ',' and not is_escaped:
  136. has_comma = True
  137. break
  138. is_escaped = pat[pos] == '\\' and not is_escaped
  139. pos += 1
  140. if not has_comma and pos < length:
  141. num_range = NUMERIC_RANGE.match(pat[index:pos])
  142. if num_range:
  143. numeric_groups.append(map(int, num_range.groups()))
  144. result += "([+-]?\d+)"
  145. else:
  146. inner_result, inner_groups = translate(pat[index:pos],
  147. nested=True)
  148. result += '\\{%s\\}' % (inner_result,)
  149. numeric_groups += inner_groups
  150. index = pos + 1
  151. elif matching_braces:
  152. result += '(?:'
  153. brace_level += 1
  154. else:
  155. result += '\\{'
  156. elif current_char == ',':
  157. if brace_level > 0 and not is_escaped:
  158. result += '|'
  159. else:
  160. result += '\\,'
  161. elif current_char == '}':
  162. if brace_level > 0 and not is_escaped:
  163. result += ')'
  164. brace_level -= 1
  165. else:
  166. result += '\\}'
  167. elif current_char == '/':
  168. if pat[index:(index + 3)] == "**/":
  169. result += "(?:/|/.*/)"
  170. index += 3
  171. else:
  172. result += '/'
  173. elif current_char != '\\':
  174. result += re.escape(current_char)
  175. if current_char == '\\':
  176. if is_escaped:
  177. result += re.escape(current_char)
  178. is_escaped = not is_escaped
  179. else:
  180. is_escaped = False
  181. if not nested:
  182. result += '\Z(?ms)'
  183. return result, numeric_groups