| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219 |
- """Filename matching with shell patterns.
- fnmatch(FILENAME, PATTERN) normalizes the path separators of FILENAME and then matches it case-sensitively.
- fnmatchcase(FILENAME, PATTERN) matches FILENAME as given and always takes case into account.
- The functions operate by translating the pattern into a regular
- expression. They cache the compiled regular expressions for speed.
- The function translate(PATTERN) returns a regular expression string
- corresponding to PATTERN, together with the bounds of any numeric ranges. (It does not compile it.)
- Based on code from fnmatch.py file distributed with Python 2.6.
- Licensed under PSF License (see LICENSE.txt file).
- Changes to original fnmatch module:
- - translate function supports ``*`` and ``**`` similarly to fnmatch C library
- """
- import os
- import re
# Names exported by ``from <module> import *``.
__all__ = ["fnmatch", "fnmatchcase", "translate"]
# Pattern string -> (compiled regex, numeric range bounds); filled in by
# cached_translate() and never evicted.
_cache = {}
# Matches every "{" that is not preceded by a backslash.  A lookbehind is
# used (rather than consuming the preceding character) so that adjacent
# braces such as "{{" are each counted when translate() checks whether the
# braces of a pattern are balanced.
LEFT_BRACE = re.compile(
    r"""
    (?<!\\)     # Not preceded by a backslash
    \{          # Opening brace
    """, re.VERBOSE
)

# Matches every "}" that is not preceded by a backslash (see LEFT_BRACE for
# why a lookbehind is used).
RIGHT_BRACE = re.compile(
    r"""
    (?<!\\)     # Not preceded by a backslash
    \}          # Closing brace
    """, re.VERBOSE
)

# Matches the complete content of a numeric range such as "3..120" or
# "-5..+7" and captures both bounds as strings.  The trailing \Z makes sure
# that text with trailing characters (e.g. "1..3abc") is not mistaken for a
# range when the regex is applied with .match().
NUMERIC_RANGE = re.compile(
    r"""
    (           # Capture the lower bound
        [+-]?   # Optional sign
        \d+     # One or more digits
    )
    \.\.        # Literal ".."
    (           # Capture the upper bound
        [+-]?   # Optional sign
        \d+     # One or more digits
    )
    \Z          # Nothing may follow the upper bound
    """, re.VERBOSE
)
def fnmatch(name, pat):
    """Test whether FILENAME matches PATTERN after normalizing the path.

    Patterns are Unix shell style:

    - ``*``            matches everything except path separator
    - ``**``           matches everything
    - ``?``            matches any single character
    - ``[seq]``        matches any character in seq
    - ``[!seq]``       matches any char not in seq
    - ``{s1,s2,s3}``   matches any of the strings given (separated by commas)

    An initial period in FILENAME is not special.

    FILENAME is first run through ``os.path.normpath`` and every ``os.sep``
    in it is replaced by ``/``; PATTERN is used exactly as given.  The
    comparison itself is delegated to fnmatchcase(), so no case
    normalization takes place.  Call fnmatchcase(FILENAME, PATTERN)
    directly to skip the path normalization.
    """
    normalized = os.path.normpath(name)
    # Patterns always use "/" as separator, whatever the platform uses.
    posix_style = normalized.replace(os.sep, "/")
    return fnmatchcase(posix_style, pat)
def cached_translate(pat):
    """Return the ``(compiled_regex, numeric_groups)`` pair for PATTERN.

    The first time a pattern is seen it is converted with translate(), the
    resulting expression is compiled, and the pair is stored in the
    module-level ``_cache`` dict.  Later calls with the same pattern return
    the stored pair.
    """
    entry = _cache.get(pat)
    if entry is None:
        source, bounds = translate(pat)
        entry = (re.compile(source), bounds)
        _cache[pat] = entry
    return entry
def fnmatchcase(name, pat):
    """Test whether FILENAME matches PATTERN, including case.

    Unlike fnmatch(), FILENAME is used exactly as given.

    The pattern is converted (and cached) by cached_translate().  Every
    ``{min..max}`` numeric range in the pattern becomes one capturing group
    of the regular expression; after the regular expression has matched,
    each captured number is checked against the bounds of its range.

    Returns True when the regular expression matches and every captured
    number is within its range and is not written with a leading zero
    (``060`` does not match ``{3..120}``); returns False otherwise.
    """
    regex, num_groups = cached_translate(pat)
    match = regex.match(name)
    if not match:
        return False
    # match.groups() and num_groups are positionally aligned: the n-th
    # capturing group belongs to the n-th (min, max) pair.
    for num, (min_num, max_num) in zip(match.groups(), num_groups):
        if num is None:
            # The range sits inside a {a,b} alternative that did not take
            # part in the match, so there is nothing to validate.
            continue
        if len(num) > 1 and num[0] == '0':
            # Zero-padded numbers such as "060" are rejected; a plain "0"
            # is a valid number and falls through to the range check.
            return False
        if not (min_num <= int(num) <= max_num):
            return False
    return True
def translate(pat, nested=False):
    """Translate a shell PATTERN to a regular expression.

    Translation rules:

    - ``*`` becomes "any run of characters other than ``/``"; ``**`` also
      matches ``/``.
    - ``?`` becomes "any single character".
    - ``[seq]`` / ``[!seq]`` / ``[^seq]`` become a character class, unless
      an unescaped ``/`` appears before the closing bracket, in which case
      the brackets are taken literally.
    - ``{a,b}`` becomes an alternation when the unescaped braces of the
      pattern are balanced; ``{min..max}`` becomes a capturing group for a
      signed integer; any other ``{text}`` without a comma is matched
      literally, braces included.
    - ``/**/`` matches either a single ``/`` or ``/anything/``.
    - A backslash itself is not emitted; it marks the next character as
      escaped.  An escaped ``,`` or ``}`` is emitted literally and a doubled
      backslash yields one literal backslash.  ``*``, ``?`` and ``[`` are
      translated the same way whether or not a backslash precedes them.

    Parameters:
        pat: the shell pattern to translate.
        nested: True for the recursive call that translates the inside of a
            literal ``{...}``; the result is then returned as a bare
            fragment without flags or end anchor.

    Returns a tuple ``(regex, numeric_groups)``.  ``regex`` is the
    uncompiled expression; unless ``nested`` is true it starts with
    ``(?ms)`` and ends with an end-of-string anchor.  ``numeric_groups``
    holds one ``(min, max)`` tuple of ints per capturing group, in group
    order.
    """
    index, length = 0, len(pat)  # Current index and length of pattern
    brace_level = 0
    in_brackets = False
    pieces = []  # Regex fragments, joined once at the end
    is_escaped = False
    # Alternation is only produced when every unescaped "{" has a partner.
    matching_braces = (len(LEFT_BRACE.findall(pat)) ==
                       len(RIGHT_BRACE.findall(pat)))
    numeric_groups = []
    while index < length:
        current_char = pat[index]
        index += 1
        if current_char == '*':
            # The index is not advanced past a second "*": it is handled by
            # the next iteration, so "**" yields ".*" followed by "[^/]*".
            if index < length and pat[index] == '*':
                pieces.append('.*')
            else:
                pieces.append('[^/]*')
        elif current_char == '?':
            pieces.append('.')
        elif current_char == '[':
            if in_brackets:
                pieces.append('\\[')
            else:
                # Look ahead for an unescaped "/" before the closing "]".
                pos = index
                has_slash = False
                while pos < length and pat[pos] != ']':
                    if pat[pos] == '/' and pat[pos-1] != '\\':
                        has_slash = True
                        break
                    pos += 1
                if has_slash:
                    # NOTE(review): the text up to and including the slash
                    # is copied verbatim and scanning resumes two characters
                    # later, i.e. this assumes the "]" directly follows the
                    # "/" -- confirm this is intended for longer sets.
                    pieces.append('\\[' + pat[index:(pos + 1)] + '\\]')
                    index = pos + 2
                else:
                    if index < length and pat[index] in '!^':
                        index += 1
                        pieces.append('[^')
                    else:
                        pieces.append('[')
                    in_brackets = True
        elif current_char == '-':
            if in_brackets:
                pieces.append(current_char)
            else:
                pieces.append('\\' + current_char)
        elif current_char == ']':
            pieces.append(current_char)
            in_brackets = False
        elif current_char == '{':
            # Scan forward to the first unescaped "," or "}".
            pos = index
            has_comma = False
            while pos < length and (pat[pos] != '}' or is_escaped):
                if pat[pos] == ',' and not is_escaped:
                    has_comma = True
                    break
                is_escaped = pat[pos] == '\\' and not is_escaped
                pos += 1
            if not has_comma and pos < length:
                # "{...}" without a comma: numeric range or literal braces.
                num_range = NUMERIC_RANGE.match(pat[index:pos])
                if num_range:
                    # Stored as a tuple: the bounds are cached by callers and
                    # read on every match, so a one-shot iterator (what
                    # map() returns on Python 3) must not be stored.
                    numeric_groups.append(tuple(map(int, num_range.groups())))
                    pieces.append(r"([+-]?\d+)")
                else:
                    inner_result, inner_groups = translate(pat[index:pos],
                                                           nested=True)
                    pieces.append('\\{%s\\}' % (inner_result,))
                    numeric_groups += inner_groups
                index = pos + 1
            elif matching_braces:
                pieces.append('(?:')
                brace_level += 1
            else:
                pieces.append('\\{')
        elif current_char == ',':
            if brace_level > 0 and not is_escaped:
                pieces.append('|')
            else:
                pieces.append('\\,')
        elif current_char == '}':
            if brace_level > 0 and not is_escaped:
                pieces.append(')')
                brace_level -= 1
            else:
                pieces.append('\\}')
        elif current_char == '/':
            if pat[index:(index + 3)] == "**/":
                pieces.append("(?:/|/.*/)")
                index += 3
            else:
                pieces.append('/')
        elif current_char != '\\':
            pieces.append(re.escape(current_char))
        # Track the escape state for the next character.
        if current_char == '\\':
            if is_escaped:
                pieces.append(re.escape(current_char))
            is_escaped = not is_escaped
        else:
            is_escaped = False
    result = ''.join(pieces)
    if not nested:
        # Global inline flags must come first: "re" rejects them anywhere
        # else in the expression on Python 3.11 and later.
        result = r'(?ms)' + result + r'\Z'
    return result, numeric_groups
|