diff --git a/.gitignore b/.gitignore index 2428aba..c909359 100755 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ *.pyc *.pyo +.vscode/ .cache/ .idea/ .tox/ diff --git a/README.rst b/README.rst index d6c657d..473ef87 100644 --- a/README.rst +++ b/README.rst @@ -379,6 +379,8 @@ the pattern, the actual match represents the shortest successful match for **Version history (in brief)**: +- 1.17.0 Make left- and center-aligned search consume up to next space +- 1.16.0 Make compiled parse objects pickleable (thanks @martinResearch) - 1.15.0 Several fixes for parsing non-base 10 numbers (thanks @vladikcomper) - 1.14.0 More broad acceptance of Fortran number format (thanks @purpleskyfall) - 1.13.1 Project metadata correction. @@ -452,5 +454,5 @@ the pattern, the actual match represents the shortest successful match for and removed the restriction on mixing fixed-position and named fields - 1.0.0 initial release -This code is copyright 2012-2019 Richard Jones +This code is copyright 2012-2020 Richard Jones See the end of the source file for the license of use. diff --git a/parse.py b/parse.py index e5d4ef5..279d554 100644 --- a/parse.py +++ b/parse.py @@ -379,6 +379,7 @@ **Version history (in brief)**: +- 1.17.0 Make left- and center-aligned search consume up to next space - 1.16.0 Make compiled parse objects pickleable (thanks @martinResearch) - 1.15.0 Several fixes for parsing non-base 10 numbers (thanks @vladikcomper) - 1.14.0 More broad acceptance of Fortran number format (thanks @purpleskyfall) @@ -458,7 +459,8 @@ ''' from __future__ import absolute_import -__version__ = '1.16.0' + +__version__ = '1.17.0' # yes, I now have two problems import re @@ -495,15 +497,17 @@ def with_pattern(pattern, regex_group_count=None): :param regex_group_count: Indicates how many regex-groups are in pattern. :return: wrapped function """ + def decorator(func): func.pattern = pattern func.regex_group_count = regex_group_count return func + return decorator class int_convert: - '''Convert a string to an integer. + """Convert a string to an integer. The string may start with a sign. @@ -514,13 +518,12 @@ class int_convert: it overrides the default base of 10. It may also have other non-numeric characters that we can ignore. - ''' + """ CHARS = '0123456789abcdefghijklmnopqrstuvwxyz' def __init__(self, base=None): self.base = base - def __call__(self, string, match): if string[0] == '-': @@ -536,39 +539,42 @@ def __call__(self, string, match): # If base wasn't specified, detect it automatically if self.base is None: - # Assume decimal number, unless different base is detected - self.base = 10 + # Assume decimal number, unless different base is detected + self.base = 10 - # For number formats starting with 0b, 0o, 0x, use corresponding base ... - if string[number_start] == '0' and len(string) - number_start > 2: - if string[number_start+1] in 'bB': - self.base = 2 - elif string[number_start+1] in 'oO': - self.base = 8 - elif string[number_start+1] in 'xX': - self.base = 16 + # For number formats starting with 0b, 0o, 0x, use corresponding base ... + if string[number_start] == '0' and len(string) - number_start > 2: + if string[number_start + 1] in 'bB': + self.base = 2 + elif string[number_start + 1] in 'oO': + self.base = 8 + elif string[number_start + 1] in 'xX': + self.base = 16 - chars = int_convert.CHARS[:self.base] + chars = int_convert.CHARS[: self.base] string = re.sub('[^%s]' % chars, '', string.lower()) return sign * int(string, self.base) - + + class convert_first: - """Convert the first element of a pair. + """Convert the first element of a pair. This equivalent to lambda s,m: converter(s). But unlike a lambda function, it can be pickled """ + def __init__(self, converter): self.converter = converter + def __call__(self, string, match): return self.converter(string) def percentage(string, match): - return float(string[:-1]) / 100. + return float(string[:-1]) / 100.0 class FixedTzOffset(tzinfo): - """Fixed offset in minutes east from UTC. - """ + """Fixed offset in minutes east from UTC.""" + ZERO = timedelta(0) def __init__(self, offset, name): @@ -576,8 +582,7 @@ def __init__(self, offset, name): self._name = name def __repr__(self): - return '<%s %s %s>' % (self.__class__.__name__, self._name, - self._offset) + return '<%s %s %s>' % (self.__class__.__name__, self._name, self._offset) def utcoffset(self, dt): return self._offset @@ -595,18 +600,29 @@ def __eq__(self, other): MONTHS_MAP = dict( - Jan=1, January=1, - Feb=2, February=2, - Mar=3, March=3, - Apr=4, April=4, + Jan=1, + January=1, + Feb=2, + February=2, + Mar=3, + March=3, + Apr=4, + April=4, May=5, - Jun=6, June=6, - Jul=7, July=7, - Aug=8, August=8, - Sep=9, September=9, - Oct=10, October=10, - Nov=11, November=11, - Dec=12, December=12 + Jun=6, + June=6, + Jul=7, + July=7, + Aug=8, + August=8, + Sep=9, + September=9, + Oct=10, + October=10, + Nov=11, + November=11, + Dec=12, + December=12, ) DAYS_PAT = r'(Mon|Tue|Wed|Thu|Fri|Sat|Sun)' MONTHS_PAT = r'(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)' @@ -616,17 +632,28 @@ def __eq__(self, other): TZ_PAT = r'(\s+[-+]\d\d?:?\d\d)' -def date_convert(string, match, ymd=None, mdy=None, dmy=None, - d_m_y=None, hms=None, am=None, tz=None, mm=None, dd=None): - '''Convert the incoming string containing some date / time info into a +def date_convert( + string, + match, + ymd=None, + mdy=None, + dmy=None, + d_m_y=None, + hms=None, + am=None, + tz=None, + mm=None, + dd=None, +): + """Convert the incoming string containing some date / time info into a datetime instance. - ''' + """ groups = match.groups() time_only = False if mm and dd: - y=datetime.today().year - m=groups[mm] - d=groups[dd] + y = datetime.today().year + m = groups[mm] + d = groups[dd] elif ymd is not None: y, m, d = re.split(r'[-/\s]', groups[ymd]) elif mdy is not None: @@ -717,13 +744,11 @@ class RepeatedNameError(ValueError): REGEX_SAFETY = re.compile(r'([?\\\\.[\]()*+\^$!\|])') # allowed field types -ALLOWED_TYPES = set(list('nbox%fFegwWdDsSl') + - ['t' + c for c in 'ieahgcts']) +ALLOWED_TYPES = set(list('nbox%fFegwWdDsSl') + ['t' + c for c in 'ieahgcts']) def extract_format(format, extra_types): - '''Pull apart the format [[fill]align][0][width][.precision][type] - ''' + """Pull apart the format [[fill]align][0][width][.precision][type]""" fill = align = None if format[0] in '<>=^': align = format[0] @@ -768,8 +793,8 @@ def extract_format(format, extra_types): class Parser(object): - '''Encapsulate a format string that may be used to parse other strings. - ''' + """Encapsulate a format string that may be used to parse other strings.""" + def __init__(self, format, extra_types=None, case_sensitive=False): # a mapping of a name as in {hello.world} to a regex-group compatible # name, like hello__world Its used to prevent the transformation of @@ -803,8 +828,7 @@ def __init__(self, format, extra_types=None, case_sensitive=False): def __repr__(self): if len(self._format) > 20: - return '<%s %r>' % (self.__class__.__name__, - self._format[:17] + '...') + return '<%s %r>' % (self.__class__.__name__, self._format[:17] + '...') return '<%s %r>' % (self.__class__.__name__, self._format) @property @@ -816,8 +840,9 @@ def _search_re(self): # access error through sys to keep py3k and backward compat e = str(sys.exc_info()[1]) if e.endswith('this version only supports 100 named groups'): - raise TooManyFields('sorry, you are attempting to parse ' - 'too many complex fields') + raise TooManyFields( + 'sorry, you are attempting to parse ' 'too many complex fields' + ) return self.__search_re @property @@ -830,27 +855,29 @@ def _match_re(self): # access error through sys to keep py3k and backward compat e = str(sys.exc_info()[1]) if e.endswith('this version only supports 100 named groups'): - raise TooManyFields('sorry, you are attempting to parse ' - 'too many complex fields') + raise TooManyFields( + 'sorry, you are attempting to parse ' 'too many complex fields' + ) except re.error: - raise NotImplementedError("Group names (e.g. (?P) can " - "cause failure, as they are not escaped properly: '%s'" % - expression) + raise NotImplementedError( + "Group names (e.g. (?P) can " + "cause failure, as they are not escaped properly: '%s'" % expression + ) return self.__match_re - @property + @property def named_fields(self): return self._named_fields.copy() - - @property + + @property def fixed_fields(self): return self._fixed_fields.copy() def parse(self, string, evaluate_result=True): - '''Match my format to the string exactly. + """Match my format to the string exactly. Return a Result or Match instance or None if there's no match. - ''' + """ m = self._match_re.match(string) if m is None: return None @@ -861,7 +888,7 @@ def parse(self, string, evaluate_result=True): return Match(self, m) def search(self, string, pos=0, endpos=None, evaluate_result=True): - '''Search the string for my format. + """Search the string for my format. Optionally start the search at "pos" character index and limit the search to a maximum index of endpos - equivalent to @@ -871,7 +898,7 @@ def search(self, string, pos=0, endpos=None, evaluate_result=True): Match instance is returned instead of the actual Result instance. Return either a Result instance or None if there's no match. - ''' + """ if endpos is None: endpos = len(string) m = self._search_re.search(string, pos, endpos) @@ -883,8 +910,10 @@ def search(self, string, pos=0, endpos=None, evaluate_result=True): else: return Match(self, m) - def findall(self, string, pos=0, endpos=None, extra_types=None, evaluate_result=True): - '''Search "string" for all occurrences of "format". + def findall( + self, string, pos=0, endpos=None, extra_types=None, evaluate_result=True + ): + """Search "string" for all occurrences of "format". Optionally start the search at "pos" character index and limit the search to a maximum index of endpos - equivalent to @@ -892,10 +921,12 @@ def findall(self, string, pos=0, endpos=None, extra_types=None, evaluate_result= Returns an iterator that holds Result or Match instances for each format match found. - ''' + """ if endpos is None: endpos = len(string) - return ResultIterator(self, string, pos, endpos, evaluate_result=evaluate_result) + return ResultIterator( + self, string, pos, endpos, evaluate_result=evaluate_result + ) def _expand_named_fields(self, named_fields): result = {} @@ -909,7 +940,7 @@ def _expand_named_fields(self, named_fields): if subkeys: for subkey in re.findall(r'\[[^\]]+\]', subkeys): - d = d.setdefault(k,{}) + d = d.setdefault(k, {}) k = subkey[1:-1] # assign the value to the last key @@ -942,8 +973,7 @@ def evaluate_result(self, m): # now figure the match spans spans = dict((n, m.span(name_map[n])) for n in named_fields) - spans.update((i, m.span(n + 1)) - for i, n in enumerate(self._fixed_fields)) + spans.update((i, m.span(n + 1)) for i, n in enumerate(self._fixed_fields)) # and that's our result return Result(fixed_fields, self._expand_named_fields(named_fields), spans) @@ -1004,9 +1034,11 @@ def _handle_field(self, field): name = field if name in self._name_to_group_map: if self._name_types[name] != format: - raise RepeatedNameError('field type %r for field "%s" ' - 'does not match previous seen type %r' % (format, - name, self._name_types[name])) + raise RepeatedNameError( + 'field type %r for field "%s" ' + 'does not match previous seen type %r' + % (format, name, self._name_types[name]) + ) group = self._name_to_group_map[name] # match previously-seen value return r'(?P=%s)' % group @@ -1080,63 +1112,88 @@ def _handle_field(self, field): width = r'{1,%s}' % int(format['width']) else: width = '+' - s = r'\d{w}|[-+ ]?0[xX][0-9a-fA-F]{w}|[-+ ]?0[bB][01]{w}|[-+ ]?0[oO][0-7]{w}'.format(w=width) - self._type_conversions[group] = int_convert() # do not specify number base, determine it automatically + s = r'\d{w}|[-+ ]?0[xX][0-9a-fA-F]{w}|[-+ ]?0[bB][01]{w}|[-+ ]?0[oO][0-7]{w}'.format( + w=width + ) + self._type_conversions[ + group + ] = int_convert() # do not specify number base, determine it automatically elif type == 'ti': - s = r'(\d{4}-\d\d-\d\d)((\s+|T)%s)?(Z|\s*[-+]\d\d:?\d\d)?' % \ - TIME_PAT + s = r'(\d{4}-\d\d-\d\d)((\s+|T)%s)?(Z|\s*[-+]\d\d:?\d\d)?' % TIME_PAT n = self._group_index - self._type_conversions[group] = partial(date_convert, ymd=n + 1, - hms=n + 4, tz=n + 7) + self._type_conversions[group] = partial( + date_convert, ymd=n + 1, hms=n + 4, tz=n + 7 + ) self._group_index += 7 elif type == 'tg': s = r'(\d{1,2}[-/](\d{1,2}|%s)[-/]\d{4})(\s+%s)?%s?%s?' % ( - ALL_MONTHS_PAT, TIME_PAT, AM_PAT, TZ_PAT) + ALL_MONTHS_PAT, + TIME_PAT, + AM_PAT, + TZ_PAT, + ) n = self._group_index - self._type_conversions[group] = partial(date_convert, dmy=n + 1, - hms=n + 5, am=n + 8, tz=n + 9) + self._type_conversions[group] = partial( + date_convert, dmy=n + 1, hms=n + 5, am=n + 8, tz=n + 9 + ) self._group_index += 9 elif type == 'ta': s = r'((\d{1,2}|%s)[-/]\d{1,2}[-/]\d{4})(\s+%s)?%s?%s?' % ( - ALL_MONTHS_PAT, TIME_PAT, AM_PAT, TZ_PAT) + ALL_MONTHS_PAT, + TIME_PAT, + AM_PAT, + TZ_PAT, + ) n = self._group_index - self._type_conversions[group] = partial(date_convert, mdy=n + 1, - hms=n + 5, am=n + 8, tz=n + 9) + self._type_conversions[group] = partial( + date_convert, mdy=n + 1, hms=n + 5, am=n + 8, tz=n + 9 + ) self._group_index += 9 elif type == 'te': # this will allow microseconds through if they're present, but meh - s = r'(%s,\s+)?(\d{1,2}\s+%s\s+\d{4})\s+%s%s' % (DAYS_PAT, - MONTHS_PAT, TIME_PAT, TZ_PAT) + s = r'(%s,\s+)?(\d{1,2}\s+%s\s+\d{4})\s+%s%s' % ( + DAYS_PAT, + MONTHS_PAT, + TIME_PAT, + TZ_PAT, + ) n = self._group_index - self._type_conversions[group] = partial(date_convert, dmy=n + 3, - hms=n + 5, tz=n + 8) + self._type_conversions[group] = partial( + date_convert, dmy=n + 3, hms=n + 5, tz=n + 8 + ) self._group_index += 8 elif type == 'th': # slight flexibility here from the stock Apache format - s = r'(\d{1,2}[-/]%s[-/]\d{4}):%s%s' % (MONTHS_PAT, TIME_PAT, - TZ_PAT) + s = r'(\d{1,2}[-/]%s[-/]\d{4}):%s%s' % (MONTHS_PAT, TIME_PAT, TZ_PAT) n = self._group_index - self._type_conversions[group] = partial(date_convert, dmy=n + 1, - hms=n + 3, tz=n + 6) + self._type_conversions[group] = partial( + date_convert, dmy=n + 1, hms=n + 3, tz=n + 6 + ) self._group_index += 6 elif type == 'tc': s = r'(%s)\s+%s\s+(\d{1,2})\s+%s\s+(\d{4})' % ( - DAYS_PAT, MONTHS_PAT, TIME_PAT) + DAYS_PAT, + MONTHS_PAT, + TIME_PAT, + ) n = self._group_index - self._type_conversions[group] = partial(date_convert, - d_m_y=(n + 4, n + 3, n + 8), hms=n + 5) + self._type_conversions[group] = partial( + date_convert, d_m_y=(n + 4, n + 3, n + 8), hms=n + 5 + ) self._group_index += 8 elif type == 'tt': s = r'%s?%s?%s?' % (TIME_PAT, AM_PAT, TZ_PAT) n = self._group_index - self._type_conversions[group] = partial(date_convert, hms=n + 1, - am=n + 4, tz=n + 5) + self._type_conversions[group] = partial( + date_convert, hms=n + 1, am=n + 4, tz=n + 5 + ) self._group_index += 5 elif type == 'ts': s = r'%s(\s+)(\d+)(\s+)(\d{1,2}:\d{1,2}:\d{1,2})?' % MONTHS_PAT n = self._group_index - self._type_conversions[group] = partial(date_convert, mm=n+1, dd=n+3, - hms=n + 5) + self._type_conversions[group] = partial( + date_convert, mm=n + 1, dd=n + 3, hms=n + 5 + ) self._group_index += 5 elif type == 'l': s = r'[A-Za-z]+' @@ -1190,24 +1247,25 @@ def _handle_field(self, field): # align "=" has been handled if align == '<': - s = '%s%s*' % (s, fill) + s = '%s%s+' % (s, fill) elif align == '>': s = '%s*%s' % (fill, s) elif align == '^': - s = '%s*%s%s*' % (fill, s, fill) + s = '%s*%s%s+' % (fill, s, fill) return s class Result(object): - '''The result of a parse() or search(). + """The result of a parse() or search(). Fixed results may be looked up using `result[index]`. Named results may be looked up using `result['name']`. Named results may be tested for existence using `'name' in result`. - ''' + """ + def __init__(self, fixed, named, spans): self.fixed = fixed self.named = named @@ -1219,19 +1277,19 @@ def __getitem__(self, item): return self.named[item] def __repr__(self): - return '<%s %r %r>' % (self.__class__.__name__, self.fixed, - self.named) + return '<%s %r %r>' % (self.__class__.__name__, self.fixed, self.named) def __contains__(self, name): return name in self.named class Match(object): - '''The result of a parse() or search() if no results are generated. + """The result of a parse() or search() if no results are generated. This class is only used to expose internal used regex match objects to the user and use them for external Parser.evaluate_result calls. - ''' + """ + def __init__(self, parser, match): self.parser = parser self.match = match @@ -1242,10 +1300,11 @@ def evaluate_result(self): class ResultIterator(object): - '''The result of a findall() operation. + """The result of a findall() operation. Each element is a Result instance. - ''' + """ + def __init__(self, parser, string, pos, endpos, evaluate_result=True): self.parser = parser self.string = string @@ -1272,7 +1331,7 @@ def __next__(self): def parse(format, string, extra_types=None, evaluate_result=True, case_sensitive=False): - '''Using "format" attempt to pull values from "string". + """Using "format" attempt to pull values from "string". The format must match the string contents exactly. If the value you're looking for is instead just a part of the string use @@ -1296,14 +1355,21 @@ def parse(format, string, extra_types=None, evaluate_result=True, case_sensitive See the module documentation for the use of "extra_types". In the case there is no match parse() will return None. - ''' + """ p = Parser(format, extra_types=extra_types, case_sensitive=case_sensitive) return p.parse(string, evaluate_result=evaluate_result) -def search(format, string, pos=0, endpos=None, extra_types=None, evaluate_result=True, - case_sensitive=False): - '''Search "string" for the first occurrence of "format". +def search( + format, + string, + pos=0, + endpos=None, + extra_types=None, + evaluate_result=True, + case_sensitive=False, +): + """Search "string" for the first occurrence of "format". The format may occur anywhere within the string. If instead you wish for the format to exactly match the string @@ -1330,14 +1396,21 @@ def search(format, string, pos=0, endpos=None, extra_types=None, evaluate_result See the module documentation for the use of "extra_types". In the case there is no match parse() will return None. - ''' + """ p = Parser(format, extra_types=extra_types, case_sensitive=case_sensitive) return p.search(string, pos, endpos, evaluate_result=evaluate_result) -def findall(format, string, pos=0, endpos=None, extra_types=None, evaluate_result=True, - case_sensitive=False): - '''Search "string" for all occurrences of "format". +def findall( + format, + string, + pos=0, + endpos=None, + extra_types=None, + evaluate_result=True, + case_sensitive=False, +): + """Search "string" for all occurrences of "format". You will be returned an iterator that holds Result instances for each format match found. @@ -1361,13 +1434,13 @@ def findall(format, string, pos=0, endpos=None, extra_types=None, evaluate_resul If the format is invalid a ValueError will be raised. See the module documentation for the use of "extra_types". - ''' + """ p = Parser(format, extra_types=extra_types, case_sensitive=case_sensitive) return p.findall(string, pos, endpos, evaluate_result=evaluate_result) def compile(format, extra_types=None, case_sensitive=False): - '''Create a Parser instance to parse "format". + """Create a Parser instance to parse "format". The resultant Parser has a method .parse(string) which behaves in the same manner as parse(format, string). @@ -1381,7 +1454,7 @@ def compile(format, extra_types=None, case_sensitive=False): See the module documentation for the use of "extra_types". Returns a Parser instance. - ''' + """ return Parser(format, extra_types=extra_types, case_sensitive=case_sensitive) diff --git a/test_parse.py b/test_parse.py index 537370b..8087382 100755 --- a/test_parse.py +++ b/test_parse.py @@ -30,37 +30,26 @@ def test_fixed(self): def test_named(self): # pull a named string out of another string self._test_expression('{name}', r'(?P.+?)') - self._test_expression('{name} {other}', - r'(?P.+?) (?P.+?)') + self._test_expression('{name} {other}', r'(?P.+?) (?P.+?)') def test_named_typed(self): # pull a named string out of another string self._test_expression('{name:w}', r'(?P\w+)') - self._test_expression('{name:w} {other:w}', - r'(?P\w+) (?P\w+)') - - def test_beaker(self): - # skip some trailing whitespace - self._test_expression('{:<}', r'(.+?) *') - - def test_left_fill(self): - # skip some trailing periods - self._test_expression('{:.<}', r'(.+?)\.*') + self._test_expression('{name:w} {other:w}', r'(?P\w+) (?P\w+)') def test_bird(self): # skip some trailing whitespace self._test_expression('{:>}', r' *(.+?)') - def test_center(self): - # skip some surrounding whitespace - self._test_expression('{:^}', r' *(.+?) *') - def test_format_variety(self): def _(fmt, matches): d = parse.extract_format(fmt, {'spam': 'spam'}) for k in matches: - self.assertEqual(d.get(k), matches[k], - 'm["%s"]=%r, expect %r' % (k, d.get(k), matches[k])) + self.assertEqual( + d.get(k), + matches[k], + 'm["%s"]=%r, expect %r' % (k, d.get(k), matches[k]), + ) for t in '%obxegfdDwWsS': _(t, dict(type=t)) @@ -77,8 +66,7 @@ def _(fmt, matches): _('ti', dict(type='ti')) _('spam', dict(type='spam')) - _('.^010d', dict(type='d', width='10', align='^', fill='.', - zero=True)) + _('.^010d', dict(type='d', width='10', align='^', fill='.', zero=True)) _('.2f', dict(type='f', precision='2')) _('10.2f', dict(type='f', width='10', precision='2')) @@ -104,8 +92,9 @@ def test_dot_separated_fields_name_collisions(self): assert res.named['a___b'] == 'd' def test_invalid_groupnames_are_handled_gracefully(self): - self.assertRaises(NotImplementedError, parse.parse, - "{hello['world']}", "doesn't work") + self.assertRaises( + NotImplementedError, parse.parse, "{hello['world']}", "doesn't work" + ) class TestResult(unittest.TestCase): @@ -144,7 +133,7 @@ def test_no_evaluate_result(self): # pull a fixed value out of string match = parse.parse('hello {}', 'hello world', evaluate_result=False) r = match.evaluate_result() - self.assertEqual(r.fixed, ('world', )) + self.assertEqual(r.fixed, ('world',)) def test_regular_expression(self): # match an actual regular expression @@ -179,22 +168,22 @@ def test_hexadecimal(self): def test_fixed(self): # pull a fixed value out of string r = parse.parse('hello {}', 'hello world') - self.assertEqual(r.fixed, ('world', )) + self.assertEqual(r.fixed, ('world',)) def test_left(self): # pull left-aligned text out of string r = parse.parse('{:<} world', 'hello world') - self.assertEqual(r.fixed, ('hello', )) + self.assertEqual(r.fixed, ('hello',)) def test_right(self): # pull right-aligned text out of string r = parse.parse('hello {:>}', 'hello world') - self.assertEqual(r.fixed, ('world', )) + self.assertEqual(r.fixed, ('world',)) def test_center(self): # pull center-aligned text out of string r = parse.parse('hello {:^} world', 'hello there world') - self.assertEqual(r.fixed, ('there', )) + self.assertEqual(r.fixed, ('there',)) def test_typed(self): # pull a named, typed values out of string @@ -206,20 +195,22 @@ def test_typed(self): def test_precision(self): # pull a float out of a string r = parse.parse('Pi = {:.7f}', 'Pi = 3.1415926') - self.assertEqual(r.fixed, (3.1415926, )) + self.assertEqual(r.fixed, (3.1415926,)) r = parse.parse('Pi/10 = {:8.5f}', 'Pi/10 = 0.31415') - self.assertEqual(r.fixed, (0.31415, )) + self.assertEqual(r.fixed, (0.31415,)) # float may have not leading zero r = parse.parse('Pi/10 = {:8.5f}', 'Pi/10 = .31415') - self.assertEqual(r.fixed, (0.31415, )) + self.assertEqual(r.fixed, (0.31415,)) r = parse.parse('Pi/10 = {:8.5f}', 'Pi/10 = -.31415') - self.assertEqual(r.fixed, (-0.31415, )) + self.assertEqual(r.fixed, (-0.31415,)) def test_custom_type(self): # use a custom type - r = parse.parse('{:shouty} {:spam}', 'hello world', - dict(shouty=lambda s: s.upper(), - spam=lambda s: ''.join(reversed(s)))) + r = parse.parse( + '{:shouty} {:spam}', + 'hello world', + dict(shouty=lambda s: s.upper(), spam=lambda s: ''.join(reversed(s))), + ) self.assertEqual(r.fixed, ('HELLO', 'dlrow')) r = parse.parse('{:d}', '12', dict(d=lambda s: int(s) * 2)) self.assertEqual(r.fixed, (24,)) @@ -228,8 +219,7 @@ def test_custom_type(self): def test_typed_fail(self): # pull a named, typed values out of string - self.assertEqual(parse.parse('hello {:d} {:w}', 'hello people 12'), - None) + self.assertEqual(parse.parse('hello {:d} {:w}', 'hello people 12'), None) def test_named(self): # pull a named value out of string @@ -258,13 +248,11 @@ def test_named_repeated_type_fail_value(self): def test_named_repeated_type_mismatch(self): # test repeated name with mismatched type - self.assertRaises(parse.RepeatedNameError, parse.compile, - '{n:d} {n:w}') + self.assertRaises(parse.RepeatedNameError, parse.compile, '{n:d} {n:w}') def test_mixed(self): # pull a fixed and named values out of string - r = parse.parse('hello {} {name} {} {spam}', - 'hello world and other beings') + r = parse.parse('hello {} {name} {} {spam}', 'hello world and other beings') self.assertEqual(r.fixed, ('world', 'other')) self.assertEqual(r.named, dict(name='and', spam='beings')) @@ -281,8 +269,7 @@ def test_named_aligned_typed(self): self.assertEqual(r.named, dict(number=12, things='people')) r = parse.parse('hello {number:>d} {things}', 'hello 12 people') self.assertEqual(r.named, dict(number=12, things='people')) - r = parse.parse('hello {number:^d} {things}', - 'hello 12 people') + r = parse.parse('hello {number:^d} {things}', 'hello 12 people') self.assertEqual(r.named, dict(number=12, things='people')) def test_multiline(self): @@ -311,8 +298,9 @@ def test_spans(self): string = 'hello world and other beings' r = parse.parse('hello {} {name} {} {spam}', string) - self.assertEqual(r.spans, {0: (6, 11), 'name': (12, 15), - 1: (16, 21), 'spam': (22, 28)}) + self.assertEqual( + r.spans, {0: (6, 11), 'name': (12, 15), 1: (16, 21), 'spam': (22, 28)} + ) def test_numbers(self): # pull a numbers out of a string @@ -323,15 +311,16 @@ def y(fmt, s, e, str_equals=False): self.fail('%r (%r) did not match %r' % (fmt, p._expression, s)) r = r.fixed[0] if str_equals: - self.assertEqual(str(r), str(e), - '%r found %r in %r, not %r' % (fmt, r, s, e)) + self.assertEqual( + str(r), str(e), '%r found %r in %r, not %r' % (fmt, r, s, e) + ) else: - self.assertEqual(r, e, - '%r found %r in %r, not %r' % (fmt, r, s, e)) + self.assertEqual(r, e, '%r found %r in %r, not %r' % (fmt, r, s, e)) def n(fmt, s, e): if parse.parse(fmt, s) is not None: self.fail('%r matched %r' % (fmt, s)) + y('a {:d} b', 'a 0 b', 0) y('a {:d} b', 'a 12 b', 12) y('a {:5d} b', 'a 12 b', 12) @@ -342,11 +331,11 @@ def n(fmt, s, e): y('a {:d} b', 'a 0b1000 b', 8) y('a {:d} b', 'a 0o1000 b', 512) y('a {:d} b', 'a 0x1000 b', 4096) - y('a {:d} b', 'a 0xabcdef b', 0xabcdef) + y('a {:d} b', 'a 0xabcdef b', 0xABCDEF) y('a {:%} b', 'a 100% b', 1) - y('a {:%} b', 'a 50% b', .5) - y('a {:%} b', 'a 50.1% b', .501) + y('a {:%} b', 'a 50% b', 0.5) + y('a {:%} b', 'a 50.1% b', 0.501) y('a {:n} b', 'a 100 b', 100) y('a {:n} b', 'a 1,000 b', 1000) @@ -390,9 +379,9 @@ def n(fmt, s, e): y('a {:b} b', 'a 0b1000 b', 8) y('a {:o} b', 'a 12345670 b', int('12345670', 8)) y('a {:o} b', 'a 0o12345670 b', int('12345670', 8)) - y('a {:x} b', 'a 1234567890abcdef b', 0x1234567890abcdef) + y('a {:x} b', 'a 1234567890abcdef b', 0x1234567890ABCDEF) y('a {:x} b', 'a 1234567890ABCDEF b', 0x1234567890ABCDEF) - y('a {:x} b', 'a 0x1234567890abcdef b', 0x1234567890abcdef) + y('a {:x} b', 'a 0x1234567890abcdef b', 0x1234567890ABCDEF) y('a {:x} b', 'a 0x1234567890ABCDEF b', 0x1234567890ABCDEF) y('a {:05d} b', 'a 00001 b', 1) @@ -404,7 +393,7 @@ def n(fmt, s, e): y('a {:x=5d} b', 'a xxx12 b', 12) y('a {:x=5d} b', 'a -xxx12 b', -12) - # Test that hex numbers that ambiguously start with 0b / 0B are parsed correctly + # Test that hex numbers that ambiguously start with 0b / 0B are parsed correctly # See issue #65 (https://github.com/r1chardj0n3s/parse/issues/65) y('a {:x} b', 'a 0B b', 0xB) y('a {:x} b', 'a 0B1 b', 0xB1) @@ -439,14 +428,14 @@ def y(fmt, s, e, tz=None): self.fail('%r (%r) did not match %r' % (fmt, p._expression, s)) r = r.fixed[0] try: - self.assertEqual(r, e, - '%r found %r in %r, not %r' % (fmt, r, s, e)) + self.assertEqual(r, e, '%r found %r in %r, not %r' % (fmt, r, s, e)) except ValueError: self.fail('%r found %r in %r, not %r' % (fmt, r, s, e)) if tz is not None: - self.assertEqual(r.tzinfo, tz, - '%r found TZ %r in %r, not %r' % (fmt, r.tzinfo, s, e)) + self.assertEqual( + r.tzinfo, tz, '%r found TZ %r in %r, not %r' % (fmt, r.tzinfo, s, e) + ) def n(fmt, s, e): if parse.parse(fmt, s) is not None: @@ -461,40 +450,69 @@ def n(fmt, s, e): y('a {:ti} b', 'a 1997-07-16 b', datetime(1997, 7, 16)) # YYYY-MM-DDThh:mmTZD (eg 1997-07-16T19:20+01:00) - y('a {:ti} b', 'a 1997-07-16 19:20 b', - datetime(1997, 7, 16, 19, 20, 0)) - y('a {:ti} b', 'a 1997-07-16T19:20 b', - datetime(1997, 7, 16, 19, 20, 0)) - y('a {:ti} b', 'a 1997-07-16T19:20Z b', - datetime(1997, 7, 16, 19, 20, tzinfo=utc)) - y('a {:ti} b', 'a 1997-07-16T19:20+0100 b', - datetime(1997, 7, 16, 19, 20, tzinfo=tz60)) - y('a {:ti} b', 'a 1997-07-16T19:20+01:00 b', - datetime(1997, 7, 16, 19, 20, tzinfo=tz60)) - y('a {:ti} b', 'a 1997-07-16T19:20 +01:00 b', - datetime(1997, 7, 16, 19, 20, tzinfo=tz60)) + y('a {:ti} b', 'a 1997-07-16 19:20 b', datetime(1997, 7, 16, 19, 20, 0)) + y('a {:ti} b', 'a 1997-07-16T19:20 b', datetime(1997, 7, 16, 19, 20, 0)) + y( + 'a {:ti} b', + 'a 1997-07-16T19:20Z b', + datetime(1997, 7, 16, 19, 20, tzinfo=utc), + ) + y( + 'a {:ti} b', + 'a 1997-07-16T19:20+0100 b', + datetime(1997, 7, 16, 19, 20, tzinfo=tz60), + ) + y( + 'a {:ti} b', + 'a 1997-07-16T19:20+01:00 b', + datetime(1997, 7, 16, 19, 20, tzinfo=tz60), + ) + y( + 'a {:ti} b', + 'a 1997-07-16T19:20 +01:00 b', + datetime(1997, 7, 16, 19, 20, tzinfo=tz60), + ) # YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30+01:00) - y('a {:ti} b', 'a 1997-07-16 19:20:30 b', - datetime(1997, 7, 16, 19, 20, 30)) - y('a {:ti} b', 'a 1997-07-16T19:20:30 b', - datetime(1997, 7, 16, 19, 20, 30)) - y('a {:ti} b', 'a 1997-07-16T19:20:30Z b', - datetime(1997, 7, 16, 19, 20, 30, tzinfo=utc)) - y('a {:ti} b', 'a 1997-07-16T19:20:30+01:00 b', - datetime(1997, 7, 16, 19, 20, 30, tzinfo=tz60)) - y('a {:ti} b', 'a 1997-07-16T19:20:30 +01:00 b', - datetime(1997, 7, 16, 19, 20, 30, tzinfo=tz60)) + y('a {:ti} b', 'a 1997-07-16 19:20:30 b', datetime(1997, 7, 16, 19, 20, 30)) + y('a {:ti} b', 'a 1997-07-16T19:20:30 b', datetime(1997, 7, 16, 19, 20, 30)) + y( + 'a {:ti} b', + 'a 1997-07-16T19:20:30Z b', + datetime(1997, 7, 16, 19, 20, 30, tzinfo=utc), + ) + y( + 'a {:ti} b', + 'a 1997-07-16T19:20:30+01:00 b', + datetime(1997, 7, 16, 19, 20, 30, tzinfo=tz60), + ) + y( + 'a {:ti} b', + 'a 1997-07-16T19:20:30 +01:00 b', + datetime(1997, 7, 16, 19, 20, 30, tzinfo=tz60), + ) # YYYY-MM-DDThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00) - y('a {:ti} b', 'a 1997-07-16 19:20:30.500000 b', - datetime(1997, 7, 16, 19, 20, 30, 500000)) - y('a {:ti} b', 'a 1997-07-16T19:20:30.500000 b', - datetime(1997, 7, 16, 19, 20, 30, 500000)) - y('a {:ti} b', 'a 1997-07-16T19:20:30.5Z b', - datetime(1997, 7, 16, 19, 20, 30, 500000, tzinfo=utc)) - y('a {:ti} b', 'a 1997-07-16T19:20:30.5+01:00 b', - datetime(1997, 7, 16, 19, 20, 30, 500000, tzinfo=tz60)) + y( + 'a {:ti} b', + 'a 1997-07-16 19:20:30.500000 b', + datetime(1997, 7, 16, 19, 20, 30, 500000), + ) + y( + 'a {:ti} b', + 'a 1997-07-16T19:20:30.500000 b', + datetime(1997, 7, 16, 19, 20, 30, 500000), + ) + y( + 'a {:ti} b', + 'a 1997-07-16T19:20:30.5Z b', + datetime(1997, 7, 16, 19, 20, 30, 500000, tzinfo=utc), + ) + y( + 'a {:ti} b', + 'a 1997-07-16T19:20:30.5+01:00 b', + datetime(1997, 7, 16, 19, 20, 30, 500000, tzinfo=tz60), + ) aest_d = datetime(2011, 11, 21, 10, 21, 36, tzinfo=aest) dt = datetime(2011, 11, 21, 10, 21, 36) @@ -530,9 +548,21 @@ def n(fmt, s, e): y('a {:ta} b', 'a November-21-2011 b', d) # ts Linux System log format datetime - y('a {:ts} b', 'a Nov 21 10:21:36 b', datetime(datetime.today().year, 11, 21, 10, 21, 36)) - y('a {:ts} b', 'a Nov 1 10:21:36 b', datetime(datetime.today().year, 11, 1, 10, 21, 36)) - y('a {:ts} b', 'a Nov 1 03:21:36 b', datetime(datetime.today().year, 11, 1, 3, 21, 36)) + y( + 'a {:ts} b', + 'a Nov 21 10:21:36 b', + datetime(datetime.today().year, 11, 21, 10, 21, 36), + ) + y( + 'a {:ts} b', + 'a Nov 1 10:21:36 b', + datetime(datetime.today().year, 11, 1, 10, 21, 36), + ) + y( + 'a {:ts} b', + 'a Nov 1 03:21:36 b', + datetime(datetime.today().year, 11, 1, 3, 21, 36), + ) # th HTTP log format date/time datetime y('a {:th} b', 'a 21/Nov/2011:10:21:36 +1000 b', aest_d) @@ -579,7 +609,8 @@ def test_datetime_group_count(self): def test_mixed_types(self): # stress-test: pull one of everything out of a string - r = parse.parse(''' + r = parse.parse( + ''' letters: {:w} non-letters: {:W} whitespace: "{:s}" @@ -602,7 +633,7 @@ def test_mixed_types(self): time: {:tt} final value: {} ''', - ''' + ''' letters: abcdef_GHIJLK non-letters: !@#%$ *^% whitespace: " \t\n" @@ -624,12 +655,14 @@ def test_mixed_types(self): HTTP e.g. 21/Nov/2011:00:07:11 +0000 time: 10:21:36 PM -5:30 final value: spam - ''') + ''', + ) self.assertNotEqual(r, None) self.assertEqual(r.fixed[22], 'spam') def test_mixed_type_variant(self): - r = parse.parse(''' + r = parse.parse( + ''' letters: {:w} non-letters: {:W} whitespace: "{:s}" @@ -652,7 +685,7 @@ def test_mixed_type_variant(self): time: {:tt} final value: {} ''', - ''' + ''' letters: abcdef_GHIJLK non-letters: !@#%$ *^% whitespace: " \t\n" @@ -674,7 +707,8 @@ def test_mixed_type_variant(self): HTTP e.g. 21/Nov/2011:00:07:11 +0000 time: 10:21:36 PM -5:30 final value: spam - ''') + ''', + ) self.assertNotEqual(r, None) self.assertEqual(r.fixed[21], 'spam') @@ -691,7 +725,7 @@ def test_letters(self): res = parse.parse('{:l}', '') self.assertIsNone(res) res = parse.parse('{:l}', 'sPaM') - self.assertEqual(res.fixed, ('sPaM', )) + self.assertEqual(res.fixed, ('sPaM',)) res = parse.parse('{:l}', 'sP4M') self.assertIsNone(res) res = parse.parse('{:l}', 'sP_M') @@ -715,7 +749,9 @@ def test_pos(self): self.assertEqual(r, None) def test_no_evaluate_result(self): - match = parse.search('age: {:d}\n', 'name: Rufus\nage: 42\ncolor: red\n', evaluate_result=False) + match = parse.search( + 'age: {:d}\n', 'name: Rufus\nage: 42\ncolor: red\n', evaluate_result=False + ) r = match.evaluate_result() self.assertEqual(r.fixed, (42,)) @@ -723,14 +759,19 @@ def test_no_evaluate_result(self): class TestFindall(unittest.TestCase): def test_findall(self): # basic findall() test - s = ''.join(r.fixed[0] for r in parse.findall(">{}<", - "

some bold text

")) + s = ''.join( + r.fixed[0] for r in parse.findall(">{}<", "

some bold text

") + ) self.assertEqual(s, "some bold text") def test_no_evaluate_result(self): # basic findall() test - s = ''.join(m.evaluate_result().fixed[0] for m in parse.findall(">{}<", - "

some bold text

", evaluate_result=False)) + s = ''.join( + m.evaluate_result().fixed[0] + for m in parse.findall( + ">{}<", "

some bold text

", evaluate_result=False + ) + ) self.assertEqual(s, "some bold text") def test_case_sensitivity(self): @@ -789,15 +830,16 @@ def parse_number(text): return int(text) # -- CASE: Use named (OK) - type_map = dict(Name=parse_word_and_covert_to_uppercase, - Number=parse_number) - r = parse.parse('Hello {name:Name} {number:Number}', - 'Hello Alice 42', extra_types=type_map) + type_map = dict(Name=parse_word_and_covert_to_uppercase, Number=parse_number) + r = parse.parse( + 'Hello {name:Name} {number:Number}', 'Hello Alice 42', extra_types=type_map + ) self.assertEqual(r.named, dict(name='ALICE', number=42)) # -- CASE: Use unnamed/fixed (problematic) - r = parse.parse('Hello {:Name} {:Number}', - 'Hello Alice 42', extra_types=type_map) + r = parse.parse( + 'Hello {:Name} {:Number}', 'Hello Alice 42', extra_types=type_map + ) self.assertEqual(r[0], 'ALICE') self.assertEqual(r[1], 42) @@ -810,11 +852,23 @@ def test_pickling_bug_110(self): # prior to the fix, this would raise an AttributeError pickle.dumps(p) + def test_search_centered_bug_112(self): + r = parse.parse("{:^},{:^}", " 12 , 34 ") + self.assertEqual(r[1], "34") + r = parse.search("{:^},{:^}", " 12 , 34 ") + self.assertEqual(r[1], "34") + + def test_search_left_align_bug_112(self): + r = parse.parse("{:<},{:<}", "12 ,34 ") + self.assertEqual(r[1], "34") + r = parse.search("{:<},{:<}", "12 ,34 ") + self.assertEqual(r[1], "34") + + # ----------------------------------------------------------------------------- # TEST SUPPORT FOR: TestParseType # ----------------------------------------------------------------------------- class TestParseType(unittest.TestCase): - def assert_match(self, parser, text, param_name, expected): result = parser.parse(text) self.assertEqual(result[param_name], expected) @@ -834,8 +888,9 @@ def assert_fixed_mismatch(self, parser, text): def test_pattern_should_be_used(self): def parse_number(text): return int(text) + parse_number.pattern = r"\d+" - parse_number.name = "Number" # For testing only. + parse_number.name = "Number" # For testing only. extra_types = {parse_number.name: parse_number} format = "Value is {number:Number} and..." @@ -849,13 +904,17 @@ def parse_number(text): def test_pattern_should_be_used2(self): def parse_yesno(text): return parse_yesno.mapping[text.lower()] + parse_yesno.mapping = { - "yes": True, "no": False, - "on": True, "off": False, - "true": True, "false": False, + "yes": True, + "no": False, + "on": True, + "off": False, + "true": True, + "false": False, } parse_yesno.pattern = r"|".join(parse_yesno.mapping.keys()) - parse_yesno.name = "YesNo" # For testing only. + parse_yesno.name = "YesNo" # For testing only. extra_types = {parse_yesno.name: parse_yesno} format = "Answer: {answer:YesNo}" @@ -928,7 +987,7 @@ def parse_number(text): (2, IndexError), ] for bad_regex_group_count, error_class in BAD_REGEX_GROUP_COUNTS_AND_ERRORS: - parse_unit.regex_group_count = bad_regex_group_count # -- OVERRIDE-HERE + parse_unit.regex_group_count = bad_regex_group_count # -- OVERRIDE-HERE type_converters = dict(Number=parse_number, Unit=parse_unit) parser = parse.Parser('test {:Unit}-{:Number}', type_converters) self.assertRaises(error_class, parser.parse, 'test meter-10') @@ -940,7 +999,8 @@ def test_with_pattern_and_regex_group_count_is_none(self): @parse.with_pattern(r'[ab]') def parse_data(text): return data_values[text] - parse_data.regex_group_count = None # ENFORCE: None + + parse_data.regex_group_count = None # ENFORCE: None # -- CASE: Unnamed-params parser = parse.Parser('test {:Data}', {'Data': parse_data}) @@ -957,7 +1017,9 @@ def parse_data(text): def test_case_sensitivity(self): r = parse.parse('SPAM {} SPAM', 'spam spam spam') self.assertEqual(r[0], 'spam') - self.assertEqual(parse.parse('SPAM {} SPAM', 'spam spam spam', case_sensitive=True), None) + self.assertEqual( + parse.parse('SPAM {} SPAM', 'spam spam spam', case_sensitive=True), None + ) def test_decimal_value(self): value = Decimal('5.5') @@ -975,7 +1037,7 @@ def test_width_str(self): def test_width_constraints(self): res = parse.parse('{:4}', 'looky') - self.assertEqual(res.fixed, ('looky', )) + self.assertEqual(res.fixed, ('looky',)) res = parse.parse('{:4.4}', 'looky') self.assertIsNone(res) res = parse.parse('{:4.4}', 'ook')