1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 __doc__ = \
27 """
28 pyparsing module - Classes and methods to define and execute parsing grammars
29 =============================================================================
30
31 The pyparsing module is an alternative approach to creating and
32 executing simple grammars, vs. the traditional lex/yacc approach, or the
33 use of regular expressions. With pyparsing, you don't need to learn
34 a new syntax for defining grammars or matching expressions - the parsing
35 module provides a library of classes that you use to construct the
36 grammar directly in Python.
37
38 Here is a program to parse "Hello, World!" (or any greeting of the form
39 ``"<salutation>, <addressee>!"``), built up using :class:`Word`,
40 :class:`Literal`, and :class:`And` elements
41 (the :class:`'+'<ParserElement.__add__>` operators create :class:`And` expressions,
42 and the strings are auto-converted to :class:`Literal` expressions)::
43
44 from pyparsing import Word, alphas
45
46 # define grammar of a greeting
47 greet = Word(alphas) + "," + Word(alphas) + "!"
48
49 hello = "Hello, World!"
50 print (hello, "->", greet.parseString(hello))
51
52 The program outputs the following::
53
54 Hello, World! -> ['Hello', ',', 'World', '!']
55
56 The Python representation of the grammar is quite readable, owing to the
57 self-explanatory class names, and the use of '+', '|' and '^' operators.
58
59 The :class:`ParseResults` object returned from
60 :class:`ParserElement.parseString` can be
61 accessed as a nested list, a dictionary, or an object with named
62 attributes.
63
64 The pyparsing module handles some of the problems that are typically
65 vexing when writing text parsers:
66
67 - extra or missing whitespace (the above program will also handle
68 "Hello,World!", "Hello , World !", etc.)
69 - quoted strings
70 - embedded comments
71
72
73 Getting Started -
74 -----------------
75 Visit the classes :class:`ParserElement` and :class:`ParseResults` to
76 see the base classes that most other pyparsing
77 classes inherit from. Use the docstrings for examples of how to:
78
79 - construct literal match expressions from :class:`Literal` and
80 :class:`CaselessLiteral` classes
81 - construct character word-group expressions using the :class:`Word`
82 class
83 - see how to create repetitive expressions using :class:`ZeroOrMore`
84 and :class:`OneOrMore` classes
85 - use :class:`'+'<And>`, :class:`'|'<MatchFirst>`, :class:`'^'<Or>`,
86 and :class:`'&'<Each>` operators to combine simple expressions into
87 more complex ones
88 - associate names with your parsed results using
89 :class:`ParserElement.setResultsName`
90 - access the parsed data, which is returned as a :class:`ParseResults`
91 object
92 - find some helpful expression short-cuts like :class:`delimitedList`
93 and :class:`oneOf`
94 - find more useful common expressions in the :class:`pyparsing_common`
95 namespace class
96 """
97
98 __version__ = "2.4.6"
99 __versionTime__ = "24 Dec 2019 04:27 UTC"
100 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
101
102 import string
103 from weakref import ref as wkref
104 import copy
105 import sys
106 import warnings
107 import re
108 import sre_constants
109 import collections
110 import pprint
111 import traceback
112 import types
113 from datetime import datetime
114 from operator import itemgetter
115 import itertools
116 from functools import wraps
117 from contextlib import contextmanager
118
119 try:
120
121 from itertools import filterfalse
122 except ImportError:
123 from itertools import ifilterfalse as filterfalse
124
125 try:
126 from _thread import RLock
127 except ImportError:
128 from threading import RLock
129
130 try:
131
132 from collections.abc import Iterable
133 from collections.abc import MutableMapping, Mapping
134 except ImportError:
135
136 from collections import Iterable
137 from collections import MutableMapping, Mapping
138
139 try:
140 from collections import OrderedDict as _OrderedDict
141 except ImportError:
142 try:
143 from ordereddict import OrderedDict as _OrderedDict
144 except ImportError:
145 _OrderedDict = None
146
147 try:
148 from types import SimpleNamespace
149 except ImportError:
151
152
153 __compat__ = SimpleNamespace()
154 __compat__.__doc__ = """
155 A cross-version compatibility configuration for pyparsing features that will be
156 released in a future version. By setting values in this configuration to True,
157 those features can be enabled in prior versions for compatibility development
158 and testing.
159
160 - collect_all_And_tokens - flag to enable fix for Issue #63 that fixes erroneous grouping
161 of results names when an And expression is nested within an Or or MatchFirst; set to
162 True to enable bugfix released in pyparsing 2.3.0, or False to preserve
163 pre-2.3.0 handling of named results
164 """
165 __compat__.collect_all_And_tokens = True
166
167 __diag__ = SimpleNamespace()
168 __diag__.__doc__ = """
169 Diagnostic configuration (all default to False)
170 - warn_multiple_tokens_in_named_alternation - flag to enable warnings when a results
171 name is defined on a MatchFirst or Or expression with one or more And subexpressions
172 (only warns if __compat__.collect_all_And_tokens is False)
173 - warn_ungrouped_named_tokens_in_collection - flag to enable warnings when a results
174 name is defined on a containing expression with ungrouped subexpressions that also
175 have results names
176 - warn_name_set_on_empty_Forward - flag to enable warnings when a Forward is defined
177 with a results name, but has no contents defined
178 - warn_on_multiple_string_args_to_oneof - flag to enable warnings when oneOf is
179 incorrectly called with multiple str arguments
180 - enable_debug_on_named_expressions - flag to auto-enable debug on all subsequent
181 calls to ParserElement.setName()
182 """
183 __diag__.warn_multiple_tokens_in_named_alternation = False
184 __diag__.warn_ungrouped_named_tokens_in_collection = False
185 __diag__.warn_name_set_on_empty_Forward = False
186 __diag__.warn_on_multiple_string_args_to_oneof = False
187 __diag__.enable_debug_on_named_expressions = False
188 __diag__._all_names = [nm for nm in vars(__diag__) if nm.startswith("enable_") or nm.startswith("warn_")]
191 __diag__.warn_multiple_tokens_in_named_alternation = True
192 __diag__.warn_ungrouped_named_tokens_in_collection = True
193 __diag__.warn_name_set_on_empty_Forward = True
194 __diag__.warn_on_multiple_string_args_to_oneof = True
195 __diag__.enable_all_warnings = _enable_all_warnings
196
197
198 __all__ = ['__version__', '__versionTime__', '__author__', '__compat__', '__diag__',
199 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
200 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
201 'PrecededBy', 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
202 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
203 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
204 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
205 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 'Char',
206 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
207 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
208 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
209 'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
210 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
211 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
212 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
213 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
214 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
215 'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation', 'locatedExpr', 'withClass',
216 'CloseMatch', 'tokenMap', 'pyparsing_common', 'pyparsing_unicode', 'unicode_set',
217 'conditionAsParseAction', 're',
218 ]
219
220 system_version = tuple(sys.version_info)[:3]
221 PY_3 = system_version[0] == 3
222 if PY_3:
223 _MAX_INT = sys.maxsize
224 basestring = str
225 unichr = chr
226 unicode = str
227 _ustr = str
228
229
230 singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]
231
232 else:
233 _MAX_INT = sys.maxint
234 range = xrange
237 """Drop-in replacement for str(obj) that tries to be Unicode
238 friendly. It first tries str(obj). If that fails with
239 a UnicodeEncodeError, then it tries unicode(obj). It then
240 < returns the unicode object | encodes it with the default
241 encoding | ... >.
242 """
243 if isinstance(obj, unicode):
244 return obj
245
246 try:
247
248
249 return str(obj)
250
251 except UnicodeEncodeError:
252
253 ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
254 xmlcharref = Regex(r'&#\d+;')
255 xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
256 return xmlcharref.transformString(ret)
257
258
259 singleArgBuiltins = []
260 import __builtin__
261
262 for fname in "sum len sorted reversed list tuple set any all min max".split():
263 try:
264 singleArgBuiltins.append(getattr(__builtin__, fname))
265 except AttributeError:
266 continue
267
268 _generatorType = type((y for y in range(1)))
271 """Escape &, <, >, ", ', etc. in a string of data."""
272
273
274 from_symbols = '&><"\''
275 to_symbols = ('&' + s + ';' for s in "amp gt lt quot apos".split())
276 for from_, to_ in zip(from_symbols, to_symbols):
277 data = data.replace(from_, to_)
278 return data
279
# Character-set constants used when building Word-style expressions.
nums = "0123456789"
alphas = string.ascii_uppercase + string.ascii_lowercase
alphanums = alphas + nums
hexnums = nums + "ABCDEFabcdef"
_bslash = chr(92)  # code point 92 is the backslash character
# Every character of string.printable that is not whitespace.
printables = "".join([ch for ch in string.printable if ch not in string.whitespace])
289 msg = message if message is not None else "failed user-defined condition"
290 exc_type = ParseFatalException if fatal else ParseException
291 fn = _trim_arity(fn)
292
293 @wraps(fn)
294 def pa(s, l, t):
295 if not bool(fn(s, l, t)):
296 raise exc_type(s, l, msg)
297
298 return pa
299
301 """base exception class for all parsing runtime exceptions"""
302
303
def __init__(self, pstr, loc=0, msg=None, elem=None):
    """Record the text, location and message of a parsing failure.

    Parameters:
     - pstr - the text being parsed; when ``msg`` is omitted, this argument
       is taken to be the message and the stored text is left empty
     - loc - location value stored on the exception (default=0)
     - msg - description of the failure, or ``None``
     - elem - stored as ``parserElement``
    """
    # A call without ``msg`` means the first argument is really the message,
    # so the two roles are swapped and no input text is kept.
    text, message = ("", pstr) if msg is None else (pstr, msg)
    self.loc = loc
    self.msg = message
    self.pstr = text
    self.parserElement = elem
    # ``args`` keeps the constructor arguments exactly as they were passed.
    self.args = (pstr, loc, msg)
314
315 @classmethod
317 """
318 internal factory method to simplify creating one type of ParseException
319 from another - avoids having __init__ signature conflicts among subclasses
320 """
321 return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)
322
324 """supported attributes by name are:
325 - lineno - returns the line number of the exception text
326 - col - returns the column number of the exception text
327 - line - returns the line containing the exception text
328 """
329 if aname == "lineno":
330 return lineno(self.loc, self.pstr)
331 elif aname in ("col", "column"):
332 return col(self.loc, self.pstr)
333 elif aname == "line":
334 return line(self.loc, self.pstr)
335 else:
336 raise AttributeError(aname)
337
339 if self.pstr:
340 if self.loc >= len(self.pstr):
341 foundstr = ', found end of text'
342 else:
343 foundstr = (', found %r' % self.pstr[self.loc:self.loc + 1]).replace(r'\\', '\\')
344 else:
345 foundstr = ''
346 return ("%s%s (at char %d), (line:%d, col:%d)" %
347 (self.msg, foundstr, self.loc, self.lineno, self.column))
361 return "lineno col line".split() + dir(type(self))
362
364 """
365 Exception thrown when parse expressions don't match class;
366 supported attributes by name are:
367 - lineno - returns the line number of the exception text
368 - col - returns the column number of the exception text
369 - line - returns the line containing the exception text
370
371 Example::
372
373 try:
374 Word(nums).setName("integer").parseString("ABC")
375 except ParseException as pe:
376 print(pe)
377 print("column: {}".format(pe.col))
378
379 prints::
380
381 Expected integer (at char 0), (line:1, col:1)
382 column: 1
383
384 """
385
386 @staticmethod
388 """
389 Method to take an exception and translate the Python internal traceback into a list
390 of the pyparsing expressions that caused the exception to be raised.
391
392 Parameters:
393
394 - exc - exception raised during parsing (need not be a ParseException, in support
395 of Python exceptions that might be raised in a parse action)
396 - depth (default=16) - number of levels back in the stack trace to list expression
397 and function names; if None, the full stack trace names will be listed; if 0, only
398 the failing input line, marker, and exception string will be shown
399
400 Returns a multi-line string listing the ParserElements and/or function names in the
401 exception's stack trace.
402
403 Note: the diagnostic output will include string representations of the expressions
404 that failed to parse. These representations will be more helpful if you use `setName` to
405 give identifiable names to your expressions. Otherwise they will use the default string
406 forms, which may be cryptic to read.
407
408 explain() is only supported under Python 3.
409 """
410 import inspect
411
412 if depth is None:
413 depth = sys.getrecursionlimit()
414 ret = []
415 if isinstance(exc, ParseBaseException):
416 ret.append(exc.line)
417 ret.append(' ' * (exc.col - 1) + '^')
418 ret.append("{0}: {1}".format(type(exc).__name__, exc))
419
420 if depth > 0:
421 callers = inspect.getinnerframes(exc.__traceback__, context=depth)
422 seen = set()
423 for i, ff in enumerate(callers[-depth:]):
424 frm = ff[0]
425
426 f_self = frm.f_locals.get('self', None)
427 if isinstance(f_self, ParserElement):
428 if frm.f_code.co_name not in ('parseImpl', '_parseNoCache'):
429 continue
430 if f_self in seen:
431 continue
432 seen.add(f_self)
433
434 self_type = type(f_self)
435 ret.append("{0}.{1} - {2}".format(self_type.__module__,
436 self_type.__name__,
437 f_self))
438 elif f_self is not None:
439 self_type = type(f_self)
440 ret.append("{0}.{1}".format(self_type.__module__,
441 self_type.__name__))
442 else:
443 code = frm.f_code
444 if code.co_name in ('wrapper', '<module>'):
445 continue
446
447 ret.append("{0}".format(code.co_name))
448
449 depth -= 1
450 if not depth:
451 break
452
453 return '\n'.join(ret)
454
457 """user-throwable exception thrown when inconsistent parse content
458 is found; stops all parsing immediately"""
459 pass
460
462 """just like :class:`ParseFatalException`, but thrown internally
463 when an :class:`ErrorStop<And._ErrorStop>` ('-' operator) indicates
464 that parsing is to stop immediately because an unbacktrackable
465 syntax error has been found.
466 """
467 pass
468
483 """exception thrown by :class:`ParserElement.validate` if the
484 grammar could be improperly recursive
485 """
487 self.parseElementTrace = parseElementList
488
490 return "RecursiveGrammarException: %s" % self.parseElementTrace
491
498 return repr(self.tup[0])
500 self.tup = (self.tup[0], i)
501
503 """Structured parse results, to provide multiple means of access to
504 the parsed data:
505
506 - as a list (``len(results)``)
507 - by list index (``results[0], results[1]``, etc.)
508 - by attribute (``results.<resultsName>`` - see :class:`ParserElement.setResultsName`)
509
510 Example::
511
512 integer = Word(nums)
513 date_str = (integer.setResultsName("year") + '/'
514 + integer.setResultsName("month") + '/'
515 + integer.setResultsName("day"))
516 # equivalent form:
517 # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
518
519 # parseString returns a ParseResults object
520 result = date_str.parseString("1999/12/31")
521
522 def test(s, fn=repr):
523 print("%s -> %s" % (s, fn(eval(s))))
524 test("list(result)")
525 test("result[0]")
526 test("result['month']")
527 test("result.day")
528 test("'month' in result")
529 test("'minutes' in result")
530 test("result.dump()", str)
531
532 prints::
533
534 list(result) -> ['1999', '/', '12', '/', '31']
535 result[0] -> '1999'
536 result['month'] -> '12'
537 result.day -> '31'
538 'month' in result -> True
539 'minutes' in result -> False
540 result.dump() -> ['1999', '/', '12', '/', '31']
541 - day: 31
542 - month: 12
543 - year: 1999
544 """
def __new__(cls, toklist=None, name=None, asList=True, modal=True):
    """Allocate a new results object, or return ``toklist`` itself when it is
    already an instance of ``cls``.

    A newly allocated object gets its ``__doinit`` flag set to ``True``; that
    flag is what ``__init__`` checks before building the internal state.
    The remaining parameters are accepted only to mirror ``__init__``.
    """
    if not isinstance(toklist, cls):
        fresh = object.__new__(cls)
        fresh.__doinit = True
        return fresh
    # Already a results object: hand the very same object back.
    return toklist
551
552
553
def __init__(self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance):
    """Set up the token list and optionally store the tokens under a results name.

    Parameters:
     - toklist - a list (shallow-copied), a generator (materialized into a
       list), ``None`` (treated as an empty list), or any other object
       (wrapped in a one-element list)
     - name - results name under which the tokens are recorded; nothing is
       recorded when it is ``None`` or otherwise falsy
     - asList - when true, the named value is stored as a nested
       ``ParseResults``; otherwise the first token (or ``toklist`` itself)
       is stored directly
     - modal - when false, ``name`` is also registered in the
       accumulated-names table
     - isinstance - the builtin, bound as a default argument so that it is a
       local name inside this method (presumably a lookup-speed measure)
    """
    # Only an object freshly allocated by __new__ has this flag set to True.
    # It is cleared immediately, so running __init__ again on the same object
    # does not rebuild its state.
    if self.__doinit:
        self.__doinit = False
        self.__name = None
        self.__parent = None
        self.__accumNames = {}
        self.__asList = asList
        self.__modal = modal
        if toklist is None:
            toklist = []
        if isinstance(toklist, list):
            # shallow copy, so the caller's list object is not shared
            self.__toklist = toklist[:]
        elif isinstance(toklist, _generatorType):
            self.__toklist = list(toklist)
        else:
            # any other object becomes the single token
            self.__toklist = [toklist]
        self.__tokdict = dict()

    if name is not None and name:
        if not modal:
            # register the name in the accumulated-names table
            self.__accumNames[name] = 0
        if isinstance(name, int):
            # integer names are converted to their string form
            name = _ustr(name)
        self.__name = name
        # Nothing is stored under the name when toklist is None, '' or [].
        if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None, '', [])):
            if isinstance(toklist, basestring):
                toklist = [toklist]
            if asList:
                # Store a nested ParseResults, paired with offset 0.
                if isinstance(toklist, ParseResults):
                    self[name] = _ParseResultsWithOffset(ParseResults(toklist.__toklist), 0)
                else:
                    self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]), 0)
                # tag the value just stored with the results name
                self[name].__name = name
            else:
                # Store the first token; fall back to toklist itself when
                # it cannot be indexed that way.
                try:
                    self[name] = toklist[0]
                except (KeyError, TypeError, IndexError):
                    self[name] = toklist
592
594 if isinstance(i, (int, slice)):
595 return self.__toklist[i]
596 else:
597 if i not in self.__accumNames:
598 return self.__tokdict[i][-1][0]
599 else:
600 return ParseResults([v[0] for v in self.__tokdict[i]])
601
614
616 if isinstance(i, (int, slice)):
617 mylen = len(self.__toklist)
618 del self.__toklist[i]
619
620
621 if isinstance(i, int):
622 if i < 0:
623 i += mylen
624 i = slice(i, i + 1)
625
626 removed = list(range(*i.indices(mylen)))
627 removed.reverse()
628
629 for name, occurrences in self.__tokdict.items():
630 for j in removed:
631 for k, (value, position) in enumerate(occurrences):
632 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
633 else:
634 del self.__tokdict[i]
635
637 return k in self.__tokdict
638
640 return len(self.__toklist)
641
643 return (not not self.__toklist)
644 __nonzero__ = __bool__
645
647 return iter(self.__toklist)
648
650 return iter(self.__toklist[::-1])
651
653 if hasattr(self.__tokdict, "iterkeys"):
654 return self.__tokdict.iterkeys()
655 else:
656 return iter(self.__tokdict)
657
659 return (self[k] for k in self._iterkeys())
660
662 return ((k, self[k]) for k in self._iterkeys())
663
664 if PY_3:
665 keys = _iterkeys
666 """Returns an iterator of all named result keys."""
667
668 values = _itervalues
669 """Returns an iterator of all named result values."""
670
671 items = _iteritems
672 """Returns an iterator of all named result key-value tuples."""
673
674 else:
675 iterkeys = _iterkeys
676 """Returns an iterator of all named result keys (Python 2.x only)."""
677
678 itervalues = _itervalues
679 """Returns an iterator of all named result values (Python 2.x only)."""
680
681 iteritems = _iteritems
682 """Returns an iterator of all named result key-value tuples (Python 2.x only)."""
683
685 """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
686 return list(self.iterkeys())
687
689 """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
690 return list(self.itervalues())
691
693 """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
694 return list(self.iteritems())
695
697 """Since keys() returns an iterator, this method is helpful in bypassing
698 code that looks for the existence of any defined results names."""
699 return bool(self.__tokdict)
700
def pop(self, *args, **kwargs):
    """
    Remove an item and return it, using either ``list`` or ``dict`` semantics.

    With no argument the last token is popped; with an integer argument the
    token at that index is popped. With any other argument (most likely a
    string) the matching results name is removed and its value returned.
    As with ``dict.pop()``, a second positional argument - or the keyword
    ``default`` - supplies the value returned when the name is not present.
    Any other keyword argument raises ``TypeError``.

    Example::

        def remove_first(tokens):
            tokens.pop(0)
        print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
        print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']

        label = Word(alphas)
        patt = label("LABEL") + OneOrMore(Word(nums))
        print(patt.parseString("AAB 123 321").dump())

        # Popping a results name in a parse action removes the name only;
        # the corresponding token stays in the list form of the results
        def remove_LABEL(tokens):
            tokens.pop("LABEL")
            return tokens
        patt.addParseAction(remove_LABEL)
        print(patt.parseString("AAB 123 321").dump())

    prints::

        ['AAB', '123', '321']
        - LABEL: AAB

        ['AAB', '123', '321']
    """
    positional = args if args else [-1]
    for kw_name, kw_value in kwargs.items():
        if kw_name != 'default':
            raise TypeError("pop() got an unexpected keyword argument '%s'" % kw_name)
        positional = (positional[0], kw_value)

    target = positional[0]
    # The fallback applies only to a non-integer key that came with a
    # default and is not currently defined.
    if (not isinstance(target, int)
            and len(positional) != 1
            and target not in self):
        return positional[1]

    removed = self[target]
    del self[target]
    return removed
755
def get(self, key, defaultValue=None):
    """
    Look up a named result, falling back to ``defaultValue`` (``None``
    unless given) when ``key`` is not a defined results name.

    Works like ``dict.get()``.

    Example::

        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString("1999/12/31")
        print(result.get("year")) # -> '1999'
        print(result.get("hour", "not specified")) # -> 'not specified'
        print(result.get("hour")) # -> None
    """
    return self[key] if key in self else defaultValue
778
def insert(self, index, insStr):
    """
    Insert ``insStr`` into the list of parsed tokens at position ``index``,
    in the manner of ``list.insert()``, and adjust the offsets recorded for
    named results.

    Example::

        print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

        # use a parse action to insert the parse location in the front of the parsed results
        def insert_locn(locn, tokens):
            tokens.insert(0, locn)
        print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
    """
    self.__toklist.insert(index, insStr)

    # NOTE(review): only offsets strictly greater than ``index`` are moved
    # up by one; an offset equal to ``index`` is left as it is - confirm
    # that this is the intended behaviour.
    for entries in self.__tokdict.values():
        for slot, (tok, offset) in enumerate(entries):
            shifted = offset + 1 if offset > index else offset
            entries[slot] = _ParseResultsWithOffset(tok, shifted)
799
801 """
802 Add single element to end of ParseResults list of elements.
803
804 Example::
805
806 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
807
808 # use a parse action to compute the sum of the parsed integers, and add it to the end
809 def append_sum(tokens):
810 tokens.append(sum(map(int, tokens)))
811 print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
812 """
813 self.__toklist.append(item)
814
816 """
817 Add sequence of elements to end of ParseResults list of elements.
818
819 Example::
820
821 patt = OneOrMore(Word(alphas))
822
823 # use a parse action to append the reverse of the matched strings, to make a palindrome
824 def make_palindrome(tokens):
825 tokens.extend(reversed([t[::-1] for t in tokens]))
826 return ''.join(tokens)
827 print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
828 """
829 if isinstance(itemseq, ParseResults):
830 self.__iadd__(itemseq)
831 else:
832 self.__toklist.extend(itemseq)
833
835 """
836 Clear all elements and results names.
837 """
838 del self.__toklist[:]
839 self.__tokdict.clear()
840
842 try:
843 return self[name]
844 except KeyError:
845 return ""
846
848 ret = self.copy()
849 ret += other
850 return ret
851
853 if other.__tokdict:
854 offset = len(self.__toklist)
855 addoffset = lambda a: offset if a < 0 else a + offset
856 otheritems = other.__tokdict.items()
857 otherdictitems = [(k, _ParseResultsWithOffset(v[0], addoffset(v[1])))
858 for k, vlist in otheritems for v in vlist]
859 for k, v in otherdictitems:
860 self[k] = v
861 if isinstance(v[0], ParseResults):
862 v[0].__parent = wkref(self)
863
864 self.__toklist += other.__toklist
865 self.__accumNames.update(other.__accumNames)
866 return self
867
869 if isinstance(other, int) and other == 0:
870
871 return self.copy()
872 else:
873
874 return other + self
875
877 return "(%s, %s)" % (repr(self.__toklist), repr(self.__tokdict))
878
880 return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
881
892
894 """
895 Returns the parse results as a nested list of matching tokens, all converted to strings.
896
897 Example::
898
899 patt = OneOrMore(Word(alphas))
900 result = patt.parseString("sldkj lsdkj sldkj")
901 # even though the result prints in string-like form, it is actually a pyparsing ParseResults
902 print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
903
904 # Use asList() to create an actual list
905 result_list = result.asList()
906 print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
907 """
908 return [res.asList() if isinstance(res, ParseResults) else res for res in self.__toklist]
909
911 """
912 Returns the named parse results as a nested dictionary.
913
914 Example::
915
916 integer = Word(nums)
917 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
918
919 result = date_str.parseString('12/31/1999')
920 print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
921
922 result_dict = result.asDict()
923 print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
924
925 # even though a ParseResults supports dict-like access, sometime you just need to have a dict
926 import json
927 print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
928 print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
929 """
930 if PY_3:
931 item_fn = self.items
932 else:
933 item_fn = self.iteritems
934
935 def toItem(obj):
936 if isinstance(obj, ParseResults):
937 if obj.haskeys():
938 return obj.asDict()
939 else:
940 return [toItem(v) for v in obj]
941 else:
942 return obj
943
944 return dict((k, toItem(v)) for k, v in item_fn())
945
947 """
948 Returns a new copy of a :class:`ParseResults` object.
949 """
950 ret = ParseResults(self.__toklist)
951 ret.__tokdict = dict(self.__tokdict.items())
952 ret.__parent = self.__parent
953 ret.__accumNames.update(self.__accumNames)
954 ret.__name = self.__name
955 return ret
956
def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True):
    """
    (Deprecated) Render the parse results as an XML string. Tokens and nested
    results that have a results name use that name as their tag; the rest
    are tagged ``ITEM`` or, when ``namedItemsOnly`` is true, left out.

    Parameters:
     - doctag - tag for the enclosing element; when ``None`` this object's
       own results name is used
     - namedItemsOnly - skip tokens that have no results name
     - indent - prefix written before this element's tags
     - formatted - when false, no newlines or indentation are emitted
    """
    # Map each recorded token offset to the results name stored at it.
    tag_by_offset = dict((entry[1], key)
                         for key, entries in self.__tokdict.items()
                         for entry in entries)

    if formatted:
        newline = "\n"
        child_indent = indent + " "
    else:
        newline = indent = child_indent = ""

    own_tag = doctag if doctag is not None else (self.__name or None)
    if not own_tag:
        if namedItemsOnly:
            return ""
        own_tag = "ITEM"

    nested_named_only = namedItemsOnly and doctag is None
    pieces = [newline, indent, "<", own_tag, ">"]

    for offset, tok in enumerate(self.__toklist):
        tok_tag = tag_by_offset.get(offset)
        if isinstance(tok, ParseResults):
            # Nested results render themselves one indentation level deeper.
            pieces.append(tok.asXML(tok_tag,
                                    nested_named_only,
                                    child_indent,
                                    formatted))
            continue

        # Plain token: wrap its escaped text in a single element.
        if not tok_tag:
            if namedItemsOnly:
                continue
            tok_tag = "ITEM"
        body_text = _xml_escape(_ustr(tok))
        pieces.extend([newline, child_indent, "<", tok_tag, ">",
                       body_text,
                       "</", tok_tag, ">"])

    pieces.extend([newline, indent, "</", own_tag, ">"])
    return "".join(pieces)
1017
1019 for k, vlist in self.__tokdict.items():
1020 for v, loc in vlist:
1021 if sub is v:
1022 return k
1023 return None
1024
1026 r"""
1027 Returns the results name for this token expression. Useful when several
1028 different expressions might match at a particular location.
1029
1030 Example::
1031
1032 integer = Word(nums)
1033 ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
1034 house_number_expr = Suppress('#') + Word(nums, alphanums)
1035 user_data = (Group(house_number_expr)("house_number")
1036 | Group(ssn_expr)("ssn")
1037 | Group(integer)("age"))
1038 user_info = OneOrMore(user_data)
1039
1040 result = user_info.parseString("22 111-22-3333 #221B")
1041 for item in result:
1042 print(item.getName(), ':', item[0])
1043
1044 prints::
1045
1046 age : 22
1047 ssn : 111-22-3333
1048 house_number : 221B
1049 """
1050 if self.__name:
1051 return self.__name
1052 elif self.__parent:
1053 par = self.__parent()
1054 if par:
1055 return par.__lookup(self)
1056 else:
1057 return None
1058 elif (len(self) == 1
1059 and len(self.__tokdict) == 1
1060 and next(iter(self.__tokdict.values()))[0][1] in (0, -1)):
1061 return next(iter(self.__tokdict.keys()))
1062 else:
1063 return None
1064
def dump(self, indent='', full=True, include_list=True, _depth=0):
    """
    Build a diagnostic, multi-line listing of the contents of a
    :class:`ParseResults`.

    Parameters:
     - indent - prefix for each emitted line, so the listing can be embedded
       in a nested display of other data
     - full - when true, named results (or, if there are none, nested
       results by index) are listed after the first line
     - include_list - when true, the listing starts with the list form of
       the results
     - _depth - nesting level, increased by one on each recursive call

    Example::

        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString('12/31/1999')
        print(result.dump())

    prints::

        ['12', '/', '31', '/', '1999']
        - day: 1999
        - month: 31
        - year: 12
    """
    pieces = [indent + _ustr(self.asList()) if include_list else '']
    if not full:
        return "".join(pieces)

    if self.haskeys():
        # Named results, listed in order of their stringified names.
        for key, val in sorted((str(k), v) for k, v in self.items()):
            if pieces:
                pieces.append('\n')
            pieces.append("%s%s- %s: " % (indent, (' ' * _depth), key))
            if not isinstance(val, ParseResults):
                pieces.append(repr(val))
            elif val:
                pieces.append(val.dump(indent=indent, full=full, include_list=include_list, _depth=_depth + 1))
            else:
                pieces.append(_ustr(val))
    elif any(isinstance(item, ParseResults) for item in self):
        # No names at this level: list every element by its index.
        for pos, item in enumerate(self):
            if isinstance(item, ParseResults):
                rendered = item.dump(indent=indent,
                                     full=full,
                                     include_list=include_list,
                                     _depth=_depth + 1)
            else:
                rendered = _ustr(item)
            pieces.append("\n%s%s[%d]:\n%s%s%s" % (indent,
                                                   (' ' * (_depth)),
                                                   pos,
                                                   indent,
                                                   (' ' * (_depth + 1)),
                                                   rendered))

    return "".join(pieces)
1129
def pprint(self, *args, **kwargs):
    """
    Pretty-print the parsed results in their nested-list form, using the
    `pprint <https://docs.python.org/3/library/pprint.html>`_ module.
    Any additional positional or keyword args are forwarded unchanged to
    `pprint.pprint <https://docs.python.org/3/library/pprint.html#pprint.pprint>`_ .

    Example::

        ident = Word(alphas, alphanums)
        num = Word(nums)
        func = Forward()
        term = ident | num | Group('(' + func + ')')
        func <<= ident + Group(Optional(delimitedList(term)))
        result = func.parseString("fna a,b,(fnb c,d,200),100")
        result.pprint(width=40)

    prints::

        ['fna',
         ['a',
          'b',
          ['(', 'fnb', ['c', 'd', '200'], ')'],
          '100']]
    """
    nested_list = self.asList()
    pprint.pprint(nested_list, *args, **kwargs)
1156
1157
1159 return (self.__toklist,
1160 (self.__tokdict.copy(),
1161 self.__parent is not None and self.__parent() or None,
1162 self.__accumNames,
1163 self.__name))
1164
1166 self.__toklist = state[0]
1167 self.__tokdict, par, inAccumNames, self.__name = state[1]
1168 self.__accumNames = {}
1169 self.__accumNames.update(inAccumNames)
1170 if par is not None:
1171 self.__parent = wkref(par)
1172 else:
1173 self.__parent = None
1174
1176 return self.__toklist, self.__name, self.__asList, self.__modal
1177
1179 return dir(type(self)) + list(self.keys())
1180
1181 @classmethod
1183 """
1184 Helper classmethod to construct a ParseResults from a dict, preserving the
1185 name-value relations as results names. If an optional 'name' argument is
1186 given, a nested ParseResults will be returned
1187 """
1188 def is_iterable(obj):
1189 try:
1190 iter(obj)
1191 except Exception:
1192 return False
1193 else:
1194 if PY_3:
1195 return not isinstance(obj, (str, bytes))
1196 else:
1197 return not isinstance(obj, basestring)
1198
1199 ret = cls([])
1200 for k, v in other.items():
1201 if isinstance(v, Mapping):
1202 ret += cls.from_dict(v, name=k)
1203 else:
1204 ret += cls([v], name=k, asList=is_iterable(v))
1205 if name is not None:
1206 ret = cls([ret], name=name)
1207 return ret
1208
1209 MutableMapping.register(ParseResults)
1210
def col(loc, strg):
    """Return the column of ``loc`` within ``strg``, treating newlines as
    line separators. Columns are numbered starting at 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See
    :class:`ParserElement.parseString` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.
    """
    # a location directly after a newline is the first column of its line
    if 0 < loc < len(strg) and strg[loc - 1] == '\n':
        return 1
    # otherwise measure the distance back to the preceding newline
    # (rfind gives -1 when there is none, which yields loc + 1)
    preceding_newline = strg.rfind("\n", 0, loc)
    return loc - preceding_newline
1224
1226 """Returns current line number within a string, counting newlines as line separators.
1227 The first line is number 1.
1228
1229 Note - the default parsing behavior is to expand tabs in the input string
1230 before starting the parsing process. See :class:`ParserElement.parseString`
1231 for more information on parsing strings containing ``<TAB>`` s, and
1232 suggested methods to maintain a consistent view of the parsed string, the
1233 parse location, and line and column positions within the parsed string.
1234 """
1235 return strg.count("\n", 0, loc) + 1
1236
def line(loc, strg):
    """Return the text of the line that contains ``loc`` within ``strg``,
    treating newlines as line separators. The newline characters themselves
    are not included in the returned text.
    """
    # first character after the previous newline (0 when there is none)
    begin = strg.rfind("\n", 0, loc) + 1
    # next newline at or after loc; -1 means the line runs to end of string
    end = strg.find("\n", loc)
    return strg[begin:] if end < 0 else strg[begin:end]
1246
1248 print(("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % (lineno(loc, instring), col(loc, instring))))
1249
1252
1254 print("Exception raised:" + _ustr(exc))
1255
1257 """'Do-nothing' debug action, to suppress debugging output during parsing."""
1258 pass
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282 'decorator to trim function calls to match the arity of the target'
1284 if func in singleArgBuiltins:
1285 return lambda s, l, t: func(t)
1286 limit = [0]
1287 foundArity = [False]
1288
1289
1290 if system_version[:2] >= (3, 5):
1291 def extract_stack(limit=0):
1292
1293 offset = -3 if system_version == (3, 5, 0) else -2
1294 frame_summary = traceback.extract_stack(limit=-offset + limit - 1)[offset]
1295 return [frame_summary[:2]]
1296 def extract_tb(tb, limit=0):
1297 frames = traceback.extract_tb(tb, limit=limit)
1298 frame_summary = frames[-1]
1299 return [frame_summary[:2]]
1300 else:
1301 extract_stack = traceback.extract_stack
1302 extract_tb = traceback.extract_tb
1303
1304
1305
1306
1307 LINE_DIFF = 6
1308
1309
1310 this_line = extract_stack(limit=2)[-1]
1311 pa_call_line_synth = (this_line[0], this_line[1] + LINE_DIFF)
1312
1313 def wrapper(*args):
1314 while 1:
1315 try:
1316 ret = func(*args[limit[0]:])
1317 foundArity[0] = True
1318 return ret
1319 except TypeError:
1320
1321 if foundArity[0]:
1322 raise
1323 else:
1324 try:
1325 tb = sys.exc_info()[-1]
1326 if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
1327 raise
1328 finally:
1329 try:
1330 del tb
1331 except NameError:
1332 pass
1333
1334 if limit[0] <= maxargs:
1335 limit[0] += 1
1336 continue
1337 raise
1338
1339
1340 func_name = "<parse action>"
1341 try:
1342 func_name = getattr(func, '__name__',
1343 getattr(func, '__class__').__name__)
1344 except Exception:
1345 func_name = str(func)
1346 wrapper.__name__ = func_name
1347
1348 return wrapper
1349
1352 """Abstract base level parser element class."""
1353 DEFAULT_WHITE_CHARS = " \n\t\r"
1354 verbose_stacktrace = False
1355
1356 @staticmethod
1358 r"""
1359 Overrides the default whitespace chars
1360
1361 Example::
1362
1363 # default whitespace chars are space, <TAB> and newline
1364 OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl']
1365
1366 # change to just treat newline as significant
1367 ParserElement.setDefaultWhitespaceChars(" \t")
1368 OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def']
1369 """
1370 ParserElement.DEFAULT_WHITE_CHARS = chars
1371
1372 @staticmethod
1374 """
1375 Set class to be used for inclusion of string literals into a parser.
1376
1377 Example::
1378
1379 # default literal class used is Literal
1380 integer = Word(nums)
1381 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1382
1383 date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
1384
1385
1386 # change to Suppress
1387 ParserElement.inlineLiteralsUsing(Suppress)
1388 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1389
1390 date_str.parseString("1999/12/31") # -> ['1999', '12', '31']
1391 """
1392 ParserElement._literalStringClass = cls
1393
1395 self.parseAction = list()
1396 self.failAction = None
1397
1398 self.strRepr = None
1399 self.resultsName = None
1400 self.saveAsList = savelist
1401 self.skipWhitespace = True
1402 self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
1403 self.copyDefaultWhiteChars = True
1404 self.mayReturnEmpty = False
1405 self.keepTabs = False
1406 self.ignoreExprs = list()
1407 self.debug = False
1408 self.streamlined = False
1409 self.mayIndexError = True
1410 self.errmsg = ""
1411 self.modalResults = True
1412 self.debugActions = (None, None, None)
1413 self.re = None
1414 self.callPreparse = True
1415 self.callDuringTry = False
1416
1418 """
1419 Make a copy of this :class:`ParserElement`. Useful for defining
1420 different parse actions for the same parsing pattern, using copies of
1421 the original parse element.
1422
1423 Example::
1424
1425 integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1426 integerK = integer.copy().addParseAction(lambda toks: toks[0] * 1024) + Suppress("K")
1427 integerM = integer.copy().addParseAction(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
1428
1429 print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
1430
1431 prints::
1432
1433 [5120, 100, 655360, 268435456]
1434
1435 Equivalent form of ``expr.copy()`` is just ``expr()``::
1436
1437 integerM = integer().addParseAction(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
1438 """
1439 cpy = copy.copy(self)
1440 cpy.parseAction = self.parseAction[:]
1441 cpy.ignoreExprs = self.ignoreExprs[:]
1442 if self.copyDefaultWhiteChars:
1443 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
1444 return cpy
1445
1447 """
1448 Define name for this expression, makes debugging and exception messages clearer.
1449
1450 Example::
1451
1452 Word(nums).parseString("ABC") # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
1453 Word(nums).setName("integer").parseString("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1)
1454 """
1455 self.name = name
1456 self.errmsg = "Expected " + self.name
1457 if __diag__.enable_debug_on_named_expressions:
1458 self.setDebug()
1459 return self
1460
1462 """
1463 Define name for referencing matching tokens as a nested attribute
1464 of the returned parse results.
1465 NOTE: this returns a *copy* of the original :class:`ParserElement` object;
1466 this is so that the client can define a basic element, such as an
1467 integer, and reference it in multiple places with different names.
1468
1469 You can also set results names using the abbreviated syntax,
1470 ``expr("name")`` in place of ``expr.setResultsName("name")``
1471 - see :class:`__call__`.
1472
1473 Example::
1474
1475 date_str = (integer.setResultsName("year") + '/'
1476 + integer.setResultsName("month") + '/'
1477 + integer.setResultsName("day"))
1478
1479 # equivalent form:
1480 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1481 """
1482 return self._setResultsName(name, listAllMatches)
1483
1485 newself = self.copy()
1486 if name.endswith("*"):
1487 name = name[:-1]
1488 listAllMatches = True
1489 newself.resultsName = name
1490 newself.modalResults = not listAllMatches
1491 return newself
1492
1494 """Method to invoke the Python pdb debugger when this element is
1495 about to be parsed. Set ``breakFlag`` to True to enable, False to
1496 disable.
1497 """
1498 if breakFlag:
1499 _parseMethod = self._parse
1500 def breaker(instring, loc, doActions=True, callPreParse=True):
1501 import pdb
1502
1503 pdb.set_trace()
1504 return _parseMethod(instring, loc, doActions, callPreParse)
1505 breaker._originalParseMethod = _parseMethod
1506 self._parse = breaker
1507 else:
1508 if hasattr(self._parse, "_originalParseMethod"):
1509 self._parse = self._parse._originalParseMethod
1510 return self
1511
1513 """
1514 Define one or more actions to perform when successfully matching parse element definition.
1515 Parse action fn is a callable method with 0-3 arguments, called as ``fn(s, loc, toks)`` ,
1516 ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:
1517
1518 - s = the original string being parsed (see note below)
1519 - loc = the location of the matching substring
1520 - toks = a list of the matched tokens, packaged as a :class:`ParseResults` object
1521
1522 If the functions in fns modify the tokens, they can return them as the return
1523 value from fn, and the modified list of tokens will replace the original.
1524 Otherwise, fn does not need to return any value.
1525
1526 If None is passed as the parse action, all previously added parse actions for this
1527 expression are cleared.
1528
1529 Optional keyword arguments:
1530 - callDuringTry = (default= ``False``) indicate if parse action should be run during lookaheads and alternate testing
1531
1532 Note: the default parsing behavior is to expand tabs in the input string
1533 before starting the parsing process. See :class:`parseString for more
1534 information on parsing strings containing ``<TAB>`` s, and suggested
1535 methods to maintain a consistent view of the parsed string, the parse
1536 location, and line and column positions within the parsed string.
1537
1538 Example::
1539
1540 integer = Word(nums)
1541 date_str = integer + '/' + integer + '/' + integer
1542
1543 date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
1544
1545 # use parse action to convert to ints at parse time
1546 integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1547 date_str = integer + '/' + integer + '/' + integer
1548
1549 # note that integer fields are now ints, not strings
1550 date_str.parseString("1999/12/31") # -> [1999, '/', 12, '/', 31]
1551 """
1552 if list(fns) == [None,]:
1553 self.parseAction = []
1554 else:
1555 if not all(callable(fn) for fn in fns):
1556 raise TypeError("parse actions must be callable")
1557 self.parseAction = list(map(_trim_arity, list(fns)))
1558 self.callDuringTry = kwargs.get("callDuringTry", False)
1559 return self
1560
1562 """
1563 Add one or more parse actions to expression's list of parse actions. See :class:`setParseAction`.
1564
1565 See examples in :class:`copy`.
1566 """
1567 self.parseAction += list(map(_trim_arity, list(fns)))
1568 self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
1569 return self
1570
1572 """Add a boolean predicate function to expression's list of parse actions. See
1573 :class:`setParseAction` for function call signatures. Unlike ``setParseAction``,
1574 functions passed to ``addCondition`` need to return boolean success/fail of the condition.
1575
1576 Optional keyword arguments:
1577 - message = define a custom message to be used in the raised exception
1578 - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
1579
1580 Example::
1581
1582 integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1583 year_int = integer.copy()
1584 year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
1585 date_str = year_int + '/' + integer + '/' + integer
1586
1587 result = date_str.parseString("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
1588 """
1589 for fn in fns:
1590 self.parseAction.append(conditionAsParseAction(fn, message=kwargs.get('message'),
1591 fatal=kwargs.get('fatal', False)))
1592
1593 self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
1594 return self
1595
1597 """Define action to perform if parsing fails at this expression.
1598 Fail acton fn is a callable function that takes the arguments
1599 ``fn(s, loc, expr, err)`` where:
1600 - s = string being parsed
1601 - loc = location where expression match was attempted and failed
1602 - expr = the parse expression that failed
1603 - err = the exception thrown
1604 The function returns no value. It may throw :class:`ParseFatalException`
1605 if it is desired to stop parsing immediately."""
1606 self.failAction = fn
1607 return self
1608
1610 exprsFound = True
1611 while exprsFound:
1612 exprsFound = False
1613 for e in self.ignoreExprs:
1614 try:
1615 while 1:
1616 loc, dummy = e._parse(instring, loc)
1617 exprsFound = True
1618 except ParseException:
1619 pass
1620 return loc
1621
1623 if self.ignoreExprs:
1624 loc = self._skipIgnorables(instring, loc)
1625
1626 if self.skipWhitespace:
1627 wt = self.whiteChars
1628 instrlen = len(instring)
1629 while loc < instrlen and instring[loc] in wt:
1630 loc += 1
1631
1632 return loc
1633
1634 - def parseImpl(self, instring, loc, doActions=True):
1636
1637 - def postParse(self, instring, loc, tokenlist):
1639
1640
def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True):
    """
    Attempt to match this expression at ``loc`` without any memoizing:
    pre-parse, call ``parseImpl``, call ``postParse``, then run the parse
    actions.

    Parameters:

     - instring - the string being parsed
     - loc - location in ``instring`` at which to attempt the match
     - doActions - (default= ``True``) passed through to ``parseImpl``; when
       false, parse actions are only run if ``self.callDuringTry`` is set
     - callPreParse - (default= ``True``) when true and ``self.callPreparse``
       is also set, ``preParse`` is called first and matching starts at the
       location it returns

    Returns a ``(loc, tokens)`` tuple, where ``loc`` is the location returned
    by ``parseImpl`` and ``tokens`` is a :class:`ParseResults`.

    An ``IndexError`` raised by ``parseImpl`` (on the guarded path) or by a
    parse action is converted to a :class:`ParseException`; other exceptions
    propagate unchanged. When debugging is enabled or a fail action is set,
    the corresponding debug/fail callbacks are invoked before the exception
    is re-raised.
    """
    TRY, MATCH, FAIL = 0, 1, 2
    debugging = (self.debug)

    if debugging or self.failAction:
        if self.debugActions[TRY]:
            self.debugActions[TRY](instring, loc, self)
        # Seed tokensStart before entering the try block: if preParse()
        # itself raises, the handler below would otherwise hit an unbound
        # local and mask the original exception with UnboundLocalError.
        tokensStart = loc
        try:
            if callPreParse and self.callPreparse:
                preloc = self.preParse(instring, loc)
            else:
                preloc = loc
            tokensStart = preloc
            if self.mayIndexError or preloc >= len(instring):
                # guard against running off the end of the input string
                try:
                    loc, tokens = self.parseImpl(instring, preloc, doActions)
                except IndexError:
                    raise ParseException(instring, len(instring), self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, preloc, doActions)
        except Exception as err:
            # report the failure to the debug and fail callbacks, then
            # let the original exception continue to propagate
            if self.debugActions[FAIL]:
                self.debugActions[FAIL](instring, tokensStart, self, err)
            if self.failAction:
                self.failAction(instring, tokensStart, self, err)
            raise
    else:
        # same matching steps as above, without the callback handling
        if callPreParse and self.callPreparse:
            preloc = self.preParse(instring, loc)
        else:
            preloc = loc
        tokensStart = preloc
        if self.mayIndexError or preloc >= len(instring):
            try:
                loc, tokens = self.parseImpl(instring, preloc, doActions)
            except IndexError:
                raise ParseException(instring, len(instring), self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, preloc, doActions)

    tokens = self.postParse(instring, loc, tokens)

    retTokens = ParseResults(tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults)
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokensStart, retTokens)
                    except IndexError as parse_action_exc:
                        # surface an IndexError from a parse action as a
                        # ParseException, keeping the original as its cause
                        exc = ParseException("exception raised in parse action")
                        exc.__cause__ = parse_action_exc
                        raise exc

                    # a parse action may return replacement tokens; None or
                    # the same object means "keep the current results"
                    if tokens is not None and tokens is not retTokens:
                        retTokens = ParseResults(tokens,
                                                 self.resultsName,
                                                 asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
                                                 modal=self.modalResults)
            except Exception as err:
                if self.debugActions[FAIL]:
                    self.debugActions[FAIL](instring, tokensStart, self, err)
                raise
        else:
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokensStart, retTokens)
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    exc.__cause__ = parse_action_exc
                    raise exc

                if tokens is not None and tokens is not retTokens:
                    retTokens = ParseResults(tokens,
                                             self.resultsName,
                                             asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
                                             modal=self.modalResults)
    if debugging:
        if self.debugActions[MATCH]:
            self.debugActions[MATCH](instring, tokensStart, loc, self, retTokens)

    return loc, retTokens
1727
1733
1735 try:
1736 self.tryParse(instring, loc)
1737 except (ParseException, IndexError):
1738 return False
1739 else:
1740 return True
1741
1744 cache = {}
1745 self.not_in_cache = not_in_cache = object()
1746
1747 def get(self, key):
1748 return cache.get(key, not_in_cache)
1749
1750 def set(self, key, value):
1751 cache[key] = value
1752
1753 def clear(self):
1754 cache.clear()
1755
1756 def cache_len(self):
1757 return len(cache)
1758
1759 self.get = types.MethodType(get, self)
1760 self.set = types.MethodType(set, self)
1761 self.clear = types.MethodType(clear, self)
1762 self.__len__ = types.MethodType(cache_len, self)
1763
1764 if _OrderedDict is not None:
1767 self.not_in_cache = not_in_cache = object()
1768
1769 cache = _OrderedDict()
1770
1771 def get(self, key):
1772 return cache.get(key, not_in_cache)
1773
1774 def set(self, key, value):
1775 cache[key] = value
1776 while len(cache) > size:
1777 try:
1778 cache.popitem(False)
1779 except KeyError:
1780 pass
1781
1782 def clear(self):
1783 cache.clear()
1784
1785 def cache_len(self):
1786 return len(cache)
1787
1788 self.get = types.MethodType(get, self)
1789 self.set = types.MethodType(set, self)
1790 self.clear = types.MethodType(clear, self)
1791 self.__len__ = types.MethodType(cache_len, self)
1792
1793 else:
1796 self.not_in_cache = not_in_cache = object()
1797
1798 cache = {}
1799 key_fifo = collections.deque([], size)
1800
1801 def get(self, key):
1802 return cache.get(key, not_in_cache)
1803
1804 def set(self, key, value):
1805 cache[key] = value
1806 while len(key_fifo) > size:
1807 cache.pop(key_fifo.popleft(), None)
1808 key_fifo.append(key)
1809
1810 def clear(self):
1811 cache.clear()
1812 key_fifo.clear()
1813
1814 def cache_len(self):
1815 return len(cache)
1816
1817 self.get = types.MethodType(get, self)
1818 self.set = types.MethodType(set, self)
1819 self.clear = types.MethodType(clear, self)
1820 self.__len__ = types.MethodType(cache_len, self)
1821
1822
1823 packrat_cache = {}
1824 packrat_cache_lock = RLock()
1825 packrat_cache_stats = [0, 0]
1826
1827
1828
def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
    """
    Memoizing counterpart of ``_parseNoCache``, backed by
    ``ParserElement.packrat_cache``.

    The cache key is ``(self, instring, loc, callPreParse, doActions)``.
    On a miss, ``_parseNoCache`` is called and either a copy of its
    result or, for a :class:`ParseBaseException`, a new exception instance
    built from the same args is stored. On a hit, a cached exception is
    raised again, otherwise the cached location and a copy of the cached
    tokens are returned. The hit/miss counters in
    ``ParserElement.packrat_cache_stats`` are updated accordingly. All of
    this happens while holding ``ParserElement.packrat_cache_lock``.
    """
    HIT, MISS = 0, 1
    key = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        memo = ParserElement.packrat_cache
        cached = memo.get(key)

        if cached is not memo.not_in_cache:
            ParserElement.packrat_cache_stats[HIT] += 1
            if isinstance(cached, Exception):
                raise cached
            # hand out a copy so callers cannot alter the cached tokens
            return cached[0], cached[1].copy()

        ParserElement.packrat_cache_stats[MISS] += 1
        try:
            result = self._parseNoCache(instring, loc, doActions, callPreParse)
        except ParseBaseException as pe:
            # remember the failure as a fresh instance built from the same args
            memo.set(key, pe.__class__(*pe.args))
            raise
        memo.set(key, (result[0], result[1].copy()))
        return result
1851
1852 _parse = _parseNoCache
1853
1854 @staticmethod
1858
1859 _packratEnabled = False
1860 @staticmethod
1862 """Enables "packrat" parsing, which adds memoizing to the parsing logic.
1863 Repeated parse attempts at the same string location (which happens
1864 often in many complex grammars) can immediately return a cached value,
1865 instead of re-executing parsing/validating code. Memoizing is done of
1866 both valid results and parsing exceptions.
1867
1868 Parameters:
1869
1870 - cache_size_limit - (default= ``128``) - if an integer value is provided
1871 will limit the size of the packrat cache; if None is passed, then
1872 the cache size will be unbounded; if 0 is passed, the cache will
1873 be effectively disabled.
1874
1875 This speedup may break existing programs that use parse actions that
1876 have side-effects. For this reason, packrat parsing is disabled when
1877 you first import pyparsing. To activate the packrat feature, your
1878 program must call the class method :class:`ParserElement.enablePackrat`.
1879 For best results, call ``enablePackrat()`` immediately after
1880 importing pyparsing.
1881
1882 Example::
1883
1884 import pyparsing
1885 pyparsing.ParserElement.enablePackrat()
1886 """
1887 if not ParserElement._packratEnabled:
1888 ParserElement._packratEnabled = True
1889 if cache_size_limit is None:
1890 ParserElement.packrat_cache = ParserElement._UnboundedCache()
1891 else:
1892 ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
1893 ParserElement._parse = ParserElement._parseCache
1894
1896 """
1897 Execute the parse expression with the given string.
1898 This is the main interface to the client code, once the complete
1899 expression has been built.
1900
1901 Returns the parsed data as a :class:`ParseResults` object, which may be
1902 accessed as a list, or as a dict or object with attributes if the given parser
1903 includes results names.
1904
1905 If you want the grammar to require that the entire input string be
1906 successfully parsed, then set ``parseAll`` to True (equivalent to ending
1907 the grammar with ``StringEnd()``).
1908
1909 Note: ``parseString`` implicitly calls ``expandtabs()`` on the input string,
1910 in order to report proper column numbers in parse actions.
1911 If the input string contains tabs and
1912 the grammar uses parse actions that use the ``loc`` argument to index into the
1913 string being parsed, you can ensure you have a consistent view of the input
1914 string by:
1915
1916 - calling ``parseWithTabs`` on your grammar before calling ``parseString``
1917 (see :class:`parseWithTabs`)
1918 - define your parse action using the full ``(s, loc, toks)`` signature, and
1919 reference the input string using the parse action's ``s`` argument
1920 - explictly expand the tabs in your input string before calling
1921 ``parseString``
1922
1923 Example::
1924
1925 Word('a').parseString('aaaaabaaa') # -> ['aaaaa']
1926 Word('a').parseString('aaaaabaaa', parseAll=True) # -> Exception: Expected end of text
1927 """
1928 ParserElement.resetCache()
1929 if not self.streamlined:
1930 self.streamline()
1931
1932 for e in self.ignoreExprs:
1933 e.streamline()
1934 if not self.keepTabs:
1935 instring = instring.expandtabs()
1936 try:
1937 loc, tokens = self._parse(instring, 0)
1938 if parseAll:
1939 loc = self.preParse(instring, loc)
1940 se = Empty() + StringEnd()
1941 se._parse(instring, loc)
1942 except ParseBaseException as exc:
1943 if ParserElement.verbose_stacktrace:
1944 raise
1945 else:
1946
1947 raise exc
1948 else:
1949 return tokens
1950
1952 """
1953 Scan the input string for expression matches. Each match will return the
1954 matching tokens, start location, and end location. May be called with optional
1955 ``maxMatches`` argument, to clip scanning after 'n' matches are found. If
1956 ``overlap`` is specified, then overlapping matches will be reported.
1957
1958 Note that the start and end locations are reported relative to the string
1959 being parsed. See :class:`parseString` for more information on parsing
1960 strings with embedded tabs.
1961
1962 Example::
1963
1964 source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
1965 print(source)
1966 for tokens, start, end in Word(alphas).scanString(source):
1967 print(' '*start + '^'*(end-start))
1968 print(' '*start + tokens[0])
1969
1970 prints::
1971
1972 sldjf123lsdjjkf345sldkjf879lkjsfd987
1973 ^^^^^
1974 sldjf
1975 ^^^^^^^
1976 lsdjjkf
1977 ^^^^^^
1978 sldkjf
1979 ^^^^^^
1980 lkjsfd
1981 """
1982 if not self.streamlined:
1983 self.streamline()
1984 for e in self.ignoreExprs:
1985 e.streamline()
1986
1987 if not self.keepTabs:
1988 instring = _ustr(instring).expandtabs()
1989 instrlen = len(instring)
1990 loc = 0
1991 preparseFn = self.preParse
1992 parseFn = self._parse
1993 ParserElement.resetCache()
1994 matches = 0
1995 try:
1996 while loc <= instrlen and matches < maxMatches:
1997 try:
1998 preloc = preparseFn(instring, loc)
1999 nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
2000 except ParseException:
2001 loc = preloc + 1
2002 else:
2003 if nextLoc > loc:
2004 matches += 1
2005 yield tokens, preloc, nextLoc
2006 if overlap:
2007 nextloc = preparseFn(instring, loc)
2008 if nextloc > loc:
2009 loc = nextLoc
2010 else:
2011 loc += 1
2012 else:
2013 loc = nextLoc
2014 else:
2015 loc = preloc + 1
2016 except ParseBaseException as exc:
2017 if ParserElement.verbose_stacktrace:
2018 raise
2019 else:
2020
2021 raise exc
2022
2068
2070 """
2071 Another extension to :class:`scanString`, simplifying the access to the tokens found
2072 to match the given parse expression. May be called with optional
2073 ``maxMatches`` argument, to clip searching after 'n' matches are found.
2074
2075 Example::
2076
2077 # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
2078 cap_word = Word(alphas.upper(), alphas.lower())
2079
2080 print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
2081
2082 # the sum() builtin can be used to merge results into a single ParseResults object
2083 print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))
2084
2085 prints::
2086
2087 [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
2088 ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
2089 """
2090 try:
2091 return ParseResults([t for t, s, e in self.scanString(instring, maxMatches)])
2092 except ParseBaseException as exc:
2093 if ParserElement.verbose_stacktrace:
2094 raise
2095 else:
2096
2097 raise exc
2098
def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """
    Generator method to split a string using the given expression as a separator.
    May be called with optional ``maxsplit`` argument, to limit the number of splits;
    and the optional ``includeSeparators`` argument (default= ``False``), if the separating
    matching text should be included in the split results.

    Yields the text between successive matches reported by ``scanString``,
    followed by the remaining text after the last match; when
    ``includeSeparators`` is true, the first token of each match is yielded
    after the text that precedes it.

    Example::

        punc = oneOf(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

    prints::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    # end location of the previous separator match
    last = 0
    for t, s, e in self.scanString(instring, maxMatches=maxsplit):
        yield instring[last:s]
        if includeSeparators:
            yield t[0]
        last = e
    # whatever follows the final separator (the whole string if none matched)
    yield instring[last:]
2123
2125 """
2126 Implementation of + operator - returns :class:`And`. Adding strings to a ParserElement
2127 converts them to :class:`Literal`s by default.
2128
2129 Example::
2130
2131 greet = Word(alphas) + "," + Word(alphas) + "!"
2132 hello = "Hello, World!"
2133 print (hello, "->", greet.parseString(hello))
2134
2135 prints::
2136
2137 Hello, World! -> ['Hello', ',', 'World', '!']
2138
2139 ``...`` may be used as a parse expression as a short form of :class:`SkipTo`.
2140
2141 Literal('start') + ... + Literal('end')
2142
2143 is equivalent to:
2144
2145 Literal('start') + SkipTo('end')("_skipped*") + Literal('end')
2146
2147 Note that the skipped text is returned with '_skipped' as a results name,
2148 and to support having multiple skips in the same parser, the value returned is
2149 a list of all skipped text.
2150 """
2151 if other is Ellipsis:
2152 return _PendingSkip(self)
2153
2154 if isinstance(other, basestring):
2155 other = self._literalStringClass(other)
2156 if not isinstance(other, ParserElement):
2157 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
2158 SyntaxWarning, stacklevel=2)
2159 return None
2160 return And([self, other])
2161
2163 """
2164 Implementation of + operator when left operand is not a :class:`ParserElement`
2165 """
2166 if other is Ellipsis:
2167 return SkipTo(self)("_skipped*") + self
2168
2169 if isinstance(other, basestring):
2170 other = self._literalStringClass(other)
2171 if not isinstance(other, ParserElement):
2172 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
2173 SyntaxWarning, stacklevel=2)
2174 return None
2175 return other + self
2176
2178 """
2179 Implementation of - operator, returns :class:`And` with error stop
2180 """
2181 if isinstance(other, basestring):
2182 other = self._literalStringClass(other)
2183 if not isinstance(other, ParserElement):
2184 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
2185 SyntaxWarning, stacklevel=2)
2186 return None
2187 return self + And._ErrorStop() + other
2188
2190 """
2191 Implementation of - operator when left operand is not a :class:`ParserElement`
2192 """
2193 if isinstance(other, basestring):
2194 other = self._literalStringClass(other)
2195 if not isinstance(other, ParserElement):
2196 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
2197 SyntaxWarning, stacklevel=2)
2198 return None
2199 return other - self
2200
def __mul__(self, other):
    """
    Implementation of * operator, allows use of ``expr * 3`` in place of
    ``expr + expr + expr``.  Expressions may also be multiplied by a 2-integer
    tuple, similar to ``{min, max}`` multipliers in regular expressions.  Tuples
    may also include ``None`` as in:
     - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
          to ``expr*n + ZeroOrMore(expr)``
          (read as "at least n instances of ``expr``")
     - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
          (read as "0 to n instances of ``expr``")
     - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
     - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    ``...`` (``Ellipsis``) may be used in place of ``None`` in a tuple, and
    ``expr * ...`` is treated as ``expr * (0, None)``.

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    ``expr*(None, n) + ~expr``

    Raises ``TypeError`` if ``other`` is neither an int nor a tuple of
    ints/``None``, and ``ValueError`` for a negative count, a maximum
    smaller than the minimum, or a multiplier of ``0`` / ``(0, 0)``.
    """
    # normalize the Ellipsis forms to their None-based tuple equivalents
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0, ) + other[1:] + (None,))[:2]

    if isinstance(other, int):
        minElements, optElements = other, 0
    elif isinstance(other, tuple):
        other = tuple(o if o is not Ellipsis else None for o in other)
        # pad short tuples with None so that (n,) behaves like (n, None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0], int) and other[1] is None:
            # open-ended repetition
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self * other[0] + ZeroOrMore(self)
        elif isinstance(other[0], int) and isinstance(other[1], int):
            minElements, optElements = other
            # optElements becomes the count of optional repeats beyond the minimum
            optElements -= minElements
        else:
            # format the message; passing the types as extra exception args
            # would leave the '%s' placeholders unexpanded
            raise TypeError("cannot multiply 'ParserElement' and ('%s', '%s') objects"
                            % (type(other[0]).__name__, type(other[1]).__name__))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects"
                        % type(other).__name__)

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0, 0)")

    if optElements:
        def makeOptionalList(n):
            # nested Optionals: each further repeat is tried only if the previous one matched
            if n > 1:
                return Optional(self + makeOptionalList(n - 1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self] * minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self] * minElements)
    return ret
2274
2277
2279 """
2280 Implementation of | operator - returns :class:`MatchFirst`
2281 """
2282 if other is Ellipsis:
2283 return _PendingSkip(self, must_skip=True)
2284
2285 if isinstance(other, basestring):
2286 other = self._literalStringClass(other)
2287 if not isinstance(other, ParserElement):
2288 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
2289 SyntaxWarning, stacklevel=2)
2290 return None
2291 return MatchFirst([self, other])
2292
2294 """
2295 Implementation of | operator when left operand is not a :class:`ParserElement`
2296 """
2297 if isinstance(other, basestring):
2298 other = self._literalStringClass(other)
2299 if not isinstance(other, ParserElement):
2300 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
2301 SyntaxWarning, stacklevel=2)
2302 return None
2303 return other | self
2304
2306 """
2307 Implementation of ^ operator - returns :class:`Or`
2308 """
2309 if isinstance(other, basestring):
2310 other = self._literalStringClass(other)
2311 if not isinstance(other, ParserElement):
2312 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
2313 SyntaxWarning, stacklevel=2)
2314 return None
2315 return Or([self, other])
2316
2318 """
2319 Implementation of ^ operator when left operand is not a :class:`ParserElement`
2320 """
2321 if isinstance(other, basestring):
2322 other = self._literalStringClass(other)
2323 if not isinstance(other, ParserElement):
2324 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
2325 SyntaxWarning, stacklevel=2)
2326 return None
2327 return other ^ self
2328
2330 """
2331 Implementation of & operator - returns :class:`Each`
2332 """
2333 if isinstance(other, basestring):
2334 other = self._literalStringClass(other)
2335 if not isinstance(other, ParserElement):
2336 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
2337 SyntaxWarning, stacklevel=2)
2338 return None
2339 return Each([self, other])
2340
2342 """
2343 Implementation of & operator when left operand is not a :class:`ParserElement`
2344 """
2345 if isinstance(other, basestring):
2346 other = self._literalStringClass(other)
2347 if not isinstance(other, ParserElement):
2348 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
2349 SyntaxWarning, stacklevel=2)
2350 return None
2351 return other & self
2352
2354 """
2355 Implementation of ~ operator - returns :class:`NotAny`
2356 """
2357 return NotAny(self)
2358
2360
2361
2362 raise TypeError('%r object is not iterable' % self.__class__.__name__)
2363
2365 """
2366 use ``[]`` indexing notation as a short form for expression repetition:
2367 - ``expr[n]`` is equivalent to ``expr*n``
2368 - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
2369 - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
2370 to ``expr*n + ZeroOrMore(expr)``
2371 (read as "at least n instances of ``expr``")
2372 - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
2373 (read as "0 to n instances of ``expr``")
2374 - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
2375 - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``
2376 ``None`` may be used in place of ``...``.
2377
2378 Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
2379 if more than ``n`` ``expr``s exist in the input stream. If this behavior is
2380 desired, then write ``expr[..., n] + ~expr``.
2381 """
2382
2383
2384 try:
2385 if isinstance(key, str):
2386 key = (key,)
2387 iter(key)
2388 except TypeError:
2389 key = (key, key)
2390
2391 if len(key) > 2:
2392 warnings.warn("only 1 or 2 index arguments supported ({0}{1})".format(key[:5],
2393 '... [{0}]'.format(len(key))
2394 if len(key) > 5 else ''))
2395
2396
2397 ret = self * tuple(key[:2])
2398 return ret
2399
2401 """
2402 Shortcut for :class:`setResultsName`, with ``listAllMatches=False``.
2403
2404 If ``name`` is given with a trailing ``'*'`` character, then ``listAllMatches`` will be
2405 passed as ``True``.
2406
2407 If ``name`` is omitted, same as calling :class:`copy`.
2408
2409 Example::
2410
2411 # these are equivalent
2412 userdata = Word(alphas).setResultsName("name") + Word(nums + "-").setResultsName("socsecno")
2413 userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
2414 """
2415 if name is not None:
2416 return self._setResultsName(name)
2417 else:
2418 return self.copy()
2419
2421 """
2422 Suppresses the output of this :class:`ParserElement`; useful to keep punctuation from
2423 cluttering up returned output.
2424 """
2425 return Suppress(self)
2426
2428 """
2429 Disables the skipping of whitespace before matching the characters in the
2430 :class:`ParserElement`'s defined pattern. This is normally only used internally by
2431 the pyparsing module, but may be needed in some whitespace-sensitive grammars.
2432 """
2433 self.skipWhitespace = False
2434 return self
2435
2437 """
2438 Overrides the default whitespace chars
2439 """
2440 self.skipWhitespace = True
2441 self.whiteChars = chars
2442 self.copyDefaultWhiteChars = False
2443 return self
2444
2446 """
2447 Overrides default behavior to expand ``<TAB>``s to spaces before parsing the input string.
2448 Must be called before ``parseString`` when the input grammar contains elements that
2449 match ``<TAB>`` characters.
2450 """
2451 self.keepTabs = True
2452 return self
2453
2455 """
2456 Define expression to be ignored (e.g., comments) while doing pattern
2457 matching; may be called repeatedly, to define multiple comment or other
2458 ignorable patterns.
2459
2460 Example::
2461
2462 patt = OneOrMore(Word(alphas))
2463 patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']
2464
2465 patt.ignore(cStyleComment)
2466 patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
2467 """
2468 if isinstance(other, basestring):
2469 other = Suppress(other)
2470
2471 if isinstance(other, Suppress):
2472 if other not in self.ignoreExprs:
2473 self.ignoreExprs.append(other)
2474 else:
2475 self.ignoreExprs.append(Suppress(other.copy()))
2476 return self
2477
2487
2489 """
2490 Enable display of debugging messages while doing pattern matching.
2491 Set ``flag`` to True to enable, False to disable.
2492
2493 Example::
2494
2495 wd = Word(alphas).setName("alphaword")
2496 integer = Word(nums).setName("numword")
2497 term = wd | integer
2498
2499 # turn on debugging for wd
2500 wd.setDebug()
2501
2502 OneOrMore(term).parseString("abc 123 xyz 890")
2503
2504 prints::
2505
2506 Match alphaword at loc 0(1,1)
2507 Matched alphaword -> ['abc']
2508 Match alphaword at loc 3(1,4)
2509 Exception raised:Expected alphaword (at char 4), (line:1, col:5)
2510 Match alphaword at loc 7(1,8)
2511 Matched alphaword -> ['xyz']
2512 Match alphaword at loc 11(1,12)
2513 Exception raised:Expected alphaword (at char 12), (line:1, col:13)
2514 Match alphaword at loc 15(1,16)
2515 Exception raised:Expected alphaword (at char 15), (line:1, col:16)
2516
2517 The output shown is that produced by the default debug actions - custom debug actions can be
2518 specified using :class:`setDebugActions`. Prior to attempting
2519 to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
2520 is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
2521 message is shown. Also note the use of :class:`setName` to assign a human-readable name to the expression,
2522 which makes debugging and exception messages easier to understand - for instance, the default
2523 name created for the :class:`Word` expression without calling ``setName`` is ``"W:(ABCD...)"``.
2524 """
2525 if flag:
2526 self.setDebugActions(_defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction)
2527 else:
2528 self.debug = False
2529 return self
2530
2533
2536
2538 self.streamlined = True
2539 self.strRepr = None
2540 return self
2541
2544
def validate(self, validateTrace=None):
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    ``validateTrace`` is accepted but not consulted by this implementation;
    the recursion check always starts from an empty trace.
    """
    recursion_trace = []
    self.checkRecursion(recursion_trace)
2550
def parseFile(self, file_or_filename, parseAll=False):
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    Returns whatever ``parseString`` returns for the file's contents.
    """
    try:
        text = file_or_filename.read()
    except AttributeError:
        # no usable read(): treat the argument as a path and read it in full
        with open(file_or_filename, "r") as source:
            text = source.read()

    try:
        return self.parseString(text, parseAll)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # re-raise the caught exception explicitly from this frame
            raise exc
        raise
2570
2572 if self is other:
2573 return True
2574 elif isinstance(other, basestring):
2575 return self.matches(other)
2576 elif isinstance(other, ParserElement):
2577 return vars(self) == vars(other)
2578 return False
2579
2581 return not (self == other)
2582
2585
2587 return self == other
2588
2590 return not (self == other)
2591
def matches(self, testString, parseAll=True):
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    Parameters:
     - testString - to test against this expression for a match
     - parseAll - (default= ``True``) - flag to pass to :class:`parseString` when running tests

    Returns ``True`` if the parse succeeds, ``False`` if it raises a
    ``ParseBaseException``; any other exception propagates.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    try:
        self.parseString(_ustr(testString), parseAll=parseAll)
    except ParseBaseException:
        return False
    else:
        return True
2611
def runTests(self, tests, parseAll=True, comment='#',
             fullDump=True, printResults=True, failureTests=False, postParse=None,
             file=None):
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default= ``True``) - flag to pass to :class:`parseString` when running tests
     - comment - (default= ``'#'``) - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
     - fullDump - (default= ``True``) - dump results as list followed by results names in nested outline;
          if False, only dump nested list
     - printResults - (default= ``True``) prints test output to stdout
     - failureTests - (default= ``False``) indicates if these tests are expected to fail parsing
     - postParse - (default= ``None``) optional callback for successful parse results; called as
          `fn(test_string, parse_results)` and returns a string to be added to the test output
     - file - (default=``None``) optional file-like object to which test output will be written;
          if None, will default to ``sys.stdout``

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failureTests`` is True), and results is a list of
    ``(test_string, result)`` tuples, one per executed test, where ``result`` is
    the value returned by ``parseString`` or the exception that was caught

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.runTests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.runTests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failureTests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.runTests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading 'r'.)
    """
    # a multiline string is split into one stripped test per line
    if isinstance(tests, basestring):
        tests = list(map(str.strip, tests.rstrip().splitlines()))
    if isinstance(comment, basestring):
        comment = Literal(comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    allResults = []
    comments = []  # comment lines seen since the last executed test
    success = True
    # expression used to rewrite a literal backslash-n marker in a test string
    # (quoted strings are ignored while scanning for it)
    NL = Literal(r'\n').addParseAction(replaceWith('\n')).ignore(quotedString)
    BOM = u'\ufeff'
    for t in tests:
        # collect the line as a comment if it matches the comment expression, or
        # if it is blank and comment lines are already pending
        if comment is not None and comment.matches(t, False) or comments and not t:
            comments.append(t)
            continue
        if not t:
            continue
        # output for this test starts with its pending comments, then the test itself
        out = ['\n'.join(comments), t]
        comments = []
        try:
            # apply the NL transform and strip any leading BOM before parsing
            t = NL.transformString(t.lstrip(BOM))
            result = self.parseString(t, parseAll=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
            # for a multi-line test, show the failing line and place the caret by column
            if '\n' in t:
                out.append(line(pe.loc, t))
                out.append(' ' * (col(pe.loc, t) - 1) + '^' + fatal)
            else:
                out.append(' ' * pe.loc + '^' + fatal)
            out.append("FAIL: " + str(pe))
            # a parse failure only keeps success True when failures are expected
            success = success and failureTests
            result = pe
        except Exception as exc:
            out.append("FAIL-EXCEPTION: " + str(exc))
            success = success and failureTests
            result = exc
        else:
            # a successful parse counts against success when failures are expected
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        # NOTE(review): this dump() call does not pass fullDump,
                        # unlike the other dump calls below - confirm intended
                        out.append(result.dump())
                except Exception as e:
                    # a failing callback is reported in the output rather than propagated
                    out.append(result.dump(full=fullDump))
                    out.append("{0} failed: {1}: {2}".format(postParse.__name__, type(e).__name__, e))
            else:
                out.append(result.dump(full=fullDump))

        if printResults:
            if fullDump:
                out.append('')
            # written with file.write, so no newline is added beyond those in the joined text
            print_('\n'.join(out))

        allResults.append((t, result))

    return success, allResults
2773
2776
2777
def __init__(self, expr, must_skip=False):
    """
    Remember ``expr`` as the anchor expression and record ``must_skip``.

    The display name is the string form of ``expr + Empty()`` with
    ``'Empty'`` replaced by ``'...'``.
    """
    super(_PendingSkip, self).__init__()
    label = str(expr + Empty()).replace('Empty', '...')
    self.strRepr = label
    self.name = label
    self.anchor = expr
    self.must_skip = must_skip
2784
2786 skipper = SkipTo(other).setName("...")("_skipped*")
2787 if self.must_skip:
2788 def must_skip(t):
2789 if not t._skipped or t._skipped.asList() == ['']:
2790 del t[0]
2791 t.pop("_skipped", None)
2792 def show_skip(t):
2793 if t._skipped.asList()[-1:] == ['']:
2794 skipped = t.pop('_skipped')
2795 t['_skipped'] = 'missing <' + repr(self.anchor) + '>'
2796 return (self.anchor + skipper().addParseAction(must_skip)
2797 | skipper().addParseAction(show_skip)) + other
2798
2799 return self.anchor + skipper + other
2800
2803
2805 raise Exception("use of `...` expression without following SkipTo target expression")
2806
2807
2808 -class Token(ParserElement):
2809 """Abstract :class:`ParserElement` subclass, for defining atomic
2810 matching patterns.
2811 """
2814
2815
2816 -class Empty(Token):
2817 """An empty token, will always match.
2818 """
2820 super(Empty, self).__init__()
2821 self.name = "Empty"
2822 self.mayReturnEmpty = True
2823 self.mayIndexError = False
2824
2827 """A token that will never match.
2828 """
2830 super(NoMatch, self).__init__()
2831 self.name = "NoMatch"
2832 self.mayReturnEmpty = True
2833 self.mayIndexError = False
2834 self.errmsg = "Unmatchable token"
2835
2836 - def parseImpl(self, instring, loc, doActions=True):
2838
2841 """Token to exactly match a specified string.
2842
2843 Example::
2844
2845 Literal('blah').parseString('blah') # -> ['blah']
2846 Literal('blah').parseString('blahfooblah') # -> ['blah']
2847 Literal('blah').parseString('bla') # -> Exception: Expected "blah"
2848
2849 For case-insensitive matching, use :class:`CaselessLiteral`.
2850
2851 For keyword matching (force word break before and after the matched string),
2852 use :class:`Keyword` or :class:`CaselessKeyword`.
2853 """
2855 super(Literal, self).__init__()
2856 self.match = matchString
2857 self.matchLen = len(matchString)
2858 try:
2859 self.firstMatchChar = matchString[0]
2860 except IndexError:
2861 warnings.warn("null string passed to Literal; use Empty() instead",
2862 SyntaxWarning, stacklevel=2)
2863 self.__class__ = Empty
2864 self.name = '"%s"' % _ustr(self.match)
2865 self.errmsg = "Expected " + self.name
2866 self.mayReturnEmpty = False
2867 self.mayIndexError = False
2868
2869
2870
2871 if self.matchLen == 1 and type(self) is Literal:
2872 self.__class__ = _SingleCharLiteral
2873
def parseImpl(self, instring, loc, doActions=True):
    """
    Match ``self.match`` exactly at ``loc``.

    Returns ``(loc + self.matchLen, self.match)`` on success and raises
    ``ParseException`` otherwise.  The first character is compared before
    the full ``startswith`` test.
    """
    if instring[loc] != self.firstMatchChar or not instring.startswith(self.match, loc):
        raise ParseException(instring, loc, self.errmsg, self)
    return loc + self.matchLen, self.match
2878
def parseImpl(self, instring, loc, doActions=True):
    """
    Single-character match: compare only the character at ``loc`` with
    ``self.firstMatchChar``.

    Returns ``(loc + 1, self.match)`` on success and raises
    ``ParseException`` otherwise.
    """
    if instring[loc] != self.firstMatchChar:
        raise ParseException(instring, loc, self.errmsg, self)
    return loc + 1, self.match
2884
# short module-level alias for the Literal class
_L = Literal
# class used by ParserElement to wrap plain-string operands, via
# ``self._literalStringClass(other)`` in the operator methods
ParserElement._literalStringClass = Literal
2889 """Token to exactly match a specified string as a keyword, that is,
2890 it must be immediately followed by a non-keyword character. Compare
2891 with :class:`Literal`:
2892
2893 - ``Literal("if")`` will match the leading ``'if'`` in
2894 ``'ifAndOnlyIf'``.
2895 - ``Keyword("if")`` will not; it will only match the leading
2896 ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``
2897
2898 Accepts two optional constructor arguments in addition to the
2899 keyword string:
2900
2901 - ``identChars`` is a string of characters that would be valid
2902 identifier characters, defaulting to all alphanumerics + "_" and
2903 "$"
2904 - ``caseless`` allows case-insensitive matching, default is ``False``.
2905
2906 Example::
2907
2908 Keyword("start").parseString("start") # -> ['start']
2909 Keyword("start").parseString("starting") # -> Exception
2910
2911 For case-insensitive matching, use :class:`CaselessKeyword`.
2912 """
2913 DEFAULT_KEYWORD_CHARS = alphanums + "_$"
2914
def __init__(self, matchString, identChars=None, caseless=False):
    """
    Store the keyword text and the set of identifier characters.

    ``identChars`` defaults to ``Keyword.DEFAULT_KEYWORD_CHARS``.  When
    ``caseless`` is true, upper-cased copies of the keyword and of the
    identifier characters are stored.  An empty ``matchString`` only
    produces a ``SyntaxWarning``.
    """
    super(Keyword, self).__init__()
    keyword_chars = Keyword.DEFAULT_KEYWORD_CHARS if identChars is None else identChars
    self.match = matchString
    self.matchLen = len(matchString)
    try:
        self.firstMatchChar = matchString[0]
    except IndexError:
        warnings.warn("null string passed to Keyword; use Empty() instead",
                      SyntaxWarning, stacklevel=2)
    self.name = '"%s"' % self.match
    self.errmsg = "Expected " + self.name
    self.mayReturnEmpty = self.mayIndexError = False
    self.caseless = caseless
    if caseless:
        self.caselessmatch = matchString.upper()
        keyword_chars = keyword_chars.upper()
    self.identChars = set(keyword_chars)
2935
def parseImpl(self, instring, loc, doActions=True):
    """
    Match the keyword at ``loc`` as a whole word.

    The text must equal the keyword (compared upper-cased when
    ``self.caseless`` is set), and neither the character after the match
    nor the character before ``loc`` may be in ``self.identChars``.
    Returns ``(loc + self.matchLen, self.match)`` or raises
    ``ParseException``.
    """
    match_end = loc + self.matchLen
    if self.caseless:
        if instring[loc:match_end].upper() == self.caselessmatch:
            # nothing follows the keyword, or what follows is not an identifier character
            if (loc >= len(instring) - self.matchLen
                    or instring[match_end].upper() not in self.identChars):
                # likewise for the character before the keyword
                if loc == 0 or instring[loc - 1].upper() not in self.identChars:
                    return match_end, self.match
    elif instring[loc] == self.firstMatchChar:
        if self.matchLen == 1 or instring.startswith(self.match, loc):
            if (loc >= len(instring) - self.matchLen
                    or instring[match_end] not in self.identChars):
                if loc == 0 or instring[loc - 1] not in self.identChars:
                    return match_end, self.match

    raise ParseException(instring, loc, self.errmsg, self)
2954
2959
2960 @staticmethod
2965
2967 """Token to match a specified string, ignoring case of letters.
2968 Note: the matched results will always be in the case of the given
2969 match string, NOT the case of the input text.
2970
2971 Example::
2972
2973 OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']
2974
2975 (Contrast with example for :class:`CaselessKeyword`.)
2976 """
2978 super(CaselessLiteral, self).__init__(matchString.upper())
2979
2980 self.returnString = matchString
2981 self.name = "'%s'" % self.returnString
2982 self.errmsg = "Expected " + self.name
2983
def parseImpl(self, instring, loc, doActions=True):
    """
    Compare the upper-cased slice of the input at ``loc`` with ``self.match``.

    On success returns ``(loc + self.matchLen, self.returnString)``: the
    result is the stored ``returnString``, not the input text.  Raises
    ``ParseException`` otherwise.
    """
    match_end = loc + self.matchLen
    if instring[loc:match_end].upper() != self.match:
        raise ParseException(instring, loc, self.errmsg, self)
    return match_end, self.returnString
2988
2990 """
2991 Caseless version of :class:`Keyword`.
2992
2993 Example::
2994
2995 OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']
2996
2997 (Contrast with example for :class:`CaselessLiteral`.)
2998 """
2999 - def __init__(self, matchString, identChars=None):
3001
3003 """A variation on :class:`Literal` which matches "close" matches,
3004 that is, strings with at most 'n' mismatching characters.
3005 :class:`CloseMatch` takes parameters:
3006
3007 - ``match_string`` - string to be matched
3008 - ``maxMismatches`` - (``default=1``) maximum number of
3009 mismatches allowed to count as a match
3010
3011 The results from a successful parse will contain the matched text
3012 from the input string and the following named results:
3013
3014 - ``mismatches`` - a list of the positions within the
3015 match_string where mismatches were found
3016 - ``original`` - the original match_string used to compare
3017 against the input string
3018
3019 If ``mismatches`` is an empty list, then the match was an exact
3020 match.
3021
3022 Example::
3023
3024 patt = CloseMatch("ATCATCGAATGGA")
3025 patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
3026 patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)
3027
3028 # exact match
3029 patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})
3030
3031 # close match allowing up to 2 mismatches
3032 patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
3033 patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
3034 """
def __init__(self, match_string, maxMismatches=1):
    """
    Store the string to compare against and the number of mismatching
    characters tolerated; the expression's name is the match string itself.
    """
    super(CloseMatch, self).__init__()
    self.name = self.match_string = match_string
    self.maxMismatches = maxMismatches
    self.errmsg = "Expected %r (with up to %d mismatches)" % (match_string, maxMismatches)
    self.mayIndexError = self.mayReturnEmpty = False
3043
def parseImpl(self, instring, loc, doActions=True):
    """
    Compare the input at ``loc`` with ``self.match_string`` character by
    character, tolerating up to ``self.maxMismatches`` differences.

    On success returns ``(end_loc, results)``, where ``end_loc`` is the
    absolute position just past the compared text and ``results`` holds
    that text plus the named results ``original`` (the match string) and
    ``mismatches`` (offsets within the match string that differed).

    Raises ``ParseException`` if fewer than ``len(self.match_string)``
    characters remain, or if the mismatch limit is exceeded.
    """
    start = loc
    match_string = self.match_string
    maxloc = start + len(match_string)

    if maxloc <= len(instring):
        mismatches = []
        maxMismatches = self.maxMismatches

        for offset, (src, mat) in enumerate(zip(instring[start:maxloc], match_string)):
            if src != mat:
                mismatches.append(offset)
                if len(mismatches) > maxMismatches:
                    break
        else:
            # the loop index is an offset into match_string, so the end
            # location must be taken relative to ``start``; using the bare
            # offset returned a wrong location and wrong text when start > 0
            loc = maxloc
            results = ParseResults([instring[start:loc]])
            results['original'] = match_string
            results['mismatches'] = mismatches
            return loc, results

    raise ParseException(instring, loc, self.errmsg, self)
3069
3070
3071 -class Word(Token):
3072 """Token for matching words composed of allowed character sets.
3073 Defined with string containing all allowed initial characters, an
3074 optional string containing allowed body characters (if omitted,
3075 defaults to the initial character set), and an optional minimum,
3076 maximum, and/or exact length. The default value for ``min`` is
3077 1 (a minimum value < 1 is not valid); the default values for
3078 ``max`` and ``exact`` are 0, meaning no maximum or exact
3079 length restriction. An optional ``excludeChars`` parameter can
3080 list characters that might be found in the input ``bodyChars``
3081 string; useful to define a word of all printables except for one or
3082 two characters, for instance.
3083
3084 :class:`srange` is useful for defining custom character set strings
3085 for defining ``Word`` expressions, using range notation from
3086 regular expression character sets.
3087
3088 A common mistake is to use :class:`Word` to match a specific literal
3089 string, as in ``Word("Address")``. Remember that :class:`Word`
3090 uses the string argument to define *sets* of matchable characters.
3091 This expression would match "Add", "AAA", "dAred", or any other word
3092 made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
3093 exact literal string, use :class:`Literal` or :class:`Keyword`.
3094
3095 pyparsing includes helper strings for building Words:
3096
3097 - :class:`alphas`
3098 - :class:`nums`
3099 - :class:`alphanums`
3100 - :class:`hexnums`
3101 - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
3102 - accented, tilded, umlauted, etc.)
3103 - :class:`punc8bit` (non-alphabetic characters in ASCII range
3104 128-255 - currency, symbols, superscripts, diacriticals, etc.)
3105 - :class:`printables` (any non-whitespace character)
3106
3107 Example::
3108
3109 # a word composed of digits
3110 integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))
3111
3112 # a word with a leading capital, and zero or more lowercase
3113 capital_word = Word(alphas.upper(), alphas.lower())
3114
3115 # hostnames are alphanumeric, with leading alpha, and '-'
3116 hostname = Word(alphas, alphanums + '-')
3117
3118 # roman numeral (not a strict parser, accepts invalid mix of characters)
3119 roman = Word("IVXLCDM")
3120
3121 # any string of non-whitespace characters, except for ','
3122 csv_value = Word(printables, excludeChars=",")
3123 """
def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None):
    """
    Build the character sets and length limits for this word.

     - initChars - characters allowed as the first character
     - bodyChars - characters allowed after the first; falls back to
          ``initChars`` when omitted or empty
     - min - minimum length; values below 1 raise ``ValueError``
     - max - maximum length; 0 means no maximum
     - exact - if greater than 0, overrides both ``min`` and ``max``
     - asKeyword - stored on the instance; adds ``\\b`` anchors to the
          generated regular expression
     - excludeChars - characters removed from ``initChars`` and ``bodyChars``
    """
    super(Word, self).__init__()
    # strip excluded characters before the sets are built
    if excludeChars:
        excludeChars = set(excludeChars)
        initChars = ''.join(c for c in initChars if c not in excludeChars)
        if bodyChars:
            bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
    # the *Orig attributes keep the strings; the sets are used for membership tests
    self.initCharsOrig = initChars
    self.initChars = set(initChars)
    if bodyChars:
        self.bodyCharsOrig = bodyChars
        self.bodyChars = set(bodyChars)
    else:
        self.bodyCharsOrig = initChars
        self.bodyChars = set(initChars)

    self.maxSpecified = max > 0

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

    self.minLen = min

    if max > 0:
        self.maxLen = max
    else:
        self.maxLen = _MAX_INT

    # an exact length overrides both limits set above
    if exact > 0:
        self.maxLen = exact
        self.minLen = exact

    # the name is derived from the string form of the instance, so the
    # character-set attributes above must already be in place
    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.asKeyword = asKeyword

    # build a regular expression only when neither character set contains a
    # space and all length arguments are at their defaults
    if ' ' not in self.initCharsOrig + self.bodyCharsOrig and (min == 1 and max == 0 and exact == 0):
        if self.bodyCharsOrig == self.initCharsOrig:
            self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
        elif len(self.initCharsOrig) == 1:
            self.reString = "%s[%s]*" % (re.escape(self.initCharsOrig),
                                         _escapeRegexRangeChars(self.bodyCharsOrig),)
        else:
            self.reString = "[%s][%s]*" % (_escapeRegexRangeChars(self.initCharsOrig),
                                           _escapeRegexRangeChars(self.bodyCharsOrig),)
        if self.asKeyword:
            self.reString = r"\b" + self.reString + r"\b"

        try:
            self.re = re.compile(self.reString)
        except Exception:
            # the pattern did not compile; the instance stays a plain Word
            self.re = None
        else:
            # switch the instance to the regex-based subclass
            self.re_match = self.re.match
            self.__class__ = _WordRegex
3180
def parseImpl(self, instring, loc, doActions=True):
    """
    Match a word at ``loc`` by scanning characters.

    The first character must be in ``self.initChars``; following
    characters are consumed while they are in ``self.bodyChars``, up to
    ``self.maxLen`` characters in total.  Returns ``(end_loc, text)`` or
    raises ``ParseException``.
    """
    if instring[loc] not in self.initChars:
        raise ParseException(instring, loc, self.errmsg, self)

    start = loc
    body = self.bodyChars
    text_len = len(instring)
    limit = min(start + self.maxLen, text_len)

    loc = start + 1
    while loc < limit and instring[loc] in body:
        loc += 1

    # too few characters consumed
    if loc - start < self.minLen:
        raise ParseException(instring, loc, self.errmsg, self)
    # an explicit maximum was given, but the word continues past it
    if self.maxSpecified and loc < text_len and instring[loc] in body:
        raise ParseException(instring, loc, self.errmsg, self)
    # keyword mode: a body character may not directly precede or follow the word
    if self.asKeyword:
        if (start > 0 and instring[start - 1] in body
                or loc < text_len and instring[loc] in body):
            raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[start:loc]
3208
3210 try:
3211 return super(Word, self).__str__()
3212 except Exception:
3213 pass
3214
3215 if self.strRepr is None:
3216
3217 def charsAsStr(s):
3218 if len(s) > 4:
3219 return s[:4] + "..."
3220 else:
3221 return s
3222
3223 if self.initCharsOrig != self.bodyCharsOrig:
3224 self.strRepr = "W:(%s, %s)" % (charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig))
3225 else:
3226 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
3227
3228 return self.strRepr
3229
def parseImpl(self, instring, loc, doActions=True):
    """Match at ``loc`` using the precompiled ``self.re_match`` callable.

    Returns ``(end_of_match, matched_text)``; raises ParseException when
    the regular expression does not match at ``loc``.  ``doActions`` is
    accepted but not used here.
    """
    match = self.re_match(instring, loc)
    if match:
        return match.end(), match.group()
    raise ParseException(instring, loc, self.errmsg, self)
3238
3239
class Char(_WordRegex):
    """A short-cut class for defining ``Word(characters, exact=1)``,
    when defining a match of any single character in a string of
    characters.
    """
    def __init__(self, charset, asKeyword=False, excludeChars=None):
        """Set up a one-character match over ``charset``.

        After the base initializer runs, the regular expression is rebuilt
        as a single character class over ``self.initChars``, wrapped in
        word-boundary assertions when ``asKeyword`` is true.
        """
        super(Char, self).__init__(charset, exact=1, asKeyword=asKeyword, excludeChars=excludeChars)
        char_class = "[%s]" % _escapeRegexRangeChars(''.join(self.initChars))
        self.reString = (r"\b%s\b" % char_class) if asKeyword else char_class
        self.re = re.compile(self.reString)
        self.re_match = self.re.match
3252
3253
3254 -class Regex(Token):
3255 r"""Token for matching strings that match a given regular
3256 expression. Defined with string specifying the regular expression in
3257 a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
3258 If the given regex contains named groups (defined using ``(?P<name>...)``),
3259 these will be preserved as named parse results.
3260
3261 If instead of the Python stdlib re module you wish to use a different RE module
3262 (such as the `regex` module), you can do so by building your
3263 Regex object with a compiled RE that was compiled using regex:
3264
3265 Example::
3266
3267 realnum = Regex(r"[+-]?\d+\.\d*")
3268 date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
3269 # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
3270 roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
3271
3272 # use regex module instead of stdlib re module to construct a Regex using
3273 # a compiled regular expression
3274 import regex
3275 parser = pp.Regex(regex.compile(r'[0-9]'))
3276
3277 """
def __init__(self, pattern, flags=0, asGroupList=False, asMatch=False):
    """Build a Regex token from a pattern string or a compiled RE object.

    The parameters ``pattern`` and ``flags`` are passed
    to the ``re.compile()`` function as-is. See the Python
    `re module <https://docs.python.org/3/library/re.html>`_ documentation for an
    explanation of the acceptable patterns and flags.

    ``pattern`` may also be any object exposing ``pattern`` and ``match``
    attributes (for example an already compiled expression); it is then
    used directly and ``flags`` is only recorded, not applied.

    ``asGroupList`` and ``asMatch`` select an alternate parse
    implementation; when both are true, ``asMatch`` takes effect because
    it is assigned last.

    Emits a SyntaxWarning for an empty pattern string, and for a pattern
    that fails to compile (the compile error is then re-raised).  Raises
    TypeError when ``pattern`` is neither a string nor RE-like.
    """
    super(Regex, self).__init__()

    if isinstance(pattern, basestring):
        if not pattern:
            warnings.warn("null string passed to Regex; use Empty() instead",
                          SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
        except re.error:
            # re.error is the public name of the compile error class, so the
            # deprecated sre_constants module is not needed here
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                          SyntaxWarning, stacklevel=2)
            raise
        else:
            self.reString = self.pattern

    elif hasattr(pattern, 'pattern') and hasattr(pattern, 'match'):
        # duck-typed compiled expression, possibly from another RE module
        self.re = pattern
        self.pattern = self.reString = pattern.pattern
        self.flags = flags

    else:
        raise TypeError("Regex may only be constructed with a string or a compiled RE object")

    self.re_match = self.re.match

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
    self.asGroupList = asGroupList
    self.asMatch = asMatch
    # the instance attribute shadows the class-level parseImpl
    if self.asGroupList:
        self.parseImpl = self.parseImplAsGroupList
    if self.asMatch:
        self.parseImpl = self.parseImplAsMatch
3322
def parseImpl(self, instring, loc, doActions=True):
    """Match the regular expression at ``loc``.

    Returns ``(end_of_match, results)`` where ``results`` is a
    ParseResults built from the whole matched text, with every named
    group of the match stored under its group name.  Raises
    ParseException when the expression does not match at ``loc``.
    ``doActions`` is accepted but not used here.
    """
    match = self.re_match(instring, loc)
    if not match:
        raise ParseException(instring, loc, self.errmsg, self)

    end = match.end()
    tokens = ParseResults(match.group())
    for group_name, group_text in match.groupdict().items():
        tokens[group_name] = group_text
    return end, tokens
3335
3337 result = self.re_match(instring, loc)
3338 if not result:
3339 raise ParseException(instring, loc, self.errmsg, self)
3340
3341 loc = result.end()
3342 ret = result.groups()
3343 return loc, ret
3344
3346 result = self.re_match(instring, loc)
3347 if not result:
3348 raise ParseException(instring, loc, self.errmsg, self)
3349
3350 loc = result.end()
3351 ret = result
3352 return loc, ret
3353
3355 try:
3356 return super(Regex, self).__str__()
3357 except Exception:
3358 pass
3359
3360 if self.strRepr is None:
3361 self.strRepr = "Re:(%s)" % repr(self.pattern)
3362
3363 return self.strRepr
3364
def sub(self, repl):
    r"""
    Return Regex with an attached parse action to transform the parsed
    result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

    A SyntaxWarning is issued and SyntaxError raised when this Regex was
    built with ``asGroupList=True``, or with ``asMatch=True`` and a
    callable ``repl``.

    Example::

        make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
        print(make_html.transformString("h1:main title:"))
        # prints "<h1>main title</h1>"
    """
    if self.asGroupList:
        warnings.warn("cannot use sub() with Regex(asGroupList=True)",
                      SyntaxWarning, stacklevel=2)
        raise SyntaxError()

    if self.asMatch:
        if callable(repl):
            warnings.warn("cannot use sub() with a callable with Regex(asMatch=True)",
                          SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        # the token is a match object, so expand the template against it
        def pa(tokens):
            return tokens[0].expand(repl)
    else:
        # the token is the matched text, so re-run the substitution on it
        def pa(tokens):
            return self.re.sub(repl, tokens[0])

    return self.addParseAction(pa)
3393
3395 r"""
3396 Token for matching strings that are delimited by quoting characters.
3397
3398 Defined with the following parameters:
3399
3400 - quoteChar - string of one or more characters defining the
3401 quote delimiting string
3402 - escChar - character to escape quotes, typically backslash
3403 (default= ``None``)
3404 - escQuote - special quote sequence to escape an embedded quote
3405 string (such as SQL's ``""`` to escape an embedded ``"``)
3406 (default= ``None``)
3407 - multiline - boolean indicating whether quotes can span
3408 multiple lines (default= ``False``)
3409 - unquoteResults - boolean indicating whether the matched text
3410 should be unquoted (default= ``True``)
3411 - endQuoteChar - string of one or more characters defining the
3412 end of the quote delimited string (default= ``None`` => same as
3413 quoteChar)
3414 - convertWhitespaceEscapes - convert escaped whitespace
3415 (``'\t'``, ``'\n'``, etc.) to actual whitespace
3416 (default= ``True``)
3417
3418 Example::
3419
3420 qs = QuotedString('"')
3421 print(qs.searchString('lsjdf "This is the quote" sldjf'))
3422 complex_qs = QuotedString('{{', endQuoteChar='}}')
3423 print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
3424 sql_qs = QuotedString('"', escQuote='""')
3425 print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
3426
3427 prints::
3428
3429 [['This is the quote']]
3430 [['This is the "quote"']]
3431 [['This is the quote with "embedded" quotes']]
3432 """
def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False,
             unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
    """Build the regular expression that recognizes the quoted string.

    ``quoteChar`` and ``endQuoteChar`` are stripped of surrounding
    whitespace; ``endQuoteChar`` defaults to ``quoteChar``.  An empty
    delimiter (after stripping) issues a SyntaxWarning and raises
    SyntaxError.  If the assembled pattern does not compile, a
    SyntaxWarning is issued and the compile error is re-raised.

    The pattern is: opening quote, then any number of "body" alternatives
    (a character that is not the first end-quote character, not the escape
    character and - unless ``multiline`` - not a line break; a proper
    prefix of a multi-character end quote followed by a non-matching
    character; the ``escQuote`` sequence; the escape character plus one
    character), then the end quote.
    """
    super(QuotedString, self).__init__()

    # delimiters are used with surrounding whitespace removed
    quoteChar = quoteChar.strip()
    if not quoteChar:
        warnings.warn("quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
        raise SyntaxError()

    if endQuoteChar is None:
        endQuoteChar = quoteChar
    else:
        endQuoteChar = endQuoteChar.strip()
        if not endQuoteChar:
            warnings.warn("endQuoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
            raise SyntaxError()

    self.quoteChar = quoteChar
    self.quoteCharLen = len(quoteChar)
    self.firstQuoteChar = quoteChar[0]
    self.endQuoteChar = endQuoteChar
    self.endQuoteCharLen = len(endQuoteChar)
    self.escChar = escChar
    self.escQuote = escQuote
    self.unquoteResults = unquoteResults
    self.convertWhitespaceEscapes = convertWhitespaceEscapes

    self.flags = (re.MULTILINE | re.DOTALL) if multiline else 0

    # single-line strings additionally exclude CR/LF from the body characters
    newline_exclusion = "" if multiline else r"\n\r"
    esc_exclusion = _escapeRegexRangeChars(escChar) if escChar is not None else ""
    self.pattern = r'%s(?:[^%s%s%s]' % (re.escape(self.quoteChar),
                                        _escapeRegexRangeChars(self.endQuoteChar[0]),
                                        newline_exclusion,
                                        esc_exclusion)

    if len(self.endQuoteChar) > 1:
        # allow a proper prefix of the end quote when the next character
        # shows that it is not the full end quote (longest prefix first)
        partial_end_quotes = ')|(?:'.join(
            "%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                         _escapeRegexRangeChars(self.endQuoteChar[i]))
            for i in range(len(self.endQuoteChar) - 1, 0, -1))
        self.pattern += '|(?:' + partial_end_quotes + ')'

    if escQuote:
        self.pattern += (r'|(?:%s)' % re.escape(escQuote))
    if escChar:
        self.pattern += (r'|(?:%s.)' % re.escape(escChar))
        # read back by parseImpl to strip the escape character when unquoting
        self.escCharReplacePattern = re.escape(self.escChar) + "(.)"
    self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

    try:
        self.re = re.compile(self.pattern, self.flags)
    except re.error:
        # re.error is the public name of the compile error class, so the
        # deprecated sre_constants module is not needed here
        warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                      SyntaxWarning, stacklevel=2)
        raise
    else:
        self.reString = self.pattern
        self.re_match = self.re.match

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
3497
def parseImpl(self, instring, loc, doActions=True):
    """Match a quoted string at ``loc`` and optionally unquote it.

    Returns ``(end_of_match, text)``.  With ``self.unquoteResults`` false
    the text still includes its delimiters.  Otherwise the delimiters are
    sliced off and then, in this order: backslash whitespace escapes are
    converted (if ``self.convertWhitespaceEscapes``), the escape character
    is removed from escaped characters (if ``self.escChar``), and the
    ``self.escQuote`` sequence is replaced by the end quote.

    Raises ParseException when the text at ``loc`` is not a quoted string.
    ``doActions`` is accepted but not used here.
    """
    # cheap first-character test before running the regular expression
    match = None
    if instring[loc] == self.firstQuoteChar:
        match = self.re_match(instring, loc)
    if not match:
        raise ParseException(instring, loc, self.errmsg, self)

    end = match.end()
    text = match.group()

    if not self.unquoteResults:
        return end, text

    # strip off the opening and closing delimiters
    text = text[self.quoteCharLen: -self.endQuoteCharLen]

    if isinstance(text, basestring):
        # NOTE(review): whitespace escapes are converted before escape
        # characters are removed, so an escaped backslash followed by 'n'
        # is also seen as a newline escape - confirm this is intended
        if '\\' in text and self.convertWhitespaceEscapes:
            for literal, whitespace in ((r'\t', '\t'),
                                        (r'\n', '\n'),
                                        (r'\f', '\f'),
                                        (r'\r', '\r')):
                text = text.replace(literal, whitespace)

        # replace each escaped character with the character itself
        if self.escChar:
            text = re.sub(self.escCharReplacePattern, r"\g<1>", text)

        # replace the escaped-quote sequence with a plain end quote
        if self.escQuote:
            text = text.replace(self.escQuote, self.endQuoteChar)

    return end, text
3532
3534 try:
3535 return super(QuotedString, self).__str__()
3536 except Exception:
3537 pass
3538
3539 if self.strRepr is None:
3540 self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
3541
3542 return self.strRepr
3543
3546 """Token for matching words composed of characters *not* in a given
3547 set (will include whitespace in matched characters if not listed in
3548 the provided exclusion set - see example). Defined with string
3549 containing all disallowed characters, and an optional minimum,
3550 maximum, and/or exact length. The default value for ``min`` is
3551 1 (a minimum value < 1 is not valid); the default values for
3552 ``max`` and ``exact`` are 0, meaning no maximum or exact
3553 length restriction.
3554
3555 Example::
3556
3557 # define a comma-separated-value as anything that is not a ','
3558 csv_value = CharsNotIn(',')
3559 print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
3560
3561 prints::
3562
3563 ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
3564 """
def __init__(self, notChars, min=1, max=0, exact=0):
    """Configure a match over characters that are *not* in ``notChars``.

    ``min`` must be at least 1 (ValueError otherwise).  ``max`` of 0 means
    no upper limit; a positive ``exact`` overrides both ``min`` and
    ``max``.  Whitespace skipping is turned off for this element.
    """
    super(CharsNotIn, self).__init__()
    self.skipWhitespace = False
    self.notChars = notChars

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use "
                         "Optional(CharsNotIn()) if zero-length char group is permitted")

    if exact > 0:
        # an exact length pins both bounds
        self.minLen = self.maxLen = exact
    else:
        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayReturnEmpty = (self.minLen == 0)
    self.mayIndexError = False
3589
def parseImpl(self, instring, loc, doActions=True):
    """Consume characters from ``loc`` until one in ``self.notChars``.

    At most ``self.maxLen`` characters are taken.  Returns
    ``(new_loc, matched_text)``.  Raises ParseException when the character
    at ``loc`` is excluded or fewer than ``self.minLen`` characters were
    consumed.  ``doActions`` is accepted but not used here.
    """
    excluded = self.notChars
    if instring[loc] in excluded:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    stop = min(begin + self.maxLen, len(instring))
    loc += 1
    while loc < stop and instring[loc] not in excluded:
        loc += 1

    if loc - begin < self.minLen:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[begin:loc]
3605
3607 try:
3608 return super(CharsNotIn, self).__str__()
3609 except Exception:
3610 pass
3611
3612 if self.strRepr is None:
3613 if len(self.notChars) > 4:
3614 self.strRepr = "!W:(%s...)" % self.notChars[:4]
3615 else:
3616 self.strRepr = "!W:(%s)" % self.notChars
3617
3618 return self.strRepr
3619
3621 """Special matching class for matching whitespace. Normally,
3622 whitespace is ignored by pyparsing grammars. This class is included
3623 when some whitespace structures are significant. Define with
3624 a string containing the whitespace characters to be matched; default
3625 is ``" \\t\\r\\n"``. Also takes optional ``min``,
3626 ``max``, and ``exact`` arguments, as defined for the
3627 :class:`Word` class.
3628 """
3629 whiteStrs = {
3630 ' ' : '<SP>',
3631 '\t': '<TAB>',
3632 '\n': '<LF>',
3633 '\r': '<CR>',
3634 '\f': '<FF>',
3635 u'\u00A0': '<NBSP>',
3636 u'\u1680': '<OGHAM_SPACE_MARK>',
3637 u'\u180E': '<MONGOLIAN_VOWEL_SEPARATOR>',
3638 u'\u2000': '<EN_QUAD>',
3639 u'\u2001': '<EM_QUAD>',
3640 u'\u2002': '<EN_SPACE>',
3641 u'\u2003': '<EM_SPACE>',
3642 u'\u2004': '<THREE-PER-EM_SPACE>',
3643 u'\u2005': '<FOUR-PER-EM_SPACE>',
3644 u'\u2006': '<SIX-PER-EM_SPACE>',
3645 u'\u2007': '<FIGURE_SPACE>',
3646 u'\u2008': '<PUNCTUATION_SPACE>',
3647 u'\u2009': '<THIN_SPACE>',
3648 u'\u200A': '<HAIR_SPACE>',
3649 u'\u200B': '<ZERO_WIDTH_SPACE>',
3650 u'\u202F': '<NNBSP>',
3651 u'\u205F': '<MMSP>',
3652 u'\u3000': '<IDEOGRAPHIC_SPACE>',
3653 }
def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
    """Configure a token that matches a run of the characters in ``ws``.

    The characters being matched are removed from this element's set of
    skippable whitespace.  The element name is built from the
    ``White.whiteStrs`` labels of the characters in ``ws``; a character
    with no entry in that table is labelled ``<U+XXXX>`` by code point
    instead of failing with a KeyError.

    ``max`` of 0 means no upper limit; a positive ``exact`` overrides both
    ``min`` and ``max``.
    """
    super(White, self).__init__()
    self.matchWhite = ws
    self.setWhitespaceChars("".join(c for c in self.whiteChars if c not in self.matchWhite))

    labels = White.whiteStrs
    self.name = "".join(labels[c] if c in labels else "<U+%04X>" % ord(c)
                        for c in self.matchWhite)
    self.mayReturnEmpty = True
    self.errmsg = "Expected " + self.name

    self.minLen = min

    if max > 0:
        self.maxLen = max
    else:
        self.maxLen = _MAX_INT

    if exact > 0:
        self.maxLen = exact
        self.minLen = exact
3673
def parseImpl(self, instring, loc, doActions=True):
    """Match a run of ``self.matchWhite`` characters starting at ``loc``.

    At most ``self.maxLen`` characters are taken.  Returns
    ``(new_loc, matched_text)``.  Raises ParseException when the character
    at ``loc`` is not a match character or fewer than ``self.minLen``
    characters were consumed.  ``doActions`` is accepted but not used here.
    """
    if instring[loc] not in self.matchWhite:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    stop = min(begin + self.maxLen, len(instring))
    loc += 1
    while loc < stop and instring[loc] in self.matchWhite:
        loc += 1

    if loc - begin < self.minLen:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[begin:loc]
3688
3692 super(_PositionToken, self).__init__()
3693 self.name = self.__class__.__name__
3694 self.mayReturnEmpty = True
3695 self.mayIndexError = False
3696
3698 """Token to advance to a specific column of input text; useful for
3699 tabular report scraping.
3700 """
3704
3706 if col(loc, instring) != self.col:
3707 instrlen = len(instring)
3708 if self.ignoreExprs:
3709 loc = self._skipIgnorables(instring, loc)
3710 while loc < instrlen and instring[loc].isspace() and col(loc, instring) != self.col:
3711 loc += 1
3712 return loc
3713
def parseImpl(self, instring, loc, doActions=True):
    """Advance from ``loc`` to the column stored in ``self.col``.

    Returns ``(new_loc, skipped_text)`` where ``skipped_text`` is the
    slice of ``instring`` between ``loc`` and the new location.  Raises
    ParseException when ``col(loc, instring)`` is already greater than
    ``self.col``.  ``doActions`` is accepted but not used here.
    """
    current_col = col(loc, instring)
    if current_col > self.col:
        raise ParseException(instring, loc, "Text not in expected column", self)

    target = loc + self.col - current_col
    return target, instring[loc: target]
3721
3724 r"""Matches if current position is at the beginning of a line within
3725 the parse string
3726
3727 Example::
3728
3729 test = '''\
3730 AAA this line
3731 AAA and this line
3732 AAA but not this one
3733 B AAA and definitely not this one
3734 '''
3735
3736 for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
3737 print(t)
3738
3739 prints::
3740
3741 ['AAA', ' this line']
3742 ['AAA', ' and this line']
3743
3744 """
3748
def parseImpl(self, instring, loc, doActions=True):
    """Succeed with no tokens only when ``col(loc, instring)`` is 1.

    Returns ``(loc, [])`` without consuming input; raises ParseException
    for any other column.  ``doActions`` is accepted but not used here.
    """
    if col(loc, instring) != 1:
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
3753
3755 """Matches if current position is at the end of a line within the
3756 parse string
3757 """
3762
def parseImpl(self, instring, loc, doActions=True):
    """Match a line end at ``loc``.

    A newline character at ``loc`` is consumed and returned as
    ``(loc + 1, "\\n")``.  Exactly at the end of ``instring`` the result is
    ``(loc + 1, [])``.  Any other position raises ParseException.
    ``doActions`` is accepted but not used here.
    """
    end = len(instring)
    if loc == end:
        return loc + 1, []
    if loc < end and instring[loc] == "\n":
        return loc + 1, "\n"
    raise ParseException(instring, loc, self.errmsg, self)
3773
3775 """Matches if current position is at the beginning of the parse
3776 string
3777 """
3781
def parseImpl(self, instring, loc, doActions=True):
    """Succeed with no tokens at the start of ``instring``.

    Position 0 always matches.  Any other ``loc`` matches only when it
    equals ``self.preParse(instring, 0)``, i.e. when pre-parsing from the
    very beginning lands on ``loc``.  Returns ``(loc, [])``; raises
    ParseException otherwise.  ``doActions`` is accepted but not used here.
    """
    if loc != 0 and loc != self.preParse(instring, 0):
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
3788
3790 """Matches if current position is at the end of the parse string
3791 """
3795
def parseImpl(self, instring, loc, doActions=True):
    """Succeed with no tokens only at or beyond the end of ``instring``.

    Exactly at the end the result is ``(loc + 1, [])``; past the end it is
    ``(loc, [])``.  Raises ParseException while input remains at ``loc``.
    ``doActions`` is accepted but not used here.
    """
    end = len(instring)
    if loc < end:
        raise ParseException(instring, loc, self.errmsg, self)
    if loc == end:
        return loc + 1, []
    # loc > end is the only remaining case, so no further branch is needed
    return loc, []
3805
3807 """Matches if the current position is at the beginning of a Word,
3808 and is not preceded by any character in a given set of
3809 ``wordChars`` (default= ``printables``). To emulate the
3810 ``\b`` behavior of regular expressions, use
3811 ``WordStart(alphanums)``. ``WordStart`` will also match at
3812 the beginning of the string being parsed, or at the beginning of
3813 a line.
3814 """
3816 super(WordStart, self).__init__()
3817 self.wordChars = set(wordChars)
3818 self.errmsg = "Not at the start of a word"
3819
def parseImpl(self, instring, loc, doActions=True):
    """Succeed with no tokens when ``loc`` is at the start of a word.

    Position 0 always matches.  Elsewhere the previous character must not
    be in ``self.wordChars`` and the character at ``loc`` must be.
    Returns ``(loc, [])``; raises ParseException otherwise.
    ``doActions`` is accepted but not used here.
    """
    if loc != 0:
        word_chars = self.wordChars
        at_word_start = (instring[loc - 1] not in word_chars
                         and instring[loc] in word_chars)
        if not at_word_start:
            raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
3826
3828 """Matches if the current position is at the end of a Word, and is
3829 not followed by any character in a given set of ``wordChars``
3830 (default= ``printables``). To emulate the ``\b`` behavior of
3831 regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
3832 will also match at the end of the string being parsed, or at the end
3833 of a line.
3834 """
3836 super(WordEnd, self).__init__()
3837 self.wordChars = set(wordChars)
3838 self.skipWhitespace = False
3839 self.errmsg = "Not at the end of a word"
3840
def parseImpl(self, instring, loc, doActions=True):
    """Succeed with no tokens when ``loc`` is at the end of a word.

    At or beyond the end of ``instring`` (or for an empty string) the
    match always succeeds.  Otherwise the character at ``loc`` must not be
    in ``self.wordChars`` and the previous character must be.  Returns
    ``(loc, [])``; raises ParseException otherwise.  ``doActions`` is
    accepted but not used here.
    """
    text_len = len(instring)
    if text_len > 0 and loc < text_len:
        word_chars = self.wordChars
        at_word_end = (instring[loc] not in word_chars
                       and instring[loc - 1] in word_chars)
        if not at_word_end:
            raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
3848
3851 """Abstract subclass of ParserElement, for combining and
3852 post-processing parsed tokens.
3853 """
def __init__(self, exprs, savelist=False):
    """Normalize ``exprs`` into the list ``self.exprs``.

    Accepted forms: a single string (wrapped with
    ``self._literalStringClass``), a single ParserElement, or an iterable
    whose string items are wrapped the same way.  Anything else is passed
    to ``list()``, falling back to a one-element list when that raises
    TypeError.  ``savelist`` is forwarded to the base initializer.
    """
    super(ParseExpression, self).__init__(savelist)

    # generators are materialized first so they can be inspected below
    if isinstance(exprs, _generatorType):
        exprs = list(exprs)

    if isinstance(exprs, basestring):
        self.exprs = [self._literalStringClass(exprs)]
    elif isinstance(exprs, ParserElement):
        self.exprs = [exprs]
    elif isinstance(exprs, Iterable):
        # strings within the sequence become literal expressions
        self.exprs = [self._literalStringClass(item) if isinstance(item, basestring) else item
                      for item in exprs]
    else:
        try:
            self.exprs = list(exprs)
        except TypeError:
            self.exprs = [exprs]

    self.callPreparse = False
3875
3877 self.exprs.append(other)
3878 self.strRepr = None
3879 return self
3880
3882 """Extends ``leaveWhitespace`` defined in base class, and also invokes ``leaveWhitespace`` on
3883 all contained expressions."""
3884 self.skipWhitespace = False
3885 self.exprs = [e.copy() for e in self.exprs]
3886 for e in self.exprs:
3887 e.leaveWhitespace()
3888 return self
3889
3891 if isinstance(other, Suppress):
3892 if other not in self.ignoreExprs:
3893 super(ParseExpression, self).ignore(other)
3894 for e in self.exprs:
3895 e.ignore(self.ignoreExprs[-1])
3896 else:
3897 super(ParseExpression, self).ignore(other)
3898 for e in self.exprs:
3899 e.ignore(self.ignoreExprs[-1])
3900 return self
3901
3903 try:
3904 return super(ParseExpression, self).__str__()
3905 except Exception:
3906 pass
3907
3908 if self.strRepr is None:
3909 self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.exprs))
3910 return self.strRepr
3911
3913 super(ParseExpression, self).streamline()
3914
3915 for e in self.exprs:
3916 e.streamline()
3917
3918
3919
3920
3921 if len(self.exprs) == 2:
3922 other = self.exprs[0]
3923 if (isinstance(other, self.__class__)
3924 and not other.parseAction
3925 and other.resultsName is None
3926 and not other.debug):
3927 self.exprs = other.exprs[:] + [self.exprs[1]]
3928 self.strRepr = None
3929 self.mayReturnEmpty |= other.mayReturnEmpty
3930 self.mayIndexError |= other.mayIndexError
3931
3932 other = self.exprs[-1]
3933 if (isinstance(other, self.__class__)
3934 and not other.parseAction
3935 and other.resultsName is None
3936 and not other.debug):
3937 self.exprs = self.exprs[:-1] + other.exprs[:]
3938 self.strRepr = None
3939 self.mayReturnEmpty |= other.mayReturnEmpty
3940 self.mayIndexError |= other.mayIndexError
3941
3942 self.errmsg = "Expected " + _ustr(self)
3943
3944 return self
3945
def validate(self, validateTrace=None):
    """Validate every contained expression, then check for recursion.

    Each expression in ``self.exprs`` has ``validate`` called with a copy
    of ``validateTrace`` (empty when omitted) extended by this element;
    the caller's list is not modified.  Finally
    ``self.checkRecursion([])`` is invoked.
    """
    inherited = [] if validateTrace is None else validateTrace[:]
    trace = inherited + [self]
    for expr in self.exprs:
        expr.validate(trace)
    self.checkRecursion([])
3951
3956
3958 if __diag__.warn_ungrouped_named_tokens_in_collection:
3959 for e in self.exprs:
3960 if isinstance(e, ParserElement) and e.resultsName:
3961 warnings.warn("{0}: setting results name {1!r} on {2} expression "
3962 "collides with {3!r} on contained expression".format("warn_ungrouped_named_tokens_in_collection",
3963 name,
3964 type(self).__name__,
3965 e.resultsName),
3966 stacklevel=3)
3967
3968 return super(ParseExpression, self)._setResultsName(name, listAllMatches)
3969
3970
3971 -class And(ParseExpression):
3972 """
3973 Requires all given :class:`ParseExpression` s to be found in the given order.
3974 Expressions may be separated by whitespace.
3975 May be constructed using the ``'+'`` operator.
3976 May also be constructed using the ``'-'`` operator, which will
3977 suppress backtracking.
3978
3979 Example::
3980
3981 integer = Word(nums)
3982 name_expr = OneOrMore(Word(alphas))
3983
3984 expr = And([integer("id"), name_expr("name"), integer("age")])
3985 # more easily written as:
3986 expr = integer("id") + name_expr("name") + integer("age")
3987 """
3988
3994
def __init__(self, exprs, savelist=True):
    """Build a sequence of expressions that must all match in order.

    An ``Ellipsis`` item in ``exprs`` is replaced by a ``SkipTo`` for the
    expression that follows it, given the name ``"_skipped*"``.  A
    sequence that ends in ``Ellipsis`` raises Exception.  The argument
    itself is never modified: a generator is materialized into a list and
    the expanded sequence is built as a new list.

    Whitespace settings are copied from the first contained expression,
    and ``mayReturnEmpty`` is true only if every expression may be empty.
    """
    # Materialize a generator before the membership test below, which
    # would otherwise exhaust it and leave nothing for the base class.
    if isinstance(exprs, _generatorType):
        exprs = list(exprs)

    if exprs and Ellipsis in exprs:
        expanded = []
        last_index = len(exprs) - 1
        for i, expr in enumerate(exprs):
            if expr is not Ellipsis:
                expanded.append(expr)
                continue
            if i >= last_index:
                raise Exception("cannot construct And with sequence ending in ...")
            # adding to Empty() converts the following item to an expression
            skipto_arg = (Empty() + exprs[i + 1]).exprs[-1]
            expanded.append(SkipTo(skipto_arg)("_skipped*"))
        # rebind rather than assign into the caller's sequence, so tuples
        # work and the caller's list is left untouched
        exprs = expanded

    super(And, self).__init__(exprs, savelist)
    self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
    self.setWhitespaceChars(self.exprs[0].whiteChars)
    self.skipWhitespace = self.exprs[0].skipWhitespace
    self.callPreparse = True
4013
4015
4016 if self.exprs:
4017 if any(isinstance(e, ParseExpression) and e.exprs and isinstance(e.exprs[-1], _PendingSkip)
4018 for e in self.exprs[:-1]):
4019 for i, e in enumerate(self.exprs[:-1]):
4020 if e is None:
4021 continue
4022 if (isinstance(e, ParseExpression)
4023 and e.exprs and isinstance(e.exprs[-1], _PendingSkip)):
4024 e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
4025 self.exprs[i + 1] = None
4026 self.exprs = [e for e in self.exprs if e is not None]
4027
4028 super(And, self).streamline()
4029 self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
4030 return self
4031
def parseImpl(self, instring, loc, doActions=True):
    """Parse every contained expression in order, accumulating tokens.

    The first expression is parsed with ``callPreParse=False``.  Once an
    ``And._ErrorStop`` marker has been passed, a failure in any later
    expression is raised as ParseSyntaxException: a ParseBaseException is
    converted, and an IndexError becomes one located at the end of
    ``instring``.  Returns ``(new_loc, accumulated_tokens)``.
    """
    loc, tokens = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)

    past_error_stop = False
    for expr in self.exprs[1:]:
        if isinstance(expr, And._ErrorStop):
            past_error_stop = True
            continue

        if not past_error_stop:
            loc, parsed = expr._parse(instring, loc, doActions)
        else:
            try:
                loc, parsed = expr._parse(instring, loc, doActions)
            except ParseSyntaxException:
                raise
            except ParseBaseException as pe:
                pe.__traceback__ = None
                raise ParseSyntaxException._from_exception(pe)
            except IndexError:
                raise ParseSyntaxException(instring, len(instring), self.errmsg, self)

        # keep results that carry tokens or named results
        if parsed or parsed.haskeys():
            tokens += parsed

    return loc, tokens
4056
4058 if isinstance(other, basestring):
4059 other = self._literalStringClass(other)
4060 return self.append(other)
4061
4063 subRecCheckList = parseElementList[:] + [self]
4064 for e in self.exprs:
4065 e.checkRecursion(subRecCheckList)
4066 if not e.mayReturnEmpty:
4067 break
4068
4070 if hasattr(self, "name"):
4071 return self.name
4072
4073 if self.strRepr is None:
4074 self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"
4075
4076 return self.strRepr
4077
4078
4079 -class Or(ParseExpression):
4080 """Requires that at least one :class:`ParseExpression` is found. If
4081 two expressions match, the expression that matches the longest
4082 string will be used. May be constructed using the ``'^'``
4083 operator.
4084
4085 Example::
4086
4087 # construct Or using '^' operator
4088
4089 number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
4090 print(number.searchString("123 3.1416 789"))
4091
4092 prints::
4093
4094 [['123'], ['3.1416'], ['789']]
4095 """
def __init__(self, exprs, savelist=False):
    """Collect the alternatives and derive ``mayReturnEmpty``.

    ``mayReturnEmpty`` is true when any alternative may match empty, and
    also when there are no alternatives at all.
    """
    super(Or, self).__init__(exprs, savelist)
    self.mayReturnEmpty = (any(e.mayReturnEmpty for e in self.exprs)
                           if self.exprs else True)
4102
4104 super(Or, self).streamline()
4105 if __compat__.collect_all_And_tokens:
4106 self.saveAsList = any(e.saveAsList for e in self.exprs)
4107 return self
4108
def parseImpl(self, instring, loc, doActions=True):
    """Parse with the alternative that matches the most input.

    Every alternative is first tried with ``tryParse``.  The successful
    ones are sorted by match end, longest first.  Without actions the
    longest one is parsed and returned.  With actions each candidate is
    parsed for real, since that can fail or match less than the trial:
    the first one reaching at least its trial end wins, otherwise the
    furthest shorter result is returned.

    When nothing matches, the failure that got furthest is re-raised with
    this element's ``errmsg``; with no failure recorded at all a
    ParseException "no defined alternatives to match" is raised.
    """
    furthest_loc = -1
    furthest_exc = None
    candidates = []

    for alternative in self.exprs:
        try:
            trial_end = alternative.tryParse(instring, loc)
        except ParseException as err:
            err.__traceback__ = None
            if err.loc > furthest_loc:
                furthest_exc = err
                furthest_loc = err.loc
        except IndexError:
            text_len = len(instring)
            if text_len > furthest_loc:
                furthest_exc = ParseException(instring, text_len, alternative.errmsg, self)
                furthest_loc = text_len
        else:
            # remember each success so it can be retried longest to shortest
            candidates.append((trial_end, alternative))

    if candidates:
        # stable descending sort: equal lengths keep their definition order
        candidates.sort(key=itemgetter(0), reverse=True)

        if not doActions:
            # nothing can change the selection, so the longest trial wins
            return candidates[0][1]._parse(instring, loc, doActions)

        best_loc, best_toks = -1, None
        for trial_end, alternative in candidates:
            if trial_end <= best_loc:
                # remaining candidates cannot beat what is already in hand
                return best_loc, best_toks

            try:
                actual_end, toks = alternative._parse(instring, loc, doActions)
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            else:
                if actual_end >= trial_end:
                    return actual_end, toks
                # matched less than in the trial; keep it if it is the best so far
                if actual_end > best_loc:
                    best_loc, best_toks = actual_end, toks

        if (best_loc, best_toks) != (-1, None):
            return best_loc, best_toks

    if furthest_exc is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    furthest_exc.msg = self.errmsg
    raise furthest_exc
4168
4169
4171 if isinstance(other, basestring):
4172 other = self._literalStringClass(other)
4173 return self.append(other)
4174
4176 if hasattr(self, "name"):
4177 return self.name
4178
4179 if self.strRepr is None:
4180 self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
4181
4182 return self.strRepr
4183
4185 subRecCheckList = parseElementList[:] + [self]
4186 for e in self.exprs:
4187 e.checkRecursion(subRecCheckList)
4188
4190 if (not __compat__.collect_all_And_tokens
4191 and __diag__.warn_multiple_tokens_in_named_alternation):
4192 if any(isinstance(e, And) for e in self.exprs):
4193 warnings.warn("{0}: setting results name {1!r} on {2} expression "
4194 "may only return a single token for an And alternative, "
4195 "in future will return the full list of tokens".format(
4196 "warn_multiple_tokens_in_named_alternation", name, type(self).__name__),
4197 stacklevel=3)
4198
4199 return super(Or, self)._setResultsName(name, listAllMatches)
4200
4203 """Requires that at least one :class:`ParseExpression` is found. If
4204 two expressions match, the first one listed is the one that will
4205 match. May be constructed using the ``'|'`` operator.
4206
4207 Example::
4208
4209 # construct MatchFirst using '|' operator
4210
4211 # watch the order of expressions to match
4212 number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
4213 print(number.searchString("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']]
4214
4215 # put more selective expression first
4216 number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
4217 print(number.searchString("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']]
4218 """
def __init__(self, exprs, savelist=False):
    """Collect the alternatives and derive ``mayReturnEmpty``.

    ``mayReturnEmpty`` is true when any alternative may match empty, and
    also when there are no alternatives at all.
    """
    super(MatchFirst, self).__init__(exprs, savelist)
    self.mayReturnEmpty = (any(e.mayReturnEmpty for e in self.exprs)
                           if self.exprs else True)
4225
4227 super(MatchFirst, self).streamline()
4228 if __compat__.collect_all_And_tokens:
4229 self.saveAsList = any(e.saveAsList for e in self.exprs)
4230 return self
4231
def parseImpl(self, instring, loc, doActions=True):
    """Return the result of the first alternative that parses at ``loc``.

    Alternatives are tried in order.  When all of them fail, the failure
    that got furthest into the input is re-raised with this element's
    ``errmsg`` (an IndexError counts as a failure at the end of
    ``instring``).  With no failure recorded at all a ParseException
    "no defined alternatives to match" is raised.
    """
    furthest_loc = -1
    furthest_exc = None

    for alternative in self.exprs:
        try:
            return alternative._parse(instring, loc, doActions)
        except ParseException as err:
            if err.loc > furthest_loc:
                furthest_exc = err
                furthest_loc = err.loc
        except IndexError:
            text_len = len(instring)
            if text_len > furthest_loc:
                furthest_exc = ParseException(instring, text_len, alternative.errmsg, self)
                furthest_loc = text_len

    # reached only when no alternative matched
    if furthest_exc is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    furthest_exc.msg = self.errmsg
    raise furthest_exc
4255
4257 if isinstance(other, basestring):
4258 other = self._literalStringClass(other)
4259 return self.append(other)
4260
4262 if hasattr(self, "name"):
4263 return self.name
4264
4265 if self.strRepr is None:
4266 self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
4267
4268 return self.strRepr
4269
4271 subRecCheckList = parseElementList[:] + [self]
4272 for e in self.exprs:
4273 e.checkRecursion(subRecCheckList)
4274
4276 if (not __compat__.collect_all_And_tokens
4277 and __diag__.warn_multiple_tokens_in_named_alternation):
4278 if any(isinstance(e, And) for e in self.exprs):
4279 warnings.warn("{0}: setting results name {1!r} on {2} expression "
4280 "may only return a single token for an And alternative, "
4281 "in future will return the full list of tokens".format(
4282 "warn_multiple_tokens_in_named_alternation", name, type(self).__name__),
4283 stacklevel=3)
4284
4285 return super(MatchFirst, self)._setResultsName(name, listAllMatches)
4286
4287
class Each(ParseExpression):
    """Requires all given :class:`ParseExpression` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)

        shape_spec.runTests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """
    def __init__(self, exprs, savelist=True):
        """Create an any-order match over ``exprs``.

        The expression may return empty only if every sub-expression may.
        ``saveAsList`` is always forced to ``True``.
        """
        super(Each, self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # parseImpl builds its required/optional groups on first use
        self.initExprGroups = True
        self.saveAsList = True

    def streamline(self):
        """Streamline via the base class, then recompute ``mayReturnEmpty``."""
        super(Each, self).streamline()
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Match every required sub-expression, in whatever order they occur.

        Works in two passes: first ``tryParse`` is used repeatedly to find
        an order in which the sub-expressions match, then the expressions
        are parsed for real in that order and their results are summed.

        Raises:
            ParseException: if any required sub-expression was never matched.
        """
        if self.initExprGroups:
            # Classify the sub-expressions once; the groups are cached on self.
            # opt1map maps the id of an Optional's inner expression back to
            # the Optional wrapper itself.
            self.opt1map = dict((id(e.expr), e) for e in self.exprs if isinstance(e, Optional))
            opt1 = [e.expr for e in self.exprs if isinstance(e, Optional)]
            opt2 = [e for e in self.exprs if e.mayReturnEmpty and not isinstance(e, Optional)]
            self.optionals = opt1 + opt2
            self.multioptionals = [e.expr for e in self.exprs if isinstance(e, ZeroOrMore)]
            self.multirequired = [e.expr for e in self.exprs if isinstance(e, OneOrMore)]
            self.required = [e for e in self.exprs if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))]
            self.required += self.multirequired
            self.initExprGroups = False

        tmpLoc = loc
        tmpReqd = self.required[:]
        tmpOpt = self.optionals[:]
        matchOrder = []

        # Keep sweeping over the outstanding expressions until a full sweep
        # fails to match any of them.
        keepMatching = True
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
            failed = []
            for e in tmpExprs:
                try:
                    tmpLoc = e.tryParse(instring, tmpLoc)
                except ParseException:
                    failed.append(e)
                else:
                    # record the Optional wrapper when e is an unwrapped Optional
                    matchOrder.append(self.opt1map.get(id(e), e))
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            if len(failed) == len(tmpExprs):
                keepMatching = False

        if tmpReqd:
            missing = ", ".join(_ustr(e) for e in tmpReqd)
            raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

        # Append Optionals whose inner expression never matched, so they are
        # still parsed below.
        matchOrder += [e for e in self.exprs if isinstance(e, Optional) and e.expr in tmpOpt]

        resultlist = []
        for e in matchOrder:
            loc, results = e._parse(instring, loc, doActions)
            resultlist.append(results)

        finalResults = sum(resultlist, ParseResults([]))
        return loc, finalResults

    def __str__(self):
        """Return the set name, else a cached ``{a & b & ...}`` rendering."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Run the recursion check on every sub-expression, with ``self``
        appended to a copy of ``parseElementList``."""
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
4419
4422 """Abstract subclass of :class:`ParserElement`, for combining and
4423 post-processing parsed tokens.
4424 """
def __init__(self, expr, savelist=False):
    """Wrap ``expr``, converting a plain string to a literal expression.

    A string is passed to ``self._literalStringClass``; when that class is
    not a ``Token`` subclass the string is first wrapped in ``Literal``.
    When ``expr`` is not ``None`` its parsing attributes are copied onto
    this wrapper.
    """
    super(ParseElementEnhance, self).__init__(savelist)
    if isinstance(expr, basestring):
        if not issubclass(self._literalStringClass, Token):
            expr = self._literalStringClass(Literal(expr))
        else:
            expr = self._literalStringClass(expr)
    self.expr = expr
    self.strRepr = None
    if expr is None:
        return

    # Mirror the wrapped expression's parsing attributes.
    self.mayIndexError = expr.mayIndexError
    self.mayReturnEmpty = expr.mayReturnEmpty
    self.setWhitespaceChars(expr.whiteChars)
    # assigned after setWhitespaceChars, as in the original ordering
    self.skipWhitespace = expr.skipWhitespace
    self.saveAsList = expr.saveAsList
    self.callPreparse = expr.callPreparse
    self.ignoreExprs.extend(expr.ignoreExprs)
4442
def parseImpl(self, instring, loc, doActions=True):
    """Delegate parsing to the wrapped expression.

    Raises:
        ParseException: if no expression has been set (``self.expr`` is
            ``None``).
    """
    if self.expr is None:
        raise ParseException("", loc, self.errmsg, self)
    return self.expr._parse(instring, loc, doActions, callPreParse=False)
4448
4450 self.skipWhitespace = False
4451 self.expr = self.expr.copy()
4452 if self.expr is not None:
4453 self.expr.leaveWhitespace()
4454 return self
4455
4457 if isinstance(other, Suppress):
4458 if other not in self.ignoreExprs:
4459 super(ParseElementEnhance, self).ignore(other)
4460 if self.expr is not None:
4461 self.expr.ignore(self.ignoreExprs[-1])
4462 else:
4463 super(ParseElementEnhance, self).ignore(other)
4464 if self.expr is not None:
4465 self.expr.ignore(self.ignoreExprs[-1])
4466 return self
4467
4473
4475 if self in parseElementList:
4476 raise RecursiveGrammarException(parseElementList + [self])
4477 subRecCheckList = parseElementList[:] + [self]
4478 if self.expr is not None:
4479 self.expr.checkRecursion(subRecCheckList)
4480
def validate(self, validateTrace=None):
    """Validate the wrapped expression, then check this one for recursion.

    ``validateTrace`` is the list of expressions already being validated;
    a copy with ``self`` appended is passed down to the wrapped expression.
    """
    trace = [] if validateTrace is None else validateTrace
    nested_trace = trace[:] + [self]
    if self.expr is not None:
        self.expr.validate(nested_trace)
    self.checkRecursion([])
4488
4490 try:
4491 return super(ParseElementEnhance, self).__str__()
4492 except Exception:
4493 pass
4494
4495 if self.strRepr is None and self.expr is not None:
4496 self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr))
4497 return self.strRepr
4498
4501 """Lookahead matching of the given parse expression.
4502 ``FollowedBy`` does *not* advance the parsing position within
4503 the input string, it only verifies that the specified parse
4504 expression matches at the current position. ``FollowedBy``
4505 always returns a null token list. If any results names are defined
4506 in the lookahead expression, those *will* be returned for access by
4507 name.
4508
4509 Example::
4510
4511 # use FollowedBy to match a label only if it is followed by a ':'
4512 data_word = Word(alphas)
4513 label = data_word + FollowedBy(':')
4514 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
4515
4516 OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
4517
4518 prints::
4519
4520 [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
4521 """
4525
def parseImpl(self, instring, loc, doActions=True):
    """Check that the wrapped expression matches at ``loc`` without
    advancing.

    The wrapped expression's result object is returned with its token list
    emptied in place, and ``loc`` is returned unchanged.
    """
    _end_loc, lookahead = self.expr._parse(instring, loc, doActions=doActions)
    # Empty the token list of the returned object rather than building a
    # new result, so anything else carried by that object is kept.
    del lookahead[:]
    return loc, lookahead
4533
4536 """Lookbehind matching of the given parse expression.
4537 ``PrecededBy`` does not advance the parsing position within the
4538 input string, it only verifies that the specified parse expression
4539 matches prior to the current position. ``PrecededBy`` always
4540 returns a null token list, but if a results name is defined on the
4541 given expression, it is returned.
4542
4543 Parameters:
4544
4545 - expr - expression that must match prior to the current parse
4546 location
4547 - retreat - (default= ``None``) - (int) maximum number of characters
4548 to lookbehind prior to the current parse location
4549
4550 If the lookbehind expression is a string, Literal, Keyword, or
4551 a Word or CharsNotIn with a specified exact or maximum length, then
4552 the retreat parameter is not required. Otherwise, retreat must be
4553 specified to give a maximum number of characters to look back from
4554 the current parse position for a lookbehind match.
4555
4556 Example::
4557
4558 # VB-style variable names with type prefixes
4559 int_var = PrecededBy("#") + pyparsing_common.identifier
4560 str_var = PrecededBy("$") + pyparsing_common.identifier
4561
4562 """
def __init__(self, expr, retreat=None):
    """Create a lookbehind for ``expr``.

    Parameters:
     - expr - expression that must match just before the current location
     - retreat - (default= ``None``) maximum number of characters to look
       back; it is derived automatically (and ``self.exact`` is set) when
       ``expr`` is a string, a ``Literal``/``Keyword``, a ``Word`` or
       ``CharsNotIn`` with a bounded ``maxLen``, or a ``_PositionToken``
    """
    super(PrecededBy, self).__init__(expr)
    # NOTE(review): self.expr() presumably yields a copy of the expression
    # (ParserElement.__call__ is not visible here) - confirm.
    self.expr = self.expr().leaveWhitespace()
    self.mayReturnEmpty = True
    self.mayIndexError = False
    self.exact = False
    # Test against basestring, like the rest of this module, so that
    # unicode string literals on Python 2 also take the exact-length path.
    if isinstance(expr, basestring):
        retreat = len(expr)
        self.exact = True
    elif isinstance(expr, (Literal, Keyword)):
        retreat = expr.matchLen
        self.exact = True
    elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
        retreat = expr.maxLen
        self.exact = True
    elif isinstance(expr, _PositionToken):
        retreat = 0
        self.exact = True
    self.retreat = retreat
    self.errmsg = "not preceded by " + str(expr)
    self.skipWhitespace = False
    # Parse action that deletes every token from the result (t[:] is removed).
    self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))
4585
def parseImpl(self, instring, loc=0, doActions=True):
    """Verify that the wrapped expression matches just before ``loc``.

    With an exact retreat the expression is parsed once, starting
    ``self.retreat`` characters back.  Otherwise the expression (followed
    by ``StringEnd``) is tried against the slice of input ending at
    ``loc``, starting one character back and moving further back until a
    try succeeds.  ``loc`` is returned unchanged.

    Raises:
        ParseException: if there is not enough input before ``loc`` (exact
            mode), or no look-back position matches (the last failure seen
            is re-raised).
    """
    if self.exact:
        if loc < self.retreat:
            raise ParseException(instring, loc, self.errmsg)
        _, ret = self.expr._parse(instring, loc - self.retreat)
        return loc, ret

    # Anchor the expression so that a match must end exactly at loc.
    anchored_expr = self.expr + StringEnd()
    window = instring[max(0, loc - self.retreat):loc]
    last_failure = ParseException(instring, loc, self.errmsg)
    furthest_back = min(loc, self.retreat + 1)
    for back in range(1, furthest_back + 1):
        try:
            _, ret = anchored_expr._parse(window, len(window) - back)
        except ParseBaseException as pbe:
            last_failure = pbe
        else:
            return loc, ret
    raise last_failure
4608
4609
4610 -class NotAny(ParseElementEnhance):
4611 """Lookahead to disallow matching with the given parse expression.
4612 ``NotAny`` does *not* advance the parsing position within the
4613 input string, it only verifies that the specified parse expression
4614 does *not* match at the current position. Also, ``NotAny`` does
4615 *not* skip over leading whitespace. ``NotAny`` always returns
4616 a null token list. May be constructed using the '~' operator.
4617
4618 Example::
4619
4620 AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())
4621
4622 # take care not to mistake keywords for identifiers
4623 ident = ~(AND | OR | NOT) + Word(alphas)
4624 boolean_term = Optional(NOT) + ident
4625
4626 # very crude boolean expression - to support parenthesis groups and
4627 # operation hierarchy, use infixNotation
4628 boolean_expr = boolean_term + ZeroOrMore((AND | OR) + boolean_term)
4629
4630 # integers that are followed by "." are actually floats
4631 integer = Word(nums) + ~Char(".")
4632 """
4634 super(NotAny, self).__init__(expr)
4635
4636 self.skipWhitespace = False
4637 self.mayReturnEmpty = True
4638 self.errmsg = "Found unwanted token, " + _ustr(self.expr)
4639
4640 - def parseImpl(self, instring, loc, doActions=True):
4644
4646 if hasattr(self, "name"):
4647 return self.name
4648
4649 if self.strRepr is None:
4650 self.strRepr = "~{" + _ustr(self.expr) + "}"
4651
4652 return self.strRepr
4653
def __init__(self, expr, stopOn=None):
    """Create a repetition of ``expr``.

    ``stopOn`` is an optional terminating sentinel; a plain string is
    converted with ``self._literalStringClass`` before being handed to
    ``self.stopOn``.
    """
    super(_MultipleMatch, self).__init__(expr)
    self.saveAsList = True
    if isinstance(stopOn, basestring):
        sentinel = self._literalStringClass(stopOn)
    else:
        sentinel = stopOn
    self.stopOn(sentinel)
4662
4664 if isinstance(ender, basestring):
4665 ender = self._literalStringClass(ender)
4666 self.not_ender = ~ender if ender is not None else None
4667 return self
4668
def parseImpl(self, instring, loc, doActions=True):
    """Match the wrapped expression once, then as many more times as
    possible.

    The first match is mandatory: a failure there propagates to the
    caller.  Failures on later repetitions simply end the loop.  When a
    stop sentinel is set (``self.not_ender``), it is checked before every
    attempt, including the first.
    """
    parse_expr = self.expr._parse
    skip_ignorables = self._skipIgnorables
    try_not_ender = None
    if self.not_ender is not None:
        try_not_ender = self.not_ender.tryParse

    # First occurrence - exceptions here are deliberately not caught.
    if try_not_ender is not None:
        try_not_ender(instring, loc)
    loc, tokens = parse_expr(instring, loc, doActions, callPreParse=False)

    try:
        has_ignorables = bool(self.ignoreExprs)
        while True:
            if try_not_ender is not None:
                try_not_ender(instring, loc)
            start = skip_ignorables(instring, loc) if has_ignorables else loc
            loc, more_tokens = parse_expr(instring, start, doActions)
            if more_tokens or more_tokens.haskeys():
                tokens += more_tokens
    except (ParseException, IndexError):
        # no further repetition (or the sentinel was reached)
        pass

    return loc, tokens
4697
4699 if __diag__.warn_ungrouped_named_tokens_in_collection:
4700 for e in [self.expr] + getattr(self.expr, 'exprs', []):
4701 if isinstance(e, ParserElement) and e.resultsName:
4702 warnings.warn("{0}: setting results name {1!r} on {2} expression "
4703 "collides with {3!r} on contained expression".format("warn_ungrouped_named_tokens_in_collection",
4704 name,
4705 type(self).__name__,
4706 e.resultsName),
4707 stacklevel=3)
4708
4709 return super(_MultipleMatch, self)._setResultsName(name, listAllMatches)
4710
4713 """Repetition of one or more of the given expression.
4714
4715 Parameters:
4716 - expr - expression that must match one or more times
4717 - stopOn - (default= ``None``) - expression for a terminating sentinel
4718 (only required if the sentinel would ordinarily match the repetition
4719 expression)
4720
4721 Example::
4722
4723 data_word = Word(alphas)
4724 label = data_word + FollowedBy(':')
4725 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
4726
4727 text = "shape: SQUARE posn: upper left color: BLACK"
4728 OneOrMore(attr_expr).parseString(text).pprint() # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]
4729
4730 # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
4731 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
4732 OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
4733
4734 # could also be written as
4735 (attr_expr * (1,)).parseString(text).pprint()
4736 """
4737
4739 if hasattr(self, "name"):
4740 return self.name
4741
4742 if self.strRepr is None:
4743 self.strRepr = "{" + _ustr(self.expr) + "}..."
4744
4745 return self.strRepr
4746
4748 """Optional repetition of zero or more of the given expression.
4749
4750 Parameters:
4751 - expr - expression that must match zero or more times
4752 - stopOn - (default= ``None``) - expression for a terminating sentinel
4753 (only required if the sentinel would ordinarily match the repetition
4754 expression)
4755
4756 Example: similar to :class:`OneOrMore`
4757 """
4758 - def __init__(self, expr, stopOn=None):
4761
4762 - def parseImpl(self, instring, loc, doActions=True):
4767
4769 if hasattr(self, "name"):
4770 return self.name
4771
4772 if self.strRepr is None:
4773 self.strRepr = "[" + _ustr(self.expr) + "]..."
4774
4775 return self.strRepr
4776
4784
4786 """Optional matching of the given expression.
4787
4788 Parameters:
4789 - expr - expression that must match zero or more times
4790 - default (optional) - value to be returned if the optional expression is not found.
4791
4792 Example::
4793
4794 # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
4795 zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
4796 zip.runTests('''
4797 # traditional ZIP code
4798 12345
4799
4800 # ZIP+4 form
4801 12101-0001
4802
4803 # invalid ZIP
4804 98765-
4805 ''')
4806
4807 prints::
4808
4809 # traditional ZIP code
4810 12345
4811 ['12345']
4812
4813 # ZIP+4 form
4814 12101-0001
4815 ['12101-0001']
4816
4817 # invalid ZIP
4818 98765-
4819 ^
4820 FAIL: Expected end of text (at char 5), (line:1, col:6)
4821 """
4822 __optionalNotMatched = _NullToken()
4823
4825 super(Optional, self).__init__(expr, savelist=False)
4826 self.saveAsList = self.expr.saveAsList
4827 self.defaultValue = default
4828 self.mayReturnEmpty = True
4829
def parseImpl(self, instring, loc, doActions=True):
    """Try the wrapped expression; on failure return the default instead.

    When the wrapped expression does not match, ``loc`` is returned
    unchanged together with either an empty list (no default configured)
    or the default value.  If the wrapped expression has a results name,
    the default is also stored under that name.
    """
    try:
        loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
    except (ParseException, IndexError):
        default = self.defaultValue
        if default is self.__optionalNotMatched:
            # sentinel: no default value was supplied
            tokens = []
        elif self.expr.resultsName:
            tokens = ParseResults([default])
            tokens[self.expr.resultsName] = default
        else:
            tokens = [default]
    return loc, tokens
4843
4845 if hasattr(self, "name"):
4846 return self.name
4847
4848 if self.strRepr is None:
4849 self.strRepr = "[" + _ustr(self.expr) + "]"
4850
4851 return self.strRepr
4852
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default= ``False``) if True, the target expression is also parsed
       (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default= ``None``) used to define grammars (typically quoted strings and
       comments) that might contain false matches to the target expression
     - failOn - (default= ``None``) define expressions that are not allowed to be
       included in the skipped text; if found before the target expression is found,
       the SkipTo is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                       + string_data("sev") + SEP
                       + string_data("desc") + SEP
                       + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__(self, other, include=False, ignore=None, failOn=None):
        """Create a skip-ahead to the target expression ``other``.

        A string ``failOn`` is converted with ``self._literalStringClass``.
        """
        super(SkipTo, self).__init__(other)
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, basestring):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Scan forward from ``loc`` for the target expression.

        Returns the new location and a ``ParseResults`` holding the skipped
        text; when ``self.includeMatch`` is set, the target expression is
        then parsed as well and its tokens are appended.

        Raises:
            ParseException: if the scan runs past the end of ``instring``
                without the target expression matching.
        """
        startloc = loc
        instrlen = len(instring)
        expr_parse = self.expr._parse
        self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
        self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # Stop scanning as soon as failOn matches here.  Note that
                # this break bypasses the while/else clause below.
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if self_ignoreExpr_tryParse is not None:
                # Advance past any run of ignorable expressions.
                while 1:
                    try:
                        tmploc = self_ignoreExpr_tryParse(instring, tmploc)
                    except ParseBaseException:
                        break

            try:
                expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match at this position, try the next character
                tmploc += 1
            else:
                # target expression matches here, scanning is done
                break

        else:
            # ran off the end of the input without matching the target
            raise ParseException(instring, loc, self.errmsg, self)

        # Build the result from the text that was skipped over.
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            loc, mat = expr_parse(instring, loc, doActions, callPreParse=False)
            skipresult += mat

        return loc, skipresult
4970
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    variable using the '<<' operator.

    Note: take care when assigning to ``Forward`` not to overlook
    precedence of operators.

    Specifically, '|' has a lower precedence than '<<', so that::

        fwdExpr << a | b | c

    will actually be evaluated as::

        (fwdExpr << a) | b | c

    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the ``Forward``::

        fwdExpr << (a | b | c)

    Converting to use the '<<=' operator instead will avoid this problem.

    See :class:`ParseResults.pprint` for an example of a recursive
    parser created using ``Forward``.
    """
    def __init__(self, other=None):
        """Create a placeholder, optionally already bound to ``other``."""
        super(Forward, self).__init__(other, savelist=False)

    def __lshift__(self, other):
        """Bind ``other`` as the contained expression (``fwd << expr``).

        A string is converted with ``self._literalStringClass``.  The
        parsing attributes of the new expression are copied onto this
        object and the cached string form is reset.  Returns ``self``.
        """
        if isinstance(other, basestring):
            other = self._literalStringClass(other)
        self.expr = other
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars(self.expr.whiteChars)
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """In-place form (``fwd <<= expr``); same as ``fwd << expr``."""
        return self << other

    def leaveWhitespace(self):
        """Disable whitespace skipping on this object only; the contained
        expression is not touched."""
        self.skipWhitespace = False
        return self

    def streamline(self):
        """Streamline the contained expression once.

        The ``streamlined`` flag is set before descending, so a recursive
        grammar does not loop back into this object.
        """
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None):
        """Validate the contained expression unless ``self`` is already in
        ``validateTrace``, then check this object for recursion."""
        if validateTrace is None:
            validateTrace = []

        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        """Return the set name, else a cached ``ClassName: <expr>`` string
        (the expression text is truncated to 1000 characters)."""
        if hasattr(self, "name"):
            return self.name
        if self.strRepr is not None:
            return self.strRepr

        # Set a temporary value first, so that a recursive reference back
        # to this object while rendering self.expr returns immediately.
        self.strRepr = ": ..."

        retString = '...'
        try:
            if self.expr is not None:
                retString = _ustr(self.expr)[:1000]
            else:
                retString = "None"
        finally:
            self.strRepr = self.__class__.__name__ + ": " + retString
        return self.strRepr

    def copy(self):
        """Return a copy; an unbound ``Forward`` yields a new ``Forward``
        that wraps this one, so a later binding is still seen through it."""
        if self.expr is not None:
            return super(Forward, self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, listAllMatches=False):
        """Set the results name, warning first (when the
        ``warn_name_set_on_empty_Forward`` diagnostic is enabled) if no
        expression has been bound yet."""
        if __diag__.warn_name_set_on_empty_Forward:
            if self.expr is None:
                # Field 0 is the diagnostic name, 1 the results name and
                # 2 the class name.
                warnings.warn("{0}: setting results name {1!r} on {2} expression "
                              "that has no contained expression".format("warn_name_set_on_empty_Forward",
                                                                        name,
                                                                        type(self).__name__),
                              stacklevel=3)

        return super(Forward, self)._setResultsName(name, listAllMatches)
5076
5078 """
5079 Abstract subclass of :class:`ParseExpression`, for converting parsed results.
5080 """
5081 - def __init__(self, expr, savelist=False):
5084
5086 """Converter to concatenate all matching tokens to a single string.
5087 By default, the matching patterns must also be contiguous in the
5088 input string; this can be disabled by specifying
5089 ``'adjacent=False'`` in the constructor.
5090
5091 Example::
5092
5093 real = Word(nums) + '.' + Word(nums)
5094 print(real.parseString('3.1416')) # -> ['3', '.', '1416']
5095 # will also erroneously match the following
5096 print(real.parseString('3. 1416')) # -> ['3', '.', '1416']
5097
5098 real = Combine(Word(nums) + '.' + Word(nums))
5099 print(real.parseString('3.1416')) # -> ['3.1416']
5100 # no match when there are internal spaces
5101 print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
5102 """
def __init__(self, expr, joinString="", adjacent=True):
    """Create a converter that joins the tokens of ``expr`` into one string.

    Parameters:
     - expr - expression whose tokens are combined
     - joinString - (default= ``""``) string stored as ``self.joinString``
     - adjacent - (default= ``True``) when true, ``leaveWhitespace()`` is
       applied before whitespace skipping is turned back on for this
       object itself
    """
    super(Combine, self).__init__(expr)
    if adjacent:
        self.leaveWhitespace()
    # Whitespace skipping is switched back on for the Combine itself,
    # whatever leaveWhitespace() did above.
    self.skipWhitespace = True
    self.callPreparse = True
    self.adjacent = adjacent
    self.joinString = joinString
5112
5119
def postParse(self, instring, loc, tokenlist):
    """Replace the matched tokens with a single joined string.

    The result starts as a copy of ``tokenlist`` with its tokens removed,
    to which the joined string is added.  It is wrapped in a list when
    this expression has a results name and the result carries keys.
    """
    combined = tokenlist.copy()
    del combined[:]
    joined_text = "".join(tokenlist._asStringList(self.joinString))
    combined += ParseResults([joined_text], modal=self.modalResults)

    if not (self.resultsName and combined.haskeys()):
        return combined
    return [combined]
5129
5130 -class Group(TokenConverter):
5131 """Converter to return the matched tokens as a list - useful for
5132 returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.
5133
5134 Example::
5135
5136 ident = Word(alphas)
5137 num = Word(nums)
5138 term = ident | num
5139 func = ident + Optional(delimitedList(term))
5140 print(func.parseString("fn a, b, 100")) # -> ['fn', 'a', 'b', '100']
5141
5142 func = ident + Group(Optional(delimitedList(term)))
5143 print(func.parseString("fn a, b, 100")) # -> ['fn', ['a', 'b', '100']]
5144 """
5146 super(Group, self).__init__(expr)
5147 self.saveAsList = True
5148
5149 - def postParse(self, instring, loc, tokenlist):
5151
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also
    as a dictionary. Each element can also be referenced using the first
    token in the expression as its key. Useful for tabular report
    scraping when the first column can be used as a item key.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        # print attributes as plain groups
        print(OneOrMore(attr_expr).parseString(text).dump())

        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.asDict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """
    def __init__(self, expr):
        """Wrap ``expr``; ``saveAsList`` is always set to ``True``."""
        super(Dict, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Add a named entry to ``tokenlist`` for each non-empty token group.

        The key is the first token of the group (an ``int`` key is
        converted to a stripped string).  The value depends on the group:

         - one token: the empty string
         - two tokens, the second not a ``ParseResults``: the second token
         - otherwise: a copy of the group without its first token, reduced
           to its single element when exactly one remains and the copy
           carries no keys

        Returns ``tokenlist`` itself, wrapped in a list when this
        expression has a results name.
        """
        for i, tok in enumerate(tokenlist):
            if len(tok) == 0:
                continue
            ikey = tok[0]
            if isinstance(ikey, int):
                ikey = _ustr(tok[0]).strip()
            if len(tok) == 1:
                tokenlist[ikey] = _ParseResultsWithOffset("", i)
            elif len(tok) == 2 and not isinstance(tok[1], ParseResults):
                tokenlist[ikey] = _ParseResultsWithOffset(tok[1], i)
            else:
                dictvalue = tok.copy()
                del dictvalue[0]
                if len(dictvalue) != 1 or (isinstance(dictvalue, ParseResults) and dictvalue.haskeys()):
                    tokenlist[ikey] = _ParseResultsWithOffset(dictvalue, i)
                else:
                    tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0], i)

        if self.resultsName:
            return [tokenlist]
        else:
            return tokenlist
5218
5221 """Converter for ignoring the results of a parsed expression.
5222
5223 Example::
5224
5225 source = "a, b, c,d"
5226 wd = Word(alphas)
5227 wd_list1 = wd + ZeroOrMore(',' + wd)
5228 print(wd_list1.parseString(source))
5229
5230 # often, delimiters that are useful during parsing are just in the
5231 # way afterward - use Suppress to keep them out of the parsed output
5232 wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
5233 print(wd_list2.parseString(source))
5234
5235 prints::
5236
5237 ['a', ',', 'b', ',', 'c', ',', 'd']
5238 ['a', 'b', 'c', 'd']
5239
5240 (See also :class:`delimitedList`.)
5241 """
5242 - def postParse(self, instring, loc, tokenlist):
5244
5247
5250 """Wrapper for parse actions, to ensure they are only called once.
5251 """
5253 self.callable = _trim_arity(methodCall)
5254 self.called = False
5256 if not self.called:
5257 results = self.callable(s, l, t)
5258 self.called = True
5259 return results
5260 raise ParseException(s, l, "")
5263
5265 """Decorator for debugging parse actions.
5266
5267 When the parse action is called, this decorator will print
5268 ``">> entering method-name(line:<current_source_line>, <parse_location>, <matched_tokens>)"``.
5269 When the parse action completes, the decorator will print
5270 ``"<<"`` followed by the returned value, or any exception that the parse action raised.
5271
5272 Example::
5273
5274 wd = Word(alphas)
5275
5276 @traceParseAction
5277 def remove_duplicate_chars(tokens):
5278 return ''.join(sorted(set(''.join(tokens))))
5279
5280 wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
5281 print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
5282
5283 prints::
5284
5285 >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
5286 <<leaving remove_duplicate_chars (ret: 'dfjkls')
5287 ['dfjkls']
5288 """
5289 f = _trim_arity(f)
5290 def z(*paArgs):
5291 thisFunc = f.__name__
5292 s, l, t = paArgs[-3:]
5293 if len(paArgs) > 3:
5294 thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
5295 sys.stderr.write(">>entering %s(line: '%s', %d, %r)\n" % (thisFunc, line(l, s), l, t))
5296 try:
5297 ret = f(*paArgs)
5298 except Exception as exc:
5299 sys.stderr.write("<<leaving %s (exception: %s)\n" % (thisFunc, exc))
5300 raise
5301 sys.stderr.write("<<leaving %s (ret: %r)\n" % (thisFunc, ret))
5302 return ret
5303 try:
5304 z.__name__ = f.__name__
5305 except AttributeError:
5306 pass
5307 return z
5308
5309
5310
5311
def delimitedList(expr, delim=",", combine=False):
    """Helper to define a delimited list of expressions - the delimiter
    defaults to ','. By default, the list elements and delimiters can
    have intervening whitespace, and comments, but this can be
    overridden by passing ``combine=True`` in the constructor. If
    ``combine`` is set to ``True``, the matching tokens are
    returned as a single token string, with the delimiters included;
    otherwise, the matching tokens are returned as a list of tokens,
    with the delimiters suppressed.

    Example::

        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    expr_label = _ustr(expr)
    dlName = expr_label + " [" + _ustr(delim) + " " + expr_label + "]..."
    if not combine:
        # delimiters are matched but kept out of the results
        return (expr + ZeroOrMore(Suppress(delim) + expr)).setName(dlName)
    # delimiters are kept and everything is merged into a single token
    return Combine(expr + ZeroOrMore(delim + expr)).setName(dlName)
5332
5334 """Helper to define a counted list of expressions.
5335
5336 This helper defines a pattern of the form::
5337
5338 integer expr expr expr...
5339
5340 where the leading integer tells how many expr expressions follow.
5341 The matched tokens returns the array of expr tokens as a list - the
5342 leading count token is suppressed.
5343
5344 If ``intExpr`` is specified, it should be a pyparsing expression
5345 that produces an integer value.
5346
5347 Example::
5348
5349 countedArray(Word(alphas)).parseString('2 ab cd ef') # -> ['ab', 'cd']
5350
5351 # in this parser, the leading integer value is given in binary,
5352 # '10' indicating that 2 values are in the array
5353 binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
5354 countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef') # -> ['ab', 'cd']
5355 """
5356 arrayExpr = Forward()
5357 def countFieldParseAction(s, l, t):
5358 n = t[0]
5359 arrayExpr << (n and Group(And([expr] * n)) or Group(empty))
5360 return []
5361 if intExpr is None:
5362 intExpr = Word(nums).setParseAction(lambda t: int(t[0]))
5363 else:
5364 intExpr = intExpr.copy()
5365 intExpr.setName("arrayLen")
5366 intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
5367 return (intExpr + arrayExpr).setName('(len) ' + _ustr(expr) + '...')
5368
5370 ret = []
5371 for i in L:
5372 if isinstance(i, list):
5373 ret.extend(_flatten(i))
5374 else:
5375 ret.append(i)
5376 return ret
5377
5379 """Helper to define an expression that is indirectly defined from
5380 the tokens matched in a previous expression, that is, it looks for
5381 a 'repeat' of a previous expression. For example::
5382
5383 first = Word(nums)
5384 second = matchPreviousLiteral(first)
5385 matchExpr = first + ":" + second
5386
5387 will match ``"1:1"``, but not ``"1:2"``. Because this
5388 matches a previous literal, will also match the leading
5389 ``"1:1"`` in ``"1:10"``. If this is not desired, use
5390 :class:`matchPreviousExpr`. Do *not* use with packrat parsing
5391 enabled.
5392 """
5393 rep = Forward()
5394 def copyTokenToRepeater(s, l, t):
5395 if t:
5396 if len(t) == 1:
5397 rep << t[0]
5398 else:
5399
5400 tflat = _flatten(t.asList())
5401 rep << And(Literal(tt) for tt in tflat)
5402 else:
5403 rep << Empty()
5404 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
5405 rep.setName('(prev) ' + _ustr(expr))
5406 return rep
5407
5409 """Helper to define an expression that is indirectly defined from
5410 the tokens matched in a previous expression, that is, it looks for
5411 a 'repeat' of a previous expression. For example::
5412
5413 first = Word(nums)
5414 second = matchPreviousExpr(first)
5415 matchExpr = first + ":" + second
5416
5417 will match ``"1:1"``, but not ``"1:2"``. Because this
5418 matches by expressions, will *not* match the leading ``"1:1"``
5419 in ``"1:10"``; the expressions are evaluated first, and then
5420 compared, so ``"1"`` is compared with ``"10"``. Do *not* use
5421 with packrat parsing enabled.
5422 """
5423 rep = Forward()
5424 e2 = expr.copy()
5425 rep <<= e2
5426 def copyTokenToRepeater(s, l, t):
5427 matchTokens = _flatten(t.asList())
5428 def mustMatchTheseTokens(s, l, t):
5429 theseTokens = _flatten(t.asList())
5430 if theseTokens != matchTokens:
5431 raise ParseException('', 0, '')
5432 rep.setParseAction(mustMatchTheseTokens, callDuringTry=True)
5433 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
5434 rep.setName('(prev) ' + _ustr(expr))
5435 return rep
5436
5438
5439 for c in r"\^-]":
5440 s = s.replace(c, _bslash + c)
5441 s = s.replace("\n", r"\n")
5442 s = s.replace("\t", r"\t")
5443 return _ustr(s)
5444
def oneOf(strs, caseless=False, useRegex=True, asKeyword=False):
    """Helper to quickly define a set of alternative Literals, and makes
    sure to do longest-first testing when there is a conflict,
    regardless of the input order, but returns
    a :class:`MatchFirst` for best performance.

    Parameters:

     - strs - a string of space-delimited literals, or a collection of
       string literals
     - caseless - (default= ``False``) - treat all literals as
       caseless
     - useRegex - (default= ``True``) - as an optimization, will
       generate a Regex object; otherwise, will generate
       a :class:`MatchFirst` object (if ``caseless=True`` or ``asKeyword=True``, or if
       creating a :class:`Regex` raises an exception)
     - asKeyword - (default=``False``) - enforce Keyword-style matching on the
       generated expressions

    Returns a :class:`NoMatch` when ``strs`` yields no symbols (an empty
    string/collection, or an argument that is neither a string nor an
    iterable, in which case a ``SyntaxWarning`` is also issued).

    Example::

        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12   AA=23 B<=AA AA>12"))

    prints::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # A string in the second positional slot almost certainly means the
    # caller wrote oneOf("a", "b") instead of oneOf("a b") / oneOf(["a", "b"]).
    if isinstance(caseless, basestring):
        warnings.warn("More than one string argument passed to oneOf, pass "
                      "choices as a list or space-delimited string", stacklevel=2)

    # Pick the comparison helpers and the element class for the fallback
    # MatchFirst according to the caseless / asKeyword flags.
    if caseless:
        isequal = (lambda a, b: a.upper() == b.upper())
        masks = (lambda a, b: b.upper().startswith(a.upper()))
        parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral
    else:
        isequal = (lambda a, b: a == b)
        masks = (lambda a, b: b.startswith(a))
        parseElementClass = Keyword if asKeyword else Literal

    symbols = []
    if isinstance(strs, basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                      SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    if not asKeyword:
        # Drop duplicates, and move any later symbol that the current one
        # is a prefix of in front of it, so a shorter alternative cannot
        # mask a longer one in first-match testing.
        i = 0
        while i < len(symbols) - 1:
            cur = symbols[i]
            for j, other in enumerate(symbols[i + 1:]):
                if isequal(other, cur):
                    del symbols[i + j + 1]
                    break
                elif masks(cur, other):
                    del symbols[i + j + 1]
                    symbols.insert(i, other)
                    break
            else:
                # nothing after position i conflicts with it; move on
                i += 1

    if not (caseless or asKeyword) and useRegex:
        try:
            # Only use a character class when EVERY symbol is exactly one
            # character.  Comparing len(symbols) with the joined length is
            # not equivalent: an empty symbol can offset a longer one
            # (e.g. ["ab", ""]) and would yield "[ab]", matching a lone "a".
            if all(len(sym) == 1 for sym in symbols):
                return Regex("[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols)).setName(' | '.join(symbols))
            else:
                return Regex("|".join(re.escape(sym) for sym in symbols)).setName(' | '.join(symbols))
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                          SyntaxWarning, stacklevel=2)

    # Regex not requested, not applicable, or its construction failed:
    # fall back to an explicit first-match alternation.
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
5531
5533 """Helper to easily and clearly define a dictionary by specifying
5534 the respective patterns for the key and value. Takes care of
5535 defining the :class:`Dict`, :class:`ZeroOrMore`, and
5536 :class:`Group` tokens in the proper order. The key pattern
5537 can include delimiting markers or punctuation, as long as they are
5538 suppressed, thereby leaving the significant key text. The value
5539 pattern can include named results, so that the :class:`Dict` results
5540 can include named token fields.
5541
5542 Example::
5543
5544 text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
5545 attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
5546 print(OneOrMore(attr_expr).parseString(text).dump())
5547
5548 attr_label = label
5549 attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)
5550
5551 # similar to Dict, but simpler call format
5552 result = dictOf(attr_label, attr_value).parseString(text)
5553 print(result.dump())
5554 print(result['shape'])
5555 print(result.shape) # object attribute access works too
5556 print(result.asDict())
5557
5558 prints::
5559
5560 [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
5561 - color: light blue
5562 - posn: upper left
5563 - shape: SQUARE
5564 - texture: burlap
5565 SQUARE
5566 SQUARE
5567 {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
5568 """
5569 return Dict(OneOrMore(Group(key + value)))
5570
def originalTextFor(expr, asString=True):
    """Wrap ``expr`` so that it yields the raw slice of the input string
    it matched instead of its individual tokens.

    This is handy for turning the parsed fields of an HTML start tag
    back into the literal tag text, or for recovering the exact input
    (including intervening whitespace) behind a series of tokens. By
    default the result is a string containing the original parsed text.

    If the optional ``asString`` argument is passed as ``False``, the
    parse results are modified in place instead: the token list is
    replaced by a single token holding the original matched text, while
    any results names that were matched inside ``expr`` are kept. So if
    the expression passed to :class:`originalTextFor` contains
    expressions with defined results names, you must set ``asString``
    to ``False`` if you want to preserve those results name values.

    Example::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])

    prints::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # Zero-width markers whose parse action reports the current location.
    startMarker = Empty().setParseAction(lambda s, loc, t: loc)
    endMarker = startMarker.copy()
    # the closing marker must not run pre-parse processing before it
    # records its location
    endMarker.callPreparse = False

    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")

    if not asString:
        def extractText(s, l, t):
            # remove the helper names, then replace all tokens with the slice
            begin = t.pop('_original_start')
            finish = t.pop('_original_end')
            t[:] = [s[begin:finish]]
    else:
        extractText = lambda s, l, t: s[t._original_start: t._original_end]

    wrapped.setParseAction(extractText)
    # share the wrapped expression's ignore list with the wrapper
    wrapped.ignoreExprs = expr.ignoreExprs
    return wrapped
5612
5614 """Helper to undo pyparsing's default grouping of And expressions,
5615 even if all but one are non-empty.
5616 """
5617 return TokenConverter(expr).addParseAction(lambda t: t[0])
5618
5620 """Helper to decorate a returned token with its starting and ending
5621 locations in the input string.
5622
5623 This helper adds the following results names:
5624
5625 - locn_start = location where matched expression begins
5626 - locn_end = location where matched expression ends
5627 - value = the actual parsed results
5628
5629 Be careful if the input text contains ``<TAB>`` characters, you
5630 may want to call :class:`ParserElement.parseWithTabs`
5631
5632 Example::
5633
5634 wd = Word(alphas)
5635 for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
5636 print(match)
5637
5638 prints::
5639
5640 [[0, 'ljsdf', 5]]
5641 [[8, 'lksdjjf', 15]]
5642 [[18, 'lkkjj', 23]]
5643 """
5644 locator = Empty().setParseAction(lambda s, l, t: l)
5645 return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))
5646
5647
5648
# Ready-made, named instances of the zero-width positional expressions.
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")

# Mini-grammar for regex-style "[...]" character set strings.
# Escaped punctuation: a _bslash character followed by one of the listed
# characters; the parse action keeps only the second character.
_escapedPunc = Word(_bslash, r"\[]-*.$+^?()~ ", exact=2).setParseAction(lambda s, l, t: t[0][1])
# Escaped hex character: \x## (or \0x##, upper- or lower-case x).
# The digits are taken as everything after the 'x'.  The previous
# lstrip(r'\0x') stripped a *set* of characters, so it also removed
# leading zero digits ("\x00" became "" and int() raised ValueError) and
# left an upper-case 'X' in place (also a ValueError).
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s, l, t: unichr(int(t[0].lower().partition('x')[2], 16)))
# Escaped octal character: backslash, a 0, then octal digits.
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s, l, t: unichr(int(t[0][1:], 8)))
# One set member: any escape form above, or a single character that is
# neither a backslash nor the closing bracket.
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1)
# A "lo-hi" range; grouped so it can be told apart from plain characters.
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# Whole bracket expression: "[", optional "^" (results name "negate"),
# one or more ranges/characters (results name "body"), then "]".
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group(OneOrMore(_charRange | _singleChar)).setResultsName("body") + "]"
5663 r"""Helper to easily define string ranges for use in Word
5664 construction. Borrows syntax from regexp '[]' string range
5665 definitions::
5666
5667 srange("[0-9]") -> "0123456789"
5668 srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
5669 srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
5670
5671 The input string must be enclosed in []'s, and the returned string
5672 is the expanded character set joined into a single string. The
5673 values enclosed in the []'s may be:
5674
5675 - a single character
5676 - an escaped character with a leading backslash (such as ``\-``
5677 or ``\]``)
5678 - an escaped hex character with a leading ``'\x'``
5679 (``\x21``, which is a ``'!'`` character) (``\0x##``
5680 is also supported for backwards compatibility)
5681 - an escaped octal character with a leading ``'\0'``
5682 (``\041``, which is a ``'!'`` character)
5683 - a range of any of the above, separated by a dash (``'a-z'``,
5684 etc.)
5685 - any combination of the above (``'aeiouy'``,
5686 ``'a-zA-Z0-9_$'``, etc.)
5687 """
5688 _expanded = lambda p: p if not isinstance(p, ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]), ord(p[1]) + 1))
5689 try:
5690 return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
5691 except Exception:
5692 return ""
5693
5695 """Helper method for defining parse actions that require matching at
5696 a specific column in the input text.
5697 """
5698 def verifyCol(strg, locn, toks):
5699 if col(locn, strg) != n:
5700 raise ParseException(strg, locn, "matched token not at column %d" % n)
5701 return verifyCol
5702
5704 """Helper method for common parse actions that simply return
5705 a literal value. Especially useful when used with
5706 :class:`transformString<ParserElement.transformString>` ().
5707
5708 Example::
5709
5710 num = Word(nums).setParseAction(lambda toks: int(toks[0]))
5711 na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
5712 term = na | num
5713
5714 OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
5715 """
5716 return lambda s, l, t: [replStr]
5717
5719 """Helper parse action for removing quotation marks from parsed
5720 quoted strings.
5721
5722 Example::
5723
5724 # by default, quotation marks are included in parsed results
5725 quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]
5726
5727 # use removeQuotes to strip quotation marks from parsed results
5728 quotedString.setParseAction(removeQuotes)
5729 quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
5730 """
5731 return t[0][1:-1]
5732
5734 """Helper to define a parse action by mapping a function to all
5735 elements of a ParseResults list. If any additional args are passed,
5736 they are forwarded to the given function as additional arguments
5737 after the token, as in
5738 ``hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))``,
5739 which will convert the parsed data to an integer using base 16.
5740
5741 Example (compare the last to example in :class:`ParserElement.transformString`::
5742
5743 hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
5744 hex_ints.runTests('''
5745 00 11 22 aa FF 0a 0d 1a
5746 ''')
5747
5748 upperword = Word(alphas).setParseAction(tokenMap(str.upper))
5749 OneOrMore(upperword).runTests('''
5750 my kingdom for a horse
5751 ''')
5752
5753 wd = Word(alphas).setParseAction(tokenMap(str.title))
5754 OneOrMore(wd).setParseAction(' '.join).runTests('''
5755 now is the winter of our discontent made glorious summer by this sun of york
5756 ''')
5757
5758 prints::
5759
5760 00 11 22 aa FF 0a 0d 1a
5761 [0, 17, 34, 170, 255, 10, 13, 26]
5762
5763 my kingdom for a horse
5764 ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']
5765
5766 now is the winter of our discontent made glorious summer by this sun of york
5767 ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
5768 """
5769 def pa(s, l, t):
5770 return [func(tokn, *args) for tokn in t]
5771
5772 try:
5773 func_name = getattr(func, '__name__',
5774 getattr(func, '__class__').__name__)
5775 except Exception:
5776 func_name = str(func)
5777 pa.__name__ = func_name
5778
5779 return pa
5780
# Parse action built with tokenMap: each token is passed through _ustr()
# and then upper-cased.
upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
"""(Deprecated) Helper parse action to convert tokens to upper case.
Deprecated in favor of :class:`pyparsing_common.upcaseTokens`"""

# Parse action built with tokenMap: each token is passed through _ustr()
# and then lower-cased.
downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
"""(Deprecated) Helper parse action to convert tokens to lower case.
Deprecated in favor of :class:`pyparsing_common.downcaseTokens`"""
5825
5849
5857
5859 """Helper to create a validating parse action to be used with start
5860 tags created with :class:`makeXMLTags` or
5861 :class:`makeHTMLTags`. Use ``withAttribute`` to qualify
5862 a starting tag with a required attribute value, to avoid false
5863 matches on common tags such as ``<TD>`` or ``<DIV>``.
5864
5865 Call ``withAttribute`` with a series of attribute names and
5866 values. Specify the list of filter attributes names and values as:
5867
5868 - keyword arguments, as in ``(align="right")``, or
5869 - as an explicit dict with ``**`` operator, when an attribute
5870 name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}``
5871 - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))``
5872
5873 For attribute names with a namespace prefix, you must use the second
5874 form. Attribute names are matched insensitive to upper/lower case.
5875
5876 If just testing for ``class`` (with or without a namespace), use
5877 :class:`withClass`.
5878
5879 To verify that the attribute exists, but without specifying a value,
5880 pass ``withAttribute.ANY_VALUE`` as the value.
5881
5882 Example::
5883
5884 html = '''
5885 <div>
5886 Some text
5887 <div type="grid">1 4 0 1 0</div>
5888 <div type="graph">1,3 2,3 1,1</div>
5889 <div>this has no type</div>
5890 </div>
5891
5892 '''
5893 div,div_end = makeHTMLTags("div")
5894
5895 # only match div tag having a type attribute with value "grid"
5896 div_grid = div().setParseAction(withAttribute(type="grid"))
5897 grid_expr = div_grid + SkipTo(div | div_end)("body")
5898 for grid_header in grid_expr.searchString(html):
5899 print(grid_header.body)
5900
5901 # construct a match with any div tag having a type attribute, regardless of the value
5902 div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
5903 div_expr = div_any_type + SkipTo(div | div_end)("body")
5904 for div_header in div_expr.searchString(html):
5905 print(div_header.body)
5906
5907 prints::
5908
5909 1 4 0 1 0
5910
5911 1 4 0 1 0
5912 1,3 2,3 1,1
5913 """
5914 if args:
5915 attrs = args[:]
5916 else:
5917 attrs = attrDict.items()
5918 attrs = [(k, v) for k, v in attrs]
5919 def pa(s, l, tokens):
5920 for attrName, attrValue in attrs:
5921 if attrName not in tokens:
5922 raise ParseException(s, l, "no matching attribute " + attrName)
5923 if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
5924 raise ParseException(s, l, "attribute '%s' has value '%s', must be '%s'" %
5925 (attrName, tokens[attrName], attrValue))
5926 return pa
5927 withAttribute.ANY_VALUE = object()
5928
def withClass(classname, namespace=''):
    """Shortcut for :class:`withAttribute` when filtering on the
    ``class`` attribute of a tag, which cannot be written as a plain
    keyword argument because ``class`` is a reserved word in Python.

    Parameters:

     - classname - required value of the class attribute (or
       ``withAttribute.ANY_VALUE``, as in the example below)
     - namespace - (default= ``''``) optional namespace prefix; when
       given, the attribute tested is ``"<namespace>:class"``

    Example::

        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)

    prints::

        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        attributeName = "%s:class" % namespace
    else:
        attributeName = "class"
    # the name is not a valid keyword identifier, so pass it via ** expansion
    return withAttribute(**{attributeName: classname})
5966
# Namespace holding the two associativity markers.  LEFT and RIGHT are
# distinct, otherwise featureless objects; infixNotation compares the
# rightLeftAssoc member of each operator tuple against them.
opAssoc = SimpleNamespace()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
5972 """Helper method for constructing grammars of expressions made up of
5973 operators working in a precedence hierarchy. Operators may be unary
5974 or binary, left- or right-associative. Parse actions can also be
5975 attached to operator expressions. The generated parser will also
5976 recognize the use of parentheses to override operator precedences
5977 (see example below).
5978
5979 Note: if you define a deep operator list, you may see performance
5980 issues when using infixNotation. See
5981 :class:`ParserElement.enablePackrat` for a mechanism to potentially
5982 improve your parser performance.
5983
5984 Parameters:
5985 - baseExpr - expression representing the most basic element for the
5986 nested
5987 - opList - list of tuples, one for each operator precedence level
5988 in the expression grammar; each tuple is of the form ``(opExpr,
5989 numTerms, rightLeftAssoc, parseAction)``, where:
5990
5991 - opExpr is the pyparsing expression for the operator; may also
5992 be a string, which will be converted to a Literal; if numTerms
5993 is 3, opExpr is a tuple of two expressions, for the two
5994 operators separating the 3 terms
5995 - numTerms is the number of terms for this operator (must be 1,
5996 2, or 3)
5997 - rightLeftAssoc is the indicator whether the operator is right
5998 or left associative, using the pyparsing-defined constants
5999 ``opAssoc.RIGHT`` and ``opAssoc.LEFT``.
6000 - parseAction is the parse action to be associated with
6001 expressions matching this operator expression (the parse action
6002 tuple member may be omitted); if the parse action is passed
6003 a tuple or list of functions, this is equivalent to calling
6004 ``setParseAction(*fn)``
6005 (:class:`ParserElement.setParseAction`)
6006 - lpar - expression for matching left-parentheses
6007 (default= ``Suppress('(')``)
6008 - rpar - expression for matching right-parentheses
6009 (default= ``Suppress(')')``)
6010
6011 Example::
6012
6013 # simple example of four-function arithmetic with ints and
6014 # variable names
6015 integer = pyparsing_common.signed_integer
6016 varname = pyparsing_common.identifier
6017
6018 arith_expr = infixNotation(integer | varname,
6019 [
6020 ('-', 1, opAssoc.RIGHT),
6021 (oneOf('* /'), 2, opAssoc.LEFT),
6022 (oneOf('+ -'), 2, opAssoc.LEFT),
6023 ])
6024
6025 arith_expr.runTests('''
6026 5+3*6
6027 (5+3)*6
6028 -2--11
6029 ''', fullDump=False)
6030
6031 prints::
6032
6033 5+3*6
6034 [[5, '+', [3, '*', 6]]]
6035
6036 (5+3)*6
6037 [[[5, '+', 3], '*', 6]]
6038
6039 -2--11
6040 [[['-', 2], '-', ['-', 11]]]
6041 """
6042
6043 class _FB(FollowedBy):
6044 def parseImpl(self, instring, loc, doActions=True):
6045 self.expr.tryParse(instring, loc)
6046 return loc, []
6047
6048 ret = Forward()
6049 lastExpr = baseExpr | (lpar + ret + rpar)
6050 for i, operDef in enumerate(opList):
6051 opExpr, arity, rightLeftAssoc, pa = (operDef + (None, ))[:4]
6052 termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr
6053 if arity == 3:
6054 if opExpr is None or len(opExpr) != 2:
6055 raise ValueError(
6056 "if numterms=3, opExpr must be a tuple or list of two expressions")
6057 opExpr1, opExpr2 = opExpr
6058 thisExpr = Forward().setName(termName)
6059 if rightLeftAssoc == opAssoc.LEFT:
6060 if arity == 1:
6061 matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + OneOrMore(opExpr))
6062 elif arity == 2:
6063 if opExpr is not None:
6064 matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group(lastExpr + OneOrMore(opExpr + lastExpr))
6065 else:
6066 matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr + OneOrMore(lastExpr))
6067 elif arity == 3:
6068 matchExpr = (_FB(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr)
6069 + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr)))
6070 else:
6071 raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
6072 elif rightLeftAssoc == opAssoc.RIGHT:
6073 if arity == 1:
6074
6075 if not isinstance(opExpr, Optional):
6076 opExpr = Optional(opExpr)
6077 matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr)
6078 elif arity == 2:
6079 if opExpr is not None:
6080 matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group(lastExpr + OneOrMore(opExpr + thisExpr))
6081 else:
6082 matchExpr = _FB(lastExpr + thisExpr) + Group(lastExpr + OneOrMore(thisExpr))
6083 elif arity == 3:
6084 matchExpr = (_FB(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)
6085 + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr))
6086 else:
6087 raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
6088 else:
6089 raise ValueError("operator must indicate right or left associativity")
6090 if pa:
6091 if isinstance(pa, (tuple, list)):
6092 matchExpr.setParseAction(*pa)
6093 else:
6094 matchExpr.setParseAction(pa)
6095 thisExpr <<= (matchExpr.setName(termName) | lastExpr)
6096 lastExpr = thisExpr
6097 ret <<= lastExpr
6098 return ret
6099
# Backwards-compatible alias: the same function object under its old name.
operatorPrecedence = infixNotation
"""(Deprecated) Former name of :class:`infixNotation`, will be
dropped in a future release."""

# Quoted-string expressions.  Each regex matches the opening quote and then
# any run of: a character other than that quote, newline, carriage return or
# backslash; a doubled quote; or a backslash escape (backslash plus any
# non-'x' character, or backslash-x plus hex digits).  The closing quote is
# a separate literal, and Combine returns the pieces as a single token.
dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').setName("string enclosed in double quotes")
sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").setName("string enclosed in single quotes")
# Either of the two forms above, tried double-quoted first.
quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
                       | Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").setName("quotedString using single or double quotes")
# A quoted string (a copy of quotedString) immediately preceded by 'u'.
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
6111 """Helper method for defining nested lists enclosed in opening and
6112 closing delimiters ("(" and ")" are the default).
6113
6114 Parameters:
6115 - opener - opening character for a nested list
6116 (default= ``"("``); can also be a pyparsing expression
6117 - closer - closing character for a nested list
6118 (default= ``")"``); can also be a pyparsing expression
6119 - content - expression for items within the nested lists
6120 (default= ``None``)
6121 - ignoreExpr - expression for ignoring opening and closing
6122 delimiters (default= :class:`quotedString`)
6123
6124 If an expression is not provided for the content argument, the
6125 nested expression will capture all whitespace-delimited content
6126 between delimiters as a list of separate values.
6127
6128 Use the ``ignoreExpr`` argument to define expressions that may
6129 contain opening or closing characters that should not be treated as
6130 opening or closing characters for nesting, such as quotedString or
6131 a comment expression. Specify multiple expressions using an
6132 :class:`Or` or :class:`MatchFirst`. The default is
6133 :class:`quotedString`, but if no expressions are to be ignored, then
6134 pass ``None`` for this argument.
6135
6136 Example::
6137
6138 data_type = oneOf("void int short long char float double")
6139 decl_data_type = Combine(data_type + Optional(Word('*')))
6140 ident = Word(alphas+'_', alphanums+'_')
6141 number = pyparsing_common.number
6142 arg = Group(decl_data_type + ident)
6143 LPAR, RPAR = map(Suppress, "()")
6144
6145 code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))
6146
6147 c_function = (decl_data_type("type")
6148 + ident("name")
6149 + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
6150 + code_body("body"))
6151 c_function.ignore(cStyleComment)
6152
6153 source_code = '''
6154 int is_odd(int x) {
6155 return (x%2);
6156 }
6157
6158 int dec_to_hex(char hchar) {
6159 if (hchar >= '0' && hchar <= '9') {
6160 return (ord(hchar)-ord('0'));
6161 } else {
6162 return (10+ord(hchar)-ord('A'));
6163 }
6164 }
6165 '''
6166 for func in c_function.searchString(source_code):
6167 print("%(name)s (%(type)s) args: %(args)s" % func)
6168
6169
6170 prints::
6171
6172 is_odd (int) args: [['int', 'x']]
6173 dec_to_hex (int) args: [['char', 'hchar']]
6174 """
6175 if opener == closer:
6176 raise ValueError("opening and closing strings cannot be the same")
6177 if content is None:
6178 if isinstance(opener, basestring) and isinstance(closer, basestring):
6179 if len(opener) == 1 and len(closer) == 1:
6180 if ignoreExpr is not None:
6181 content = (Combine(OneOrMore(~ignoreExpr
6182 + CharsNotIn(opener
6183 + closer
6184 + ParserElement.DEFAULT_WHITE_CHARS, exact=1)
6185 )
6186 ).setParseAction(lambda t: t[0].strip()))
6187 else:
6188 content = (empty.copy() + CharsNotIn(opener
6189 + closer
6190 + ParserElement.DEFAULT_WHITE_CHARS
6191 ).setParseAction(lambda t: t[0].strip()))
6192 else:
6193 if ignoreExpr is not None:
6194 content = (Combine(OneOrMore(~ignoreExpr
6195 + ~Literal(opener)
6196 + ~Literal(closer)
6197 + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1))
6198 ).setParseAction(lambda t: t[0].strip()))
6199 else:
6200 content = (Combine(OneOrMore(~Literal(opener)
6201 + ~Literal(closer)
6202 + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1))
6203 ).setParseAction(lambda t: t[0].strip()))
6204 else:
6205 raise ValueError("opening and closing arguments must be strings if no content expression is given")
6206 ret = Forward()
6207 if ignoreExpr is not None:
6208 ret <<= Group(Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer))
6209 else:
6210 ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
6211 ret.setName('nested %s%s expression' % (opener, closer))
6212 return ret
6213
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Build an expression that matches a block of statements delimited by
    indentation, in the manner of compound statements in Python source.

    Parameters:

     - blockStatementExpr - expression for a single statement; it is matched
       repeatedly inside the block
     - indentStack - list supplied by the caller that holds the stack of
       active indentation columns (every indentedBlock expression used in
       one grammar should be given the same list)
     - indent - when ``True`` (the default) the block has to begin at a
       column beyond the current indentation level; pass ``False`` for a
       block of left-most statements

    A block has to contain one or more matches of ``blockStatementExpr``.

    Side effects visible in the code below: a copy of ``indentStack`` is taken
    when this function is called and is written back into ``indentStack``
    whenever the returned expression fails to match; ``blockStatementExpr``
    itself is modified by adding ``_bslash + LineEnd()`` as an ignorable
    expression.

    Example::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Optional(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # snapshot of the caller's stack, used to undo partial pushes/pops on failure
    saved_stack = indentStack[:]

    def checkPeerIndent(instring, loc, toks):
        # nothing to validate once the end of the input has been reached
        if loc >= len(instring):
            return
        column = col(loc, instring)
        if column == indentStack[-1]:
            return
        if column > indentStack[-1]:
            raise ParseException(instring, loc, "illegal nesting")
        raise ParseException(instring, loc, "not a peer entry")

    def checkSubIndent(instring, loc, toks):
        column = col(loc, instring)
        if column <= indentStack[-1]:
            raise ParseException(instring, loc, "not a subentry")
        # deeper than the enclosing level: this column becomes the current level
        indentStack.append(column)

    def checkUnindent(instring, loc, toks):
        if loc >= len(instring):
            return
        column = col(loc, instring)
        if not indentStack or column not in indentStack:
            raise ParseException(instring, loc, "not an unindent")
        if column < indentStack[-1]:
            indentStack.pop()

    def restore_stack(instring, loc, expr, err):
        # fail action: put the stack contents back to the snapshot, in place
        indentStack[:] = saved_stack

    blank_lines = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress(), stopOn=StringEnd())
    indent_marker = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    peer_marker = Empty().setParseAction(checkPeerIndent).setName('')
    undent_marker = Empty().setParseAction(checkUnindent).setName('UNINDENT')

    # one or more peer-aligned statements, each optionally followed by blank lines
    statements = OneOrMore(peer_marker + Group(blockStatementExpr) + Optional(blank_lines),
                           stopOn=StringEnd())
    if indent:
        block_body = Optional(blank_lines) + indent_marker + statements + undent_marker
    else:
        block_body = Optional(blank_lines) + statements + undent_marker
    smExpr = Group(block_body)

    smExpr.setFailAction(restore_stack)
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
6339
# Character sets for the 0xa1-0xff code point range, built from range specifications
# by srange(): alphabetic characters and punctuation characters respectively.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# Opening/closing tag expressions for a tag whose name is a word of letters,
# digits, '_' and ':' (as produced by makeHTMLTags).
anyOpenTag, anyCloseTag = makeHTMLTags(Word(alphas, alphanums + "_:").setName('any tag'))
# Entity name -> replacement character; names and characters are paired positionally,
# i.e. gt->'>', lt->'<', amp->'&', nbsp->' ', quot->'"', apos->"'".
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(), '><& "\''))
# Matches '&name;' for any name in _htmlEntityMap; the name is exposed as the
# named regex group 'entity'.
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
6347 """Helper parser action to replace common HTML entities with their special characters"""
6348 return _htmlEntityMap.get(t.entity)
6349
6350
# Predefined comment expressions. The bare string after each assignment is that
# expression's attribute documentation.

# The regex consumes '/*' and the comment body (any '*' not followed by '/');
# the closing '*/' is a separate literal, and Combine joins both into one token.
cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
"Comment of the form ``/* ... */``"

# [\s\S]*? is a non-greedy match of any characters, newlines included.
htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form ``<!-- ... -->``"

restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# A backslash immediately before the newline lets the comment continue on the next line.
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form ``// ... (to end of line)``"

# '+' binds tighter than '|', so this is Combine((regex + '*/') | dblSlashComment).
cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/' | dblSlashComment).setName("C++ style comment")
"Comment of either form :class:`cStyleComment` or :class:`dblSlashComment`"

# Alias: the very same expression object as cppStyleComment, not a copy.
javaStyleComment = cppStyleComment
"Same as :class:`cppStyleComment`"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form ``# ... (to end of line)``"

# One comma-free item: printable words, where interior runs of spaces/tabs are kept
# only when they are not followed by a comma or the end of the line.
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',')
                                  + Optional(Word(" \t")
                                             + ~Literal(",") + ~LineEnd()))).streamline().setName("commaItem")
# Optional(..., default="") makes an empty field between two commas yield ''.
commaSeparatedList = delimitedList(Optional(quotedString.copy() | _commasepitem, default="")).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or
quoted strings, separated by commas.

This expression is deprecated in favor of :class:`pyparsing_common.comma_separated_list`.
"""
6382 """Here are some common low-level expressions that may be useful in
6383 jump-starting parser development:
6384
6385 - numeric forms (:class:`integers<integer>`, :class:`reals<real>`,
6386 :class:`scientific notation<sci_real>`)
6387 - common :class:`programming identifiers<identifier>`
6388 - network addresses (:class:`MAC<mac_address>`,
6389 :class:`IPv4<ipv4_address>`, :class:`IPv6<ipv6_address>`)
6390 - ISO8601 :class:`dates<iso8601_date>` and
6391 :class:`datetime<iso8601_datetime>`
6392 - :class:`UUID<uuid>`
6393 - :class:`comma-separated list<comma_separated_list>`
6394
6395 Parse actions:
6396
6397 - :class:`convertToInteger`
6398 - :class:`convertToFloat`
6399 - :class:`convertToDate`
6400 - :class:`convertToDatetime`
6401 - :class:`stripHTMLTags`
6402 - :class:`upcaseTokens`
6403 - :class:`downcaseTokens`
6404
6405 Example::
6406
6407 pyparsing_common.number.runTests('''
6408 # any int or real number, returned as the appropriate type
6409 100
6410 -100
6411 +100
6412 3.14159
6413 6.02e23
6414 1e-12
6415 ''')
6416
6417 pyparsing_common.fnumber.runTests('''
6418 # any int or real number, returned as float
6419 100
6420 -100
6421 +100
6422 3.14159
6423 6.02e23
6424 1e-12
6425 ''')
6426
6427 pyparsing_common.hex_integer.runTests('''
6428 # hex numbers
6429 100
6430 FF
6431 ''')
6432
6433 pyparsing_common.fraction.runTests('''
6434 # fractions
6435 1/2
6436 -3/4
6437 ''')
6438
6439 pyparsing_common.mixed_integer.runTests('''
6440 # mixed fractions
6441 1
6442 1/2
6443 -3/4
6444 1-3/4
6445 ''')
6446
6447 import uuid
6448 pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
6449 pyparsing_common.uuid.runTests('''
6450 # uuid
6451 12345678-1234-5678-1234-567812345678
6452 ''')
6453
6454 prints::
6455
6456 # any int or real number, returned as the appropriate type
6457 100
6458 [100]
6459
6460 -100
6461 [-100]
6462
6463 +100
6464 [100]
6465
6466 3.14159
6467 [3.14159]
6468
6469 6.02e23
6470 [6.02e+23]
6471
6472 1e-12
6473 [1e-12]
6474
6475 # any int or real number, returned as float
6476 100
6477 [100.0]
6478
6479 -100
6480 [-100.0]
6481
6482 +100
6483 [100.0]
6484
6485 3.14159
6486 [3.14159]
6487
6488 6.02e23
6489 [6.02e+23]
6490
6491 1e-12
6492 [1e-12]
6493
6494 # hex numbers
6495 100
6496 [256]
6497
6498 FF
6499 [255]
6500
6501 # fractions
6502 1/2
6503 [0.5]
6504
6505 -3/4
6506 [-0.75]
6507
6508 # mixed fractions
6509 1
6510 [1]
6511
6512 1/2
6513 [0.5]
6514
6515 -3/4
6516 [-0.75]
6517
6518 1-3/4
6519 [1.75]
6520
6521 # uuid
6522 12345678-1234-5678-1234-567812345678
6523 [UUID('12345678-1234-5678-1234-567812345678')]
6524 """
6525
6526 convertToInteger = tokenMap(int)
6527 """
6528 Parse action for converting parsed integers to Python int
6529 """
6530
6531 convertToFloat = tokenMap(float)
6532 """
6533 Parse action for converting parsed numbers to Python float
6534 """
6535
6536 integer = Word(nums).setName("integer").setParseAction(convertToInteger)
6537 """expression that parses an unsigned integer, returns an int"""
6538
6539 hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int, 16))
6540 """expression that parses a hexadecimal integer, returns an int"""
6541
6542 signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
6543 """expression that parses an integer with optional leading sign, returns an int"""
6544
6545 fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction")
6546 """fractional expression of an integer divided by an integer, returns a float"""
6547 fraction.addParseAction(lambda t: t[0]/t[-1])
6548
6549 mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
6550 """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
6551 mixed_integer.addParseAction(sum)
6552
6553 real = Regex(r'[+-]?(:?\d+\.\d*|\.\d+)').setName("real number").setParseAction(convertToFloat)
6554 """expression that parses a floating point number and returns a float"""
6555
6556 sci_real = Regex(r'[+-]?(:?\d+(:?[eE][+-]?\d+)|(:?\d+\.\d*|\.\d+)(:?[eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
6557 """expression that parses a floating point number with optional
6558 scientific notation and returns a float"""
6559
6560
6561 number = (sci_real | real | signed_integer).streamline()
6562 """any numeric expression, returns the corresponding Python type"""
6563
6564 fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
6565 """any int or real number, returned as float"""
6566
6567 identifier = Word(alphas + '_', alphanums + '_').setName("identifier")
6568 """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""
6569
6570 ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
6571 "IPv4 address (``0.0.0.0 - 255.255.255.255``)"
6572
6573 _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
6574 _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part) * 7).setName("full IPv6 address")
6575 _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part) * (0, 6))
6576 + "::"
6577 + Optional(_ipv6_part + (':' + _ipv6_part) * (0, 6))
6578 ).setName("short IPv6 address")
6579 _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
6580 _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
6581 ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
6582 "IPv6 address (long, short, or mixed form)"
6583
6584 mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
    "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"
6586
6587 @staticmethod
6589 """
6590 Helper to create a parse action for converting parsed date string to Python datetime.date
6591
6592 Params -
6593 - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``)
6594
6595 Example::
6596
6597 date_expr = pyparsing_common.iso8601_date.copy()
6598 date_expr.setParseAction(pyparsing_common.convertToDate())
6599 print(date_expr.parseString("1999-12-31"))
6600
6601 prints::
6602
6603 [datetime.date(1999, 12, 31)]
6604 """
6605 def cvt_fn(s, l, t):
6606 try:
6607 return datetime.strptime(t[0], fmt).date()
6608 except ValueError as ve:
6609 raise ParseException(s, l, str(ve))
6610 return cvt_fn
6611
6612 @staticmethod
6614 """Helper to create a parse action for converting parsed
6615 datetime string to Python datetime.datetime
6616
6617 Params -
6618 - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``)
6619
6620 Example::
6621
6622 dt_expr = pyparsing_common.iso8601_datetime.copy()
6623 dt_expr.setParseAction(pyparsing_common.convertToDatetime())
6624 print(dt_expr.parseString("1999-12-31T23:59:59.999"))
6625
6626 prints::
6627
6628 [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
6629 """
6630 def cvt_fn(s, l, t):
6631 try:
6632 return datetime.strptime(t[0], fmt)
6633 except ValueError as ve:
6634 raise ParseException(s, l, str(ve))
6635 return cvt_fn
6636
6637 iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
6638 "ISO8601 date (``yyyy-mm-dd``)"
6639
6640 iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
6641 "ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``"
6642
6643 uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
6644 "UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)"
6645
6646 _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
6647 @staticmethod
6664
6665 _commasepitem = Combine(OneOrMore(~Literal(",")
6666 + ~LineEnd()
6667 + Word(printables, excludeChars=',')
6668 + Optional(White(" \t")))).streamline().setName("commaItem")
6669 comma_separated_list = delimitedList(Optional(quotedString.copy()
6670 | _commasepitem, default='')
6671 ).setName("comma separated list")
6672 """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""
6673
6674 upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
6675 """Parse action to convert tokens to upper case."""
6676
6677 downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
6678 """Parse action to convert tokens to lower case."""
6679
6683 self.fn = fn
6684 self.__doc__ = fn.__doc__
6685 self.__name__ = fn.__name__
6686
6688 if cls is None:
6689 cls = type(obj)
6690 if not hasattr(cls, '_intern') or any(cls._intern is getattr(superclass, '_intern', [])
6691 for superclass in cls.__mro__[1:]):
6692 cls._intern = {}
6693 attrname = self.fn.__name__
6694 if attrname not in cls._intern:
6695 cls._intern[attrname] = self.fn(cls)
6696 return cls._intern[attrname]
6697
6700 """
6701 A set of Unicode characters, for language-specific strings for
6702 ``alphas``, ``nums``, ``alphanums``, and ``printables``.
6703 A unicode_set is defined by a list of ranges in the Unicode character
6704 set, in a class attribute ``_ranges``, such as::
6705
6706 _ranges = [(0x0020, 0x007e), (0x00a0, 0x00ff),]
6707
6708 A unicode set can also be defined using multiple inheritance of other unicode sets::
6709
6710 class CJK(Chinese, Japanese, Korean):
6711 pass
6712 """
6713 _ranges = []
6714
6715 @classmethod
6717 ret = []
6718 for cc in cls.__mro__:
6719 if cc is unicode_set:
6720 break
6721 for rr in cc._ranges:
6722 ret.extend(range(rr[0], rr[-1] + 1))
6723 return [unichr(c) for c in sorted(set(ret))]
6724
6725 @_lazyclassproperty
6727 "all non-whitespace characters in this range"
6728 return u''.join(filterfalse(unicode.isspace, cls._get_chars_for_ranges()))
6729
6730 @_lazyclassproperty
6732 "all alphabetic characters in this range"
6733 return u''.join(filter(unicode.isalpha, cls._get_chars_for_ranges()))
6734
6735 @_lazyclassproperty
6737 "all numeric digit characters in this range"
6738 return u''.join(filter(unicode.isdigit, cls._get_chars_for_ranges()))
6739
6740 @_lazyclassproperty
6742 "all alphanumeric characters in this range"
6743 return cls.alphas + cls.nums
6744
6747 """
6748 A namespace class for defining common language unicode_sets.
6749 """
6750 _ranges = [(32, sys.maxunicode)]
6751
6753 "Unicode set for Latin-1 Unicode Character Range"
6754 _ranges = [(0x0020, 0x007e), (0x00a0, 0x00ff),]
6755
6757 "Unicode set for Latin-A Unicode Character Range"
6758 _ranges = [(0x0100, 0x017f),]
6759
6761 "Unicode set for Latin-B Unicode Character Range"
6762 _ranges = [(0x0180, 0x024f),]
6763
    class Greek(unicode_set):
        "Unicode set for Greek Unicode Character Ranges"
        # Each tuple is an inclusive (first, last) code point range. A one-element
        # tuple such as (0x1f59,) stands for a single code point, because the range
        # expansion reads the first and the last item of each tuple.
        _ranges = [
            (0x0370, 0x03ff), (0x1f00, 0x1f15), (0x1f18, 0x1f1d), (0x1f20, 0x1f45), (0x1f48, 0x1f4d),
            (0x1f50, 0x1f57), (0x1f59,), (0x1f5b,), (0x1f5d,), (0x1f5f, 0x1f7d), (0x1f80, 0x1fb4), (0x1fb6, 0x1fc4),
            (0x1fc6, 0x1fd3), (0x1fd6, 0x1fdb), (0x1fdd, 0x1fef), (0x1ff2, 0x1ff4), (0x1ff6, 0x1ffe),
        ]
6771
6773 "Unicode set for Cyrillic Unicode Character Range"
6774 _ranges = [(0x0400, 0x04ff)]
6775
6777 "Unicode set for Chinese Unicode Character Range"
6778 _ranges = [(0x4e00, 0x9fff), (0x3000, 0x303f),]
6779
6781 "Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges"
6782 _ranges = []
6783
        class Kanji(unicode_set):
            "Unicode set for Kanji Unicode Character Range"
            # Inclusive (first, last) code point ranges; these are concatenated with the
            # Hiragana and Katakana ranges into Japanese._ranges at module level.
            _ranges = [(0x4E00, 0x9Fbf), (0x3000, 0x303f),]
6787
6789 "Unicode set for Hiragana Unicode Character Range"
6790 _ranges = [(0x3040, 0x309f),]
6791
6793 "Unicode set for Katakana Unicode Character Range"
6794 _ranges = [(0x30a0, 0x30ff),]
6795
6797 "Unicode set for Korean Unicode Character Range"
6798 _ranges = [(0xac00, 0xd7af), (0x1100, 0x11ff), (0x3130, 0x318f), (0xa960, 0xa97f), (0xd7b0, 0xd7ff), (0x3000, 0x303f),]
6799
    class CJK(Chinese, Japanese, Korean):
        "Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range"
        # No _ranges of its own: the characters come from the ranges of the classes
        # found while walking this class's MRO.
        pass
6803
    class Thai(unicode_set):
        "Unicode set for Thai Unicode Character Range"
        # Two inclusive ranges; the code points 0x0e3b-0x0e3e that lie between them
        # are deliberately left out.
        _ranges = [(0x0e01, 0x0e3a), (0x0e3f, 0x0e5b),]
6807
6809 "Unicode set for Arabic Unicode Character Range"
6810 _ranges = [(0x0600, 0x061b), (0x061e, 0x06ff), (0x0700, 0x077f),]
6811
6813 "Unicode set for Hebrew Unicode Character Range"
6814 _ranges = [(0x0590, 0x05ff),]
6815
6817 "Unicode set for Devanagari Unicode Character Range"
6818 _ranges = [(0x0900, 0x097f), (0xa8e0, 0xa8ff)]
6819
# Japanese declares no ranges of its own: assemble them, in order, from the
# ranges of its three nested script sets.
pyparsing_unicode.Japanese._ranges = sum(
    (script._ranges for script in (pyparsing_unicode.Japanese.Kanji,
                                   pyparsing_unicode.Japanese.Hiragana,
                                   pyparsing_unicode.Japanese.Katakana)),
    [],
)

# define ranges in language character sets
if PY_3:
    # (namespace, native-language attribute name, unicode set) - each set is also
    # published on its namespace under the name written in its own script
    for _namespace, _native_name, _charset in (
        (pyparsing_unicode, u"العربية", pyparsing_unicode.Arabic),
        (pyparsing_unicode, u"中文", pyparsing_unicode.Chinese),
        (pyparsing_unicode, u"кириллица", pyparsing_unicode.Cyrillic),
        (pyparsing_unicode, u"Ελληνικά", pyparsing_unicode.Greek),
        (pyparsing_unicode, u"עִברִית", pyparsing_unicode.Hebrew),
        (pyparsing_unicode, u"日本語", pyparsing_unicode.Japanese),
        (pyparsing_unicode.Japanese, u"漢字", pyparsing_unicode.Japanese.Kanji),
        (pyparsing_unicode.Japanese, u"カタカナ", pyparsing_unicode.Japanese.Katakana),
        (pyparsing_unicode.Japanese, u"ひらがな", pyparsing_unicode.Japanese.Hiragana),
        (pyparsing_unicode, u"한국어", pyparsing_unicode.Korean),
        (pyparsing_unicode, u"ไทย", pyparsing_unicode.Thai),
        (pyparsing_unicode, u"देवनागरी", pyparsing_unicode.Devanagari),
    ):
        setattr(_namespace, _native_name, _charset)
    # keep the loop variables out of the module namespace
    del _namespace, _native_name, _charset
6841 """
6842 namespace class for classes useful in writing unit tests
6843 """
6844
6846 """
6847 Context manager to be used when writing unit tests that modify pyparsing config values:
6848 - packrat parsing
6849 - default whitespace characters.
6850 - default keyword characters
6851 - literal string auto-conversion class
6852 - __diag__ settings
6853
6854 Example:
6855 with reset_pyparsing_context():
6856 # test that literals used to construct a grammar are automatically suppressed
6857 ParserElement.inlineLiteralsUsing(Suppress)
6858
6859 term = Word(alphas) | Word(nums)
6860 group = Group('(' + term[...] + ')')
6861
6862 # assert that the '()' characters are not included in the parsed tokens
                self.assertParseAndCheckList(group, "(abc 123 def)", ['abc', '123', 'def'])
6864
6865 # after exiting context manager, literals are converted to Literal expressions again
6866 """
6867
        def __init__(self):
            # Holds the configuration values captured for later restoration;
            # restore() reads its entries by key.
            self._save_context = {}
6870
6872 self._save_context["default_whitespace"] = ParserElement.DEFAULT_WHITE_CHARS
6873 self._save_context["default_keyword_chars"] = Keyword.DEFAULT_KEYWORD_CHARS
6874 self._save_context[
6875 "literal_string_class"
6876 ] = ParserElement._literalStringClass
6877 self._save_context["packrat_enabled"] = ParserElement._packratEnabled
6878 self._save_context["packrat_parse"] = ParserElement._parse
6879 self._save_context["__diag__"] = {
6880 name: getattr(__diag__, name) for name in __diag__._all_names
6881 }
6882 self._save_context["__compat__"] = {
6883 "collect_all_And_tokens": __compat__.collect_all_And_tokens
6884 }
6885 return self
6886
6887 - def restore(self):
6888
6889 if (
6890 ParserElement.DEFAULT_WHITE_CHARS
6891 != self._save_context["default_whitespace"]
6892 ):
6893 ParserElement.setDefaultWhitespaceChars(
6894 self._save_context["default_whitespace"]
6895 )
6896 Keyword.DEFAULT_KEYWORD_CHARS = self._save_context["default_keyword_chars"]
6897 ParserElement.inlineLiteralsUsing(
6898 self._save_context["literal_string_class"]
6899 )
6900 for name, value in self._save_context["__diag__"].items():
6901 setattr(__diag__, name, value)
6902 ParserElement._packratEnabled = self._save_context["packrat_enabled"]
6903 ParserElement._parse = self._save_context["packrat_parse"]
6904 __compat__.collect_all_And_tokens = self._save_context["__compat__"]
6905
6906 - def __enter__(self):
6908
        def __exit__(self, *args):
            # The exception details passed in *args are ignored: the saved settings
            # are restored whether or not the with-body raised. restore() has no
            # return statement, so None is returned and any exception propagates.
            return self.restore()
6911
6913 """
6914 A mixin class to add parse results assertion methods to normal unittest.TestCase classes.
6915 """
6919 """
6920 Unit test assertion to compare a ParseResults object with an optional expected_list,
6921 and compare any defined results names with an optional expected_dict.
6922 """
6923 if expected_list is not None:
6924 self.assertEqual(expected_list, result.asList(), msg=msg)
6925 if expected_dict is not None:
6926 self.assertEqual(expected_dict, result.asDict(), msg=msg)
6927
6931 """
6932 Convenience wrapper assert to test a parser element and input string, and assert that
6933 the resulting ParseResults.asList() is equal to the expected_list.
6934 """
6935 result = expr.parseString(test_string, parseAll=True)
6936 if verbose:
6937 print(result.dump())
6938 self.assertParseResultsEquals(result, expected_list=expected_list, msg=msg)
6939
6943 """
6944 Convenience wrapper assert to test a parser element and input string, and assert that
6945 the resulting ParseResults.asDict() is equal to the expected_dict.
6946 """
6947 result = expr.parseString(test_string, parseAll=True)
6948 if verbose:
6949 print(result.dump())
6950 self.assertParseResultsEquals(result, expected_dict=expected_dict, msg=msg)
6951
6952 - def assertRunTestResults(
6953 self, run_tests_report, expected_parse_results=None, msg=None
6954 ):
6955 """
6956 Unit test assertion to evaluate output of ParserElement.runTests(). If a list of
6957 list-dict tuples is given as the expected_parse_results argument, then these are zipped
6958 with the report tuples returned by runTests and evaluated using assertParseResultsEquals.
6959 Finally, asserts that the overall runTests() success value is True.
6960
6961 :param run_tests_report: tuple(bool, [tuple(str, ParseResults or Exception)]) returned from runTests
6962 :param expected_parse_results (optional): [tuple(str, list, dict, Exception)]
6963 """
6964 run_test_success, run_test_results = run_tests_report
6965
6966 if expected_parse_results is not None:
6967 merged = [
6968 (rpt[0], rpt[1], expected)
6969 for rpt, expected in zip(run_test_results, expected_parse_results)
6970 ]
6971 for test_string, result, expected in merged:
6972
6973
6974
6975 fail_msg = next(
6976 (exp for exp in expected if isinstance(exp, str)), None
6977 )
6978 expected_exception = next(
6979 (
6980 exp
6981 for exp in expected
6982 if isinstance(exp, type) and issubclass(exp, Exception)
6983 ),
6984 None,
6985 )
6986 if expected_exception is not None:
6987 with self.assertRaises(
6988 expected_exception=expected_exception, msg=fail_msg or msg
6989 ):
6990 if isinstance(result, Exception):
6991 raise result
6992 else:
6993 expected_list = next(
6994 (exp for exp in expected if isinstance(exp, list)), None
6995 )
6996 expected_dict = next(
6997 (exp for exp in expected if isinstance(exp, dict)), None
6998 )
6999 if (expected_list, expected_dict) != (None, None):
7000 self.assertParseResultsEquals(
7001 result,
7002 expected_list=expected_list,
7003 expected_dict=expected_dict,
7004 msg=fail_msg or msg,
7005 )
7006 else:
7007
7008 print("no validation for {!r}".format(test_string))
7009
7010
7011 self.assertTrue(
7012 run_test_success, msg=msg if msg is not None else "failed runTests"
7013 )
7014
7015 @contextmanager
7017 with self.assertRaises(exc_type, msg=msg):
7018 yield
7019
7020
if __name__ == "__main__":
    # Demo / smoke test, executed only when this module is run as a script.

    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    ident = Word(alphas, alphanums + "_$")

    # dotted names are joined into a single token (combine=True), then passed
    # through the upcaseTokens parse action
    columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName)).setName("columns")
    columnSpec = ('*' | columnNameList)

    tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName)).setName("tables")

    # the call syntax expr("name") attaches a results name to that part of the grammar
    simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables")

    # demo runTests method, including embedded comments in test string
    simpleSQL.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    # numbers converted to int or float as appropriate
    pyparsing_common.number.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    # any int or real number, returned as float
    pyparsing_common.fnumber.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    # NOTE: this replaces the parse action on the shared pyparsing_common.uuid
    # expression itself, not on a copy
    import uuid
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)
7091