Module pyparsing
[hide private]
[frames | no frames]

Source Code for Module pyparsing

   1  # -*- coding: utf-8 -*- 
   2  # module pyparsing.py 
   3  # 
   4  # Copyright (c) 2003-2019  Paul T. McGuire 
   5  # 
   6  # Permission is hereby granted, free of charge, to any person obtaining 
   7  # a copy of this software and associated documentation files (the 
   8  # "Software"), to deal in the Software without restriction, including 
   9  # without limitation the rights to use, copy, modify, merge, publish, 
  10  # distribute, sublicense, and/or sell copies of the Software, and to 
  11  # permit persons to whom the Software is furnished to do so, subject to 
  12  # the following conditions: 
  13  # 
  14  # The above copyright notice and this permission notice shall be 
  15  # included in all copies or substantial portions of the Software. 
  16  # 
  17  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  18  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  19  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  20  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  21  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  22  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  23  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  24  # 
  25   
  26  __doc__ = \ 
  27  """ 
  28  pyparsing module - Classes and methods to define and execute parsing grammars 
  29  ============================================================================= 
  30   
  31  The pyparsing module is an alternative approach to creating and 
  32  executing simple grammars, vs. the traditional lex/yacc approach, or the 
  33  use of regular expressions.  With pyparsing, you don't need to learn 
  34  a new syntax for defining grammars or matching expressions - the parsing 
  35  module provides a library of classes that you use to construct the 
  36  grammar directly in Python. 
  37   
  38  Here is a program to parse "Hello, World!" (or any greeting of the form 
  39  ``"<salutation>, <addressee>!"``), built up using :class:`Word`, 
  40  :class:`Literal`, and :class:`And` elements 
  41  (the :class:`'+'<ParserElement.__add__>` operators create :class:`And` expressions, 
  42  and the strings are auto-converted to :class:`Literal` expressions):: 
  43   
  44      from pyparsing import Word, alphas 
  45   
  46      # define grammar of a greeting 
  47      greet = Word(alphas) + "," + Word(alphas) + "!" 
  48   
  49      hello = "Hello, World!" 
  50      print (hello, "->", greet.parseString(hello)) 
  51   
  52  The program outputs the following:: 
  53   
  54      Hello, World! -> ['Hello', ',', 'World', '!'] 
  55   
  56  The Python representation of the grammar is quite readable, owing to the 
  57  self-explanatory class names, and the use of '+', '|' and '^' operators. 
  58   
  59  The :class:`ParseResults` object returned from 
  60  :class:`ParserElement.parseString` can be 
  61  accessed as a nested list, a dictionary, or an object with named 
  62  attributes. 
  63   
  64  The pyparsing module handles some of the problems that are typically 
  65  vexing when writing text parsers: 
  66   
  67    - extra or missing whitespace (the above program will also handle 
  68      "Hello,World!", "Hello  ,  World  !", etc.) 
  69    - quoted strings 
  70    - embedded comments 
  71   
  72   
  73  Getting Started - 
  74  ----------------- 
  75  Visit the classes :class:`ParserElement` and :class:`ParseResults` to 
  76  see the base classes that most other pyparsing 
  77  classes inherit from. Use the docstrings for examples of how to: 
  78   
  79   - construct literal match expressions from :class:`Literal` and 
  80     :class:`CaselessLiteral` classes 
  81   - construct character word-group expressions using the :class:`Word` 
  82     class 
  83   - see how to create repetitive expressions using :class:`ZeroOrMore` 
  84     and :class:`OneOrMore` classes 
  85   - use :class:`'+'<And>`, :class:`'|'<MatchFirst>`, :class:`'^'<Or>`, 
  86     and :class:`'&'<Each>` operators to combine simple expressions into 
  87     more complex ones 
  88   - associate names with your parsed results using 
  89     :class:`ParserElement.setResultsName` 
  90   - access the parsed data, which is returned as a :class:`ParseResults` 
  91     object 
  92   - find some helpful expression short-cuts like :class:`delimitedList` 
  93     and :class:`oneOf` 
  94   - find more useful common expressions in the :class:`pyparsing_common` 
  95     namespace class 
  96  """ 
  97   
  98  __version__ = "2.4.6" 
  99  __versionTime__ = "24 Dec 2019 04:27 UTC" 
 100  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
 101   
 102  import string 
 103  from weakref import ref as wkref 
 104  import copy 
 105  import sys 
 106  import warnings 
 107  import re 
 108  import sre_constants 
 109  import collections 
 110  import pprint 
 111  import traceback 
 112  import types 
 113  from datetime import datetime 
 114  from operator import itemgetter 
 115  import itertools 
 116  from functools import wraps 
 117  from contextlib import contextmanager 
 118   
 119  try: 
 120      # Python 3 
 121      from itertools import filterfalse 
 122  except ImportError: 
 123      from itertools import ifilterfalse as filterfalse 
 124   
 125  try: 
 126      from _thread import RLock 
 127  except ImportError: 
 128      from threading import RLock 
 129   
 130  try: 
 131      # Python 3 
 132      from collections.abc import Iterable 
 133      from collections.abc import MutableMapping, Mapping 
 134  except ImportError: 
 135      # Python 2.7 
 136      from collections import Iterable 
 137      from collections import MutableMapping, Mapping 
 138   
 139  try: 
 140      from collections import OrderedDict as _OrderedDict 
 141  except ImportError: 
 142      try: 
 143          from ordereddict import OrderedDict as _OrderedDict 
 144      except ImportError: 
 145          _OrderedDict = None 
 146   
 147  try: 
 148      from types import SimpleNamespace 
 149  except ImportError: 
150 - class SimpleNamespace: pass
# version compatibility configuration
__compat__ = SimpleNamespace()
__compat__.__doc__ = """
    A cross-version compatibility configuration for pyparsing features that will be
    released in a future version. By setting values in this configuration to True,
    those features can be enabled in prior versions for compatibility development
    and testing.

     - collect_all_And_tokens - flag to enable fix for Issue #63 that fixes erroneous grouping
       of results names when an And expression is nested within an Or or MatchFirst; set to
       True to enable bugfix released in pyparsing 2.3.0, or False to preserve
       pre-2.3.0 handling of named results
"""
__compat__.collect_all_And_tokens = True

# diagnostic switches; every flag below starts out disabled
__diag__ = SimpleNamespace()
__diag__.__doc__ = """
Diagnostic configuration (all default to False)
     - warn_multiple_tokens_in_named_alternation - flag to enable warnings when a results
       name is defined on a MatchFirst or Or expression with one or more And subexpressions
       (only warns if __compat__.collect_all_And_tokens is False)
     - warn_ungrouped_named_tokens_in_collection - flag to enable warnings when a results
       name is defined on a containing expression with ungrouped subexpressions that also
       have results names
     - warn_name_set_on_empty_Forward - flag to enable warnings when a Forward is defined
       with a results name, but has no contents defined
     - warn_on_multiple_string_args_to_oneof - flag to enable warnings when oneOf is
       incorrectly called with multiple str arguments
     - enable_debug_on_named_expressions - flag to auto-enable debug on all subsequent
       calls to ParserElement.setName()
"""
__diag__.warn_multiple_tokens_in_named_alternation = False
__diag__.warn_ungrouped_named_tokens_in_collection = False
__diag__.warn_name_set_on_empty_Forward = False
__diag__.warn_on_multiple_string_args_to_oneof = False
__diag__.enable_debug_on_named_expressions = False
# Snapshot of the flag names defined so far (anything named enable_* or warn_*);
# taken here, before any further attribute is attached to __diag__.
__diag__._all_names = [nm for nm in vars(__diag__) if nm.startswith(("enable_", "warn_"))]
def _enable_all_warnings():
    """Set the four ``warn_*`` diagnostic flags on ``__diag__`` to True.

    ``enable_debug_on_named_expressions`` is not touched.
    """
    for flag_name in (
        "warn_multiple_tokens_in_named_alternation",
        "warn_ungrouped_named_tokens_in_collection",
        "warn_name_set_on_empty_Forward",
        "warn_on_multiple_string_args_to_oneof",
    ):
        setattr(__diag__, flag_name, True)
# expose the helper above as a callable attribute of the diagnostics namespace
__diag__.enable_all_warnings = _enable_all_warnings


# names exported by ``from pyparsing import *``
__all__ = ['__version__', '__versionTime__', '__author__', '__compat__', '__diag__',
           'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
           'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
           'PrecededBy', 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
           'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
           'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
           'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
           'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 'Char',
           'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
           'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
           'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
           'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
           'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
           'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
           'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
           'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
           'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
           'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation', 'locatedExpr', 'withClass',
           'CloseMatch', 'tokenMap', 'pyparsing_common', 'pyparsing_unicode', 'unicode_set',
           'conditionAsParseAction', 're',
           ]

# (major, minor, micro) of the running interpreter
system_version = tuple(sys.version_info)[:3]
PY_3 = system_version[0] == 3
if PY_3:
    # Python 3: alias the Python 2 names used throughout the module to their
    # Python 3 equivalents, so the rest of the code can use one spelling.
    _MAX_INT = sys.maxsize
    basestring = str
    unichr = chr
    unicode = str
    _ustr = str

    # build list of single arg builtins, that can be used as parse actions
    singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]

else:
    # Python 2: native names exist; only the integer limit and a lazy range
    # need adapting.
    _MAX_INT = sys.maxint
    range = xrange

    def _ustr(obj):
        """Drop-in replacement for str(obj) that tries to be Unicode
        friendly.  It first tries str(obj).  If that fails with
        a UnicodeEncodeError, then it encodes unicode(obj) with the default
        encoding, using XML character references for unencodable characters,
        and rewrites each ``&#NNN;`` reference as a ``\\u`` escape followed by
        the code point in hex (no zero padding).
        """
        # unicode objects are returned unchanged
        if isinstance(obj, unicode):
            return obj

        try:
            # If this works, then _ustr(obj) has the same behaviour as str(obj), so
            # it won't break any existing code.
            return str(obj)

        except UnicodeEncodeError:
            # Else encode it
            ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
            # Regex is defined elsewhere in this module; it is only looked up
            # when this branch actually runs.
            xmlcharref = Regex(r'&#\d+;')
            xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
            return xmlcharref.transformString(ret)

    # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
    singleArgBuiltins = []
    import __builtin__

    for fname in "sum len sorted reversed list tuple set any all min max".split():
        try:
            singleArgBuiltins.append(getattr(__builtin__, fname))
        except AttributeError:
            # builtin not available on this interpreter - leave it out
            continue

# type object of generator expressions, used for isinstance checks
_generatorType = type((y for y in range(1)))
def _xml_escape(data):
    """Return ``data`` with ``&``, ``>``, ``<``, ``"`` and ``'`` replaced by
    their named XML entities."""
    # '&' is handled first so that the ampersands introduced by the other
    # entities are not escaped a second time
    entity_table = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    escaped = data
    for raw_char, entity in entity_table:
        escaped = escaped.replace(raw_char, entity)
    return escaped
# Character sets commonly used when building Word expressions.
alphas = "".join((string.ascii_uppercase, string.ascii_lowercase))
nums = "0123456789"
hexnums = "".join((nums, "ABCDEFabcdef"))
alphanums = "".join((alphas, nums))
# a single backslash, spelled via its code point
_bslash = chr(92)
# every character of string.printable that is not whitespace
printables = "".join(filterfalse(lambda ch: ch in string.whitespace, string.printable))
def conditionAsParseAction(fn, message=None, fatal=False):
    """Wrap the condition callable ``fn`` as a parse action.

    The returned action calls ``fn(s, l, t)``; when the result is falsy it
    raises an exception built from ``(s, l, message)``, otherwise it returns
    None.

    Parameters:

     - fn - condition callable; it is first passed through ``_trim_arity``
       (defined elsewhere in this module - presumably so that callables
       taking fewer arguments are accepted; confirm there)
     - message - text for the raised exception; defaults to
       "failed user-defined condition"
     - fatal - if True raise ParseFatalException, otherwise ParseException
    """
    if message is None:
        msg = "failed user-defined condition"
    else:
        msg = message
    if fatal:
        exc_type = ParseFatalException
    else:
        exc_type = ParseException
    fn = _trim_arity(fn)

    @wraps(fn)
    def pa(s, l, t):
        if fn(s, l, t):
            return
        raise exc_type(s, l, msg)

    return pa
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(self, pstr, loc=0, msg=None, elem=None):
        """
        Parameters:

         - pstr - the string being parsed; when ``msg`` is omitted this single
           argument is taken as the message instead and ``pstr`` is stored as ''
         - loc - character offset of the failure within ``pstr``
         - msg - description of the failure
         - elem - stored as ``parserElement``
        """
        self.loc = loc
        if msg is None:
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem
        self.args = (pstr, loc, msg)

    @classmethod
    def _from_exception(cls, pe):
        """
        internal factory method to simplify creating one type of ParseException
        from another - avoids having __init__ signature conflicts among subclasses
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)

    def __getattr__(self, aname):
        """supported attributes by name are:
           - lineno - returns the line number of the exception text
           - col, column - returns the column number of the exception text
           - line - returns the line containing the exception text

        Any other missing attribute raises AttributeError.
        """
        # lineno(), col() and line() are module-level helpers defined elsewhere
        # in this file; the values are computed on demand from loc and pstr
        if aname == "lineno":
            return lineno(self.loc, self.pstr)
        elif aname in ("col", "column"):
            return col(self.loc, self.pstr)
        elif aname == "line":
            return line(self.loc, self.pstr)
        else:
            raise AttributeError(aname)

    def __str__(self):
        if self.pstr:
            if self.loc >= len(self.pstr):
                foundstr = ', found end of text'
            else:
                foundstr = (', found %r' % self.pstr[self.loc:self.loc + 1]).replace(r'\\', '\\')
        else:
            foundstr = ''
        return ("%s%s (at char %d), (line:%d, col:%d)" %
                (self.msg, foundstr, self.loc, self.lineno, self.column))

    def __repr__(self):
        return _ustr(self)

    def markInputline(self, markerString=">!<"):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join((line_str[:line_column],
                                markerString, line_str[line_column:]))
        return line_str.strip()

    def __dir__(self):
        # advertise every name __getattr__ computes on demand, including the
        # "column" alias that __str__ and markInputline themselves rely on
        return "lineno col column line".split() + dir(type(self))
362
class ParseException(ParseBaseException):
    """
    Exception thrown when parse expressions don't match class;
    supported attributes by name are:
    - lineno - returns the line number of the exception text
    - col - returns the column number of the exception text
    - line - returns the line containing the exception text

    Example::

        try:
            Word(nums).setName("integer").parseString("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.col))

    prints::

       Expected integer (at char 0), (line:1, col:1)
        column: 1

    """

    @staticmethod
    def explain(exc, depth=16):
        """
        Method to take an exception and translate the Python internal traceback into a list
        of the pyparsing expressions that caused the exception to be raised.

        Parameters:

         - exc - exception raised during parsing (need not be a ParseException, in support
           of Python exceptions that might be raised in a parse action)
         - depth (default=16) - number of levels back in the stack trace to list expression
           and function names; if None, the full stack trace names will be listed; if 0, only
           the failing input line, marker, and exception string will be shown

        Returns a multi-line string listing the ParserElements and/or function names in the
        exception's stack trace.

        Note: the diagnostic output will include string representations of the expressions
        that failed to parse. These representations will be more helpful if you use `setName` to
        give identifiable names to your expressions. Otherwise they will use the default string
        forms, which may be cryptic to read.

        explain() is only supported under Python 3.
        """
        import inspect

        if depth is None:
            depth = sys.getrecursionlimit()
        ret = []
        if isinstance(exc, ParseBaseException):
            # failing input line with a caret under the failing column
            ret.append(exc.line)
            ret.append(' ' * (exc.col - 1) + '^')
        ret.append("{0}: {1}".format(type(exc).__name__, exc))

        if depth > 0:
            # Only the frame objects are used below.  ``context`` is the number
            # of source lines to load per frame, not a frame depth, so ask for
            # none rather than reading source text for every frame.
            callers = inspect.getinnerframes(exc.__traceback__, context=0)
            seen = set()
            for ff in callers[-depth:]:
                frm = ff[0]

                f_self = frm.f_locals.get('self', None)
                if isinstance(f_self, ParserElement):
                    # report each parser element once, and only from its
                    # parse entry points
                    if frm.f_code.co_name not in ('parseImpl', '_parseNoCache'):
                        continue
                    if f_self in seen:
                        continue
                    seen.add(f_self)

                    self_type = type(f_self)
                    ret.append("{0}.{1} - {2}".format(self_type.__module__,
                                                      self_type.__name__,
                                                      f_self))
                elif f_self is not None:
                    # method of some other object: list its type only
                    self_type = type(f_self)
                    ret.append("{0}.{1}".format(self_type.__module__,
                                                self_type.__name__))
                else:
                    # plain function frame; skip frames named 'wrapper' and
                    # module-level code
                    code = frm.f_code
                    if code.co_name in ('wrapper', '<module>'):
                        continue

                    ret.append("{0}".format(code.co_name))

                # skipped frames (the ``continue`` paths) do not consume depth
                depth -= 1
                if not depth:
                    break

        return '\n'.join(ret)
454
class ParseFatalException(ParseBaseException):
    """User-raisable exception for inconsistent parse content; raising it
       stops all parsing immediately."""
460
class ParseSyntaxException(ParseFatalException):
    """Same as :class:`ParseFatalException`, but raised internally when an
    :class:`ErrorStop<And._ErrorStop>` (the '-' operator) signals that parsing
    must stop at once because a syntax error was found that cannot be
    backtracked over.
    """
468
#~ class ReparseException(ParseBaseException):
    #~ """Experimental class - parse actions can raise this exception to cause
       #~ pyparsing to reparse the input string:
        #~ - with a modified input string, and/or
        #~ - with a modified start location
       #~ Set the values of the ReparseException in the constructor, and raise the
       #~ exception in a parse action to cause pyparsing to use the new string/location.
       #~ Setting the values as None causes no change to be made.
       #~ """
    #~ def __init_( self, newstring, restartLoc ):
        #~ self.newParseText = newstring
        #~ self.reparseLoc = restartLoc

class RecursiveGrammarException(Exception):
    """exception thrown by :class:`ParserElement.validate` if the
    grammar could be improperly recursive
    """
    def __init__(self, parseElementList):
        """Store ``parseElementList`` as ``parseElementTrace``."""
        self.parseElementTrace = parseElementList

    def __str__(self):
        # Wrap the trace in a 1-tuple: a bare tuple operand would be unpacked
        # by the % operator and either fail or drop its parentheses.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
491
class _ParseResultsWithOffset(object):
    """Two-item record ``(value, offset)`` stored in the attribute ``tup``.

    Indexing returns the respective item, ``repr`` shows only the first
    item, and ``setOffset`` replaces the second item.
    """
    def __init__(self, p1, p2):
        self.tup = p1, p2

    def __getitem__(self, i):
        pair = self.tup
        return pair[i]

    def __repr__(self):
        first = self.tup[0]
        return repr(first)

    def setOffset(self, i):
        # tuples are immutable, so build a new pair around the same first item
        first = self.tup[0]
        self.tup = (first, i)
501
502 -class ParseResults(object):
503 """Structured parse results, to provide multiple means of access to 504 the parsed data: 505 506 - as a list (``len(results)``) 507 - by list index (``results[0], results[1]``, etc.) 508 - by attribute (``results.<resultsName>`` - see :class:`ParserElement.setResultsName`) 509 510 Example:: 511 512 integer = Word(nums) 513 date_str = (integer.setResultsName("year") + '/' 514 + integer.setResultsName("month") + '/' 515 + integer.setResultsName("day")) 516 # equivalent form: 517 # date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 518 519 # parseString returns a ParseResults object 520 result = date_str.parseString("1999/12/31") 521 522 def test(s, fn=repr): 523 print("%s -> %s" % (s, fn(eval(s)))) 524 test("list(result)") 525 test("result[0]") 526 test("result['month']") 527 test("result.day") 528 test("'month' in result") 529 test("'minutes' in result") 530 test("result.dump()", str) 531 532 prints:: 533 534 list(result) -> ['1999', '/', '12', '/', '31'] 535 result[0] -> '1999' 536 result['month'] -> '12' 537 result.day -> '31' 538 'month' in result -> True 539 'minutes' in result -> False 540 result.dump() -> ['1999', '/', '12', '/', '31'] 541 - day: 31 542 - month: 12 543 - year: 1999 544 """
545 - def __new__(cls, toklist=None, name=None, asList=True, modal=True):
546 if isinstance(toklist, cls): 547 return toklist 548 retobj = object.__new__(cls) 549 retobj.__doinit = True 550 return retobj
551 552 # Performance tuning: we construct a *lot* of these, so keep this 553 # constructor as small and fast as possible
    def __init__(self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance):
        """Initialize the token list and, when ``name`` is given, register the
        tokens under that results name.

        Parameters:

         - toklist - a list (copied), a generator (expanded into a list) or any
           other single object (wrapped in a one-item list); None means empty
         - name - results name for the tokens; int names are converted to
           strings; None or any falsy name registers nothing
         - asList - if True the named entry is stored as a nested ParseResults,
           otherwise the first token (or ``toklist`` itself when it cannot be
           indexed) is stored
         - modal - if False, ``name`` is recorded in the accumulating-names map,
           so that lookups by that name return all stored values rather than
           only the last one
         - isinstance - the builtin bound as a default argument (presumably a
           local-lookup speed-up, in line with the performance note on this
           constructor); callers are not expected to pass it
        """
        # __new__ sets __doinit on fresh objects only; when an existing
        # ParseResults was handed back by __new__ its containers are kept.
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            self.__asList = asList
            self.__modal = modal
            if toklist is None:
                toklist = []
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            elif isinstance(toklist, _generatorType):
                self.__toklist = list(toklist)
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name, int):
                name = _ustr(name)  # will always return a str, but use _ustr for consistency
            self.__name = name
            # nothing is registered for an empty toklist (None, '' or [])
            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None, '', [])):
                if isinstance(toklist, basestring):
                    toklist = [toklist]
                if asList:
                    if isinstance(toklist, ParseResults):
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist.__toklist), 0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]), 0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError, TypeError, IndexError):
                        # toklist cannot be indexed that way - store it whole
                        self[name] = toklist
592
593 - def __getitem__(self, i):
594 if isinstance(i, (int, slice)): 595 return self.__toklist[i] 596 else: 597 if i not in self.__accumNames: 598 return self.__tokdict[i][-1][0] 599 else: 600 return ParseResults([v[0] for v in self.__tokdict[i]])
601
602 - def __setitem__(self, k, v, isinstance=isinstance):
603 if isinstance(v, _ParseResultsWithOffset): 604 self.__tokdict[k] = self.__tokdict.get(k, list()) + [v] 605 sub = v[0] 606 elif isinstance(k, (int, slice)): 607 self.__toklist[k] = v 608 sub = v 609 else: 610 self.__tokdict[k] = self.__tokdict.get(k, list()) + [_ParseResultsWithOffset(v, 0)] 611 sub = v 612 if isinstance(sub, ParseResults): 613 sub.__parent = wkref(self)
614
615 - def __delitem__(self, i):
616 if isinstance(i, (int, slice)): 617 mylen = len(self.__toklist) 618 del self.__toklist[i] 619 620 # convert int to slice 621 if isinstance(i, int): 622 if i < 0: 623 i += mylen 624 i = slice(i, i + 1) 625 # get removed indices 626 removed = list(range(*i.indices(mylen))) 627 removed.reverse() 628 # fixup indices in token dictionary 629 for name, occurrences in self.__tokdict.items(): 630 for j in removed: 631 for k, (value, position) in enumerate(occurrences): 632 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) 633 else: 634 del self.__tokdict[i]
635
636 - def __contains__(self, k):
637 return k in self.__tokdict
638
639 - def __len__(self):
640 return len(self.__toklist)
641
642 - def __bool__(self):
643 return (not not self.__toklist)
644 __nonzero__ = __bool__ 645
646 - def __iter__(self):
647 return iter(self.__toklist)
648
649 - def __reversed__(self):
650 return iter(self.__toklist[::-1])
651
652 - def _iterkeys(self):
653 if hasattr(self.__tokdict, "iterkeys"): 654 return self.__tokdict.iterkeys() 655 else: 656 return iter(self.__tokdict)
657
658 - def _itervalues(self):
659 return (self[k] for k in self._iterkeys())
660
661 - def _iteritems(self):
662 return ((k, self[k]) for k in self._iterkeys())
    # Bind the public dict-style accessors according to the interpreter:
    # on Python 3 keys/values/items are the iterator helpers themselves; on
    # Python 2 the iterators are published as iterkeys/itervalues/iteritems
    # and keys/values/items return lists built from them.
    if PY_3:
        keys = _iterkeys
        """Returns an iterator of all named result keys."""

        values = _itervalues
        """Returns an iterator of all named result values."""

        items = _iteritems
        """Returns an iterator of all named result key-value tuples."""

    else:
        iterkeys = _iterkeys
        """Returns an iterator of all named result keys (Python 2.x only)."""

        itervalues = _itervalues
        """Returns an iterator of all named result values (Python 2.x only)."""

        iteritems = _iteritems
        """Returns an iterator of all named result key-value tuples (Python 2.x only)."""

        def keys(self):
            """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.iterkeys())

        def values(self):
            """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.itervalues())

        def items(self):
            """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.iteritems())
695
696 - def haskeys(self):
697 """Since keys() returns an iterator, this method is helpful in bypassing 698 code that looks for the existence of any defined results names.""" 699 return bool(self.__tokdict)
700
701 - def pop(self, *args, **kwargs):
702 """ 703 Removes and returns item at specified index (default= ``last``). 704 Supports both ``list`` and ``dict`` semantics for ``pop()``. If 705 passed no argument or an integer argument, it will use ``list`` 706 semantics and pop tokens from the list of parsed tokens. If passed 707 a non-integer argument (most likely a string), it will use ``dict`` 708 semantics and pop the corresponding value from any defined results 709 names. A second default return value argument is supported, just as in 710 ``dict.pop()``. 711 712 Example:: 713 714 def remove_first(tokens): 715 tokens.pop(0) 716 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 717 print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321'] 718 719 label = Word(alphas) 720 patt = label("LABEL") + OneOrMore(Word(nums)) 721 print(patt.parseString("AAB 123 321").dump()) 722 723 # Use pop() in a parse action to remove named result (note that corresponding value is not 724 # removed from list form of results) 725 def remove_LABEL(tokens): 726 tokens.pop("LABEL") 727 return tokens 728 patt.addParseAction(remove_LABEL) 729 print(patt.parseString("AAB 123 321").dump()) 730 731 prints:: 732 733 ['AAB', '123', '321'] 734 - LABEL: AAB 735 736 ['AAB', '123', '321'] 737 """ 738 if not args: 739 args = [-1] 740 for k, v in kwargs.items(): 741 if k == 'default': 742 args = (args[0], v) 743 else: 744 raise TypeError("pop() got an unexpected keyword argument '%s'" % k) 745 if (isinstance(args[0], int) 746 or len(args) == 1 747 or args[0] in self): 748 index = args[0] 749 ret = self[index] 750 del self[index] 751 return ret 752 else: 753 defaultvalue = args[1] 754 return defaultvalue
755
756 - def get(self, key, defaultValue=None):
757 """ 758 Returns named result matching the given key, or if there is no 759 such name, then returns the given ``defaultValue`` or ``None`` if no 760 ``defaultValue`` is specified. 761 762 Similar to ``dict.get()``. 763 764 Example:: 765 766 integer = Word(nums) 767 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 768 769 result = date_str.parseString("1999/12/31") 770 print(result.get("year")) # -> '1999' 771 print(result.get("hour", "not specified")) # -> 'not specified' 772 print(result.get("hour")) # -> None 773 """ 774 if key in self: 775 return self[key] 776 else: 777 return defaultValue
778
779 - def insert(self, index, insStr):
780 """ 781 Inserts new element at location index in the list of parsed tokens. 782 783 Similar to ``list.insert()``. 784 785 Example:: 786 787 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 788 789 # use a parse action to insert the parse location in the front of the parsed results 790 def insert_locn(locn, tokens): 791 tokens.insert(0, locn) 792 print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321'] 793 """ 794 self.__toklist.insert(index, insStr) 795 # fixup indices in token dictionary 796 for name, occurrences in self.__tokdict.items(): 797 for k, (value, position) in enumerate(occurrences): 798 occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
799
800 - def append(self, item):
801 """ 802 Add single element to end of ParseResults list of elements. 803 804 Example:: 805 806 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 807 808 # use a parse action to compute the sum of the parsed integers, and add it to the end 809 def append_sum(tokens): 810 tokens.append(sum(map(int, tokens))) 811 print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444] 812 """ 813 self.__toklist.append(item)
814
815 - def extend(self, itemseq):
816 """ 817 Add sequence of elements to end of ParseResults list of elements. 818 819 Example:: 820 821 patt = OneOrMore(Word(alphas)) 822 823 # use a parse action to append the reverse of the matched strings, to make a palindrome 824 def make_palindrome(tokens): 825 tokens.extend(reversed([t[::-1] for t in tokens])) 826 return ''.join(tokens) 827 print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl' 828 """ 829 if isinstance(itemseq, ParseResults): 830 self.__iadd__(itemseq) 831 else: 832 self.__toklist.extend(itemseq)
833
834 - def clear(self):
835 """ 836 Clear all elements and results names. 837 """ 838 del self.__toklist[:] 839 self.__tokdict.clear()
840
841 - def __getattr__(self, name):
842 try: 843 return self[name] 844 except KeyError: 845 return ""
846
847 - def __add__(self, other):
848 ret = self.copy() 849 ret += other 850 return ret
851
852 - def __iadd__(self, other):
853 if other.__tokdict: 854 offset = len(self.__toklist) 855 addoffset = lambda a: offset if a < 0 else a + offset 856 otheritems = other.__tokdict.items() 857 otherdictitems = [(k, _ParseResultsWithOffset(v[0], addoffset(v[1]))) 858 for k, vlist in otheritems for v in vlist] 859 for k, v in otherdictitems: 860 self[k] = v 861 if isinstance(v[0], ParseResults): 862 v[0].__parent = wkref(self) 863 864 self.__toklist += other.__toklist 865 self.__accumNames.update(other.__accumNames) 866 return self
867
868 - def __radd__(self, other):
869 if isinstance(other, int) and other == 0: 870 # useful for merging many ParseResults using sum() builtin 871 return self.copy() 872 else: 873 # this may raise a TypeError - so be it 874 return other + self
875
876 - def __repr__(self):
877 return "(%s, %s)" % (repr(self.__toklist), repr(self.__tokdict))
878
879 - def __str__(self):
880 return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
881
882 - def _asStringList(self, sep=''):
883 out = [] 884 for item in self.__toklist: 885 if out and sep: 886 out.append(sep) 887 if isinstance(item, ParseResults): 888 out += item._asStringList() 889 else: 890 out.append(_ustr(item)) 891 return out
892
def asList(self):
    """
    Return the parsed tokens as a plain, possibly nested, Python list.

    Nested results are converted recursively; every other token is
    placed in the list unchanged.

    Example::

        patt = OneOrMore(Word(alphas))
        result = patt.parseString("sldkj lsdkj sldkj")
        # result prints like a list, but is a pyparsing ParseResults
        print(type(result), result) # -> ParseResults ['sldkj', 'lsdkj', 'sldkj']

        # use asList() to obtain an actual list
        result_list = result.asList()
        print(type(result_list), result_list) # -> list ['sldkj', 'lsdkj', 'sldkj']
    """
    plain = []
    for token in self.__toklist:
        if isinstance(token, ParseResults):
            plain.append(token.asList())
        else:
            plain.append(token)
    return plain
909
def asDict(self):
    """
    Return the named results as a plain, possibly nested, ``dict``.

    Values that are themselves results objects are converted
    recursively: to a dict when they carry results names, otherwise to a
    list of converted members.

    Example::

        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString('12/31/1999')
        result_dict = result.asDict()
        print(repr(result_dict)) # -> {'day': '1999', 'year': '12', 'month': '31'}

        # dict-style access works on the results object itself, but
        # some APIs need a real dict
        import json
        print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
        print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
    """
    def unwrap(value):
        if not isinstance(value, ParseResults):
            return value
        if value.haskeys():
            return value.asDict()
        return [unwrap(member) for member in value]

    # Python 2 exposes the lazy iterator under a different method name
    pairs = self.items() if PY_3 else self.iteritems()
    return dict((key, unwrap(value)) for key, value in pairs)
945
def copy(self):
    """
    Return a new :class:`ParseResults` holding the same tokens and names.

    The name table and accumulated-names mapping are copied shallowly;
    the parent reference and the results name are carried over as-is.
    """
    duplicate = ParseResults(self.__toklist)
    duplicate.__name = self.__name
    duplicate.__parent = self.__parent
    duplicate.__tokdict = self.__tokdict.copy()
    duplicate.__accumNames.update(self.__accumNames)
    return duplicate
956
def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True):
    """
    (Deprecated) Return the parse results as an XML string.

    Tags are taken from results names. The enclosing tag is ``doctag``
    when given, otherwise this object's own results name, otherwise
    ``ITEM``. With ``namedItemsOnly`` set, unnamed tokens are skipped and
    an unnamed object renders as the empty string. With ``formatted``
    false, no newlines or indentation are emitted.
    """
    newline = "\n"
    pieces = []
    # map token position -> results name
    names_by_position = dict((entry[1], key)
                             for (key, entries) in self.__tokdict.items()
                             for entry in entries)
    # NOTE(review): the listing this was taken from collapses runs of
    # whitespace - confirm the width of this indent literal against the
    # original file.
    child_indent = indent + " "

    # collapse out indents if formatting is not desired
    if not formatted:
        indent = ""
        child_indent = ""
        newline = ""

    if doctag is not None:
        own_tag = doctag
    elif self.__name:
        own_tag = self.__name
    else:
        own_tag = None

    if not own_tag:
        if namedItemsOnly:
            return ""
        own_tag = "ITEM"

    pieces += [newline, indent, "<", own_tag, ">"]

    for position, token in enumerate(self.__toklist):
        if isinstance(token, ParseResults):
            pieces.append(token.asXML(names_by_position.get(position),
                                      namedItemsOnly and doctag is None,
                                      child_indent,
                                      formatted))
            continue

        # individual token, see if there is a name for it
        tag = names_by_position.get(position)
        if not tag:
            if namedItemsOnly:
                continue
            tag = "ITEM"
        body_text = _xml_escape(_ustr(token))
        pieces += [newline, child_indent, "<", tag, ">",
                   body_text,
                   "</", tag, ">"]

    pieces += [newline, indent, "</", own_tag, ">"]
    return "".join(pieces)
1017
def __lookup(self, sub):
    """
    Return the results name under which the object ``sub`` (compared by
    identity) is registered, or ``None`` if it is not registered.
    """
    for key, entries in self.__tokdict.items():
        for value, _position in entries:
            if value is sub:
                return key
    return None
1024
def getName(self):
    r"""
    Return the results name for this token expression, or ``None``.

    Handy when one of several alternative expressions may have matched
    at a given location. The name is taken, in order of preference, from:
    this object's own name; the name its parent registered it under; or
    the sole results name it holds when it contains exactly one token.

    Example::

        integer = Word(nums)
        ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
        house_number_expr = Suppress('#') + Word(nums, alphanums)
        user_data = (Group(house_number_expr)("house_number")
                    | Group(ssn_expr)("ssn")
                    | Group(integer)("age"))
        user_info = OneOrMore(user_data)

        result = user_info.parseString("22 111-22-3333 #221B")
        for item in result:
            print(item.getName(), ':', item[0])

    prints::

        age : 22
        ssn : 111-22-3333
        house_number : 221B
    """
    if self.__name:
        return self.__name

    if self.__parent:
        owner = self.__parent()
        return owner.__lookup(self) if owner else None

    if len(self) != 1 or len(self.__tokdict) != 1:
        return None

    sole_entries = next(iter(self.__tokdict.values()))
    if sole_entries[0][1] in (0, -1):
        return next(iter(self.__tokdict.keys()))
    return None
1064
def dump(self, indent='', full=True, include_list=True, _depth=0):
    """
    Diagnostic listing of the contents of a :class:`ParseResults`.

    - ``indent`` - prefix for each emitted line, so the text can be
      embedded in a larger nested display
    - ``full`` - when false, only the token list line is produced
    - ``include_list`` - when false, the token list line is left empty
    - ``_depth`` - nesting level, used internally by recursive calls

    Example::

        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString('12/31/1999')
        print(result.dump())

    prints::

        ['12', '/', '31', '/', '1999']
        - day: 1999
        - month: 31
        - year: 12
    """
    # NOTE(review): the listing this was taken from collapses runs of
    # whitespace - confirm the width of the ' ' per-level indent literals
    # below against the original file.
    pieces = [indent + _ustr(self.asList()) if include_list else '']
    if not full:
        return "".join(pieces)

    if self.haskeys():
        for key, value in sorted((str(k), v) for k, v in self.items()):
            pieces.append('\n')
            pieces.append("%s%s- %s: " % (indent, (' ' * _depth), key))
            if not isinstance(value, ParseResults):
                pieces.append(repr(value))
            elif value:
                pieces.append(value.dump(indent=indent, full=full,
                                         include_list=include_list,
                                         _depth=_depth + 1))
            else:
                pieces.append(_ustr(value))
    elif any(isinstance(member, ParseResults) for member in self):
        # no names at this level, but nested results: list them by index
        for index, member in enumerate(self):
            if isinstance(member, ParseResults):
                rendered = member.dump(indent=indent,
                                       full=full,
                                       include_list=include_list,
                                       _depth=_depth + 1)
            else:
                rendered = _ustr(member)
            pieces.append("\n%s%s[%d]:\n%s%s%s" % (indent,
                                                   (' ' * (_depth)),
                                                   index,
                                                   indent,
                                                   (' ' * (_depth + 1)),
                                                   rendered))

    return "".join(pieces)
1129
def pprint(self, *args, **kwargs):
    """
    Pretty-print the parsed results as a list, using the standard
    `pprint <https://docs.python.org/3/library/pprint.html>`_ module.
    Extra positional and keyword arguments are forwarded unchanged to
    `pprint.pprint <https://docs.python.org/3/library/pprint.html#pprint.pprint>`_ .

    Example::

        ident = Word(alphas, alphanums)
        num = Word(nums)
        func = Forward()
        term = ident | num | Group('(' + func + ')')
        func <<= ident + Group(Optional(delimitedList(term)))
        result = func.parseString("fna a,b,(fnb c,d,200),100")
        result.pprint(width=40)

    prints::

        ['fna',
         ['a',
          'b',
          ['(', 'fnb', ['c', 'd', '200'], ')'],
          '100']]
    """
    as_plain_list = self.asList()
    pprint.pprint(as_plain_list, *args, **kwargs)
1156 1157 # add support for pickle protocol
def __getstate__(self):
    """
    Return the picklable state: ``(toklist, (tokdict copy, parent,
    accumNames, name))``.

    The weak reference to the parent is replaced by the parent object
    itself, or by ``None`` when there is no parent, the reference is
    dead, or the parent is falsy.
    """
    parent = None
    if self.__parent is not None:
        parent = self.__parent() or None
    details = (self.__tokdict.copy(), parent, self.__accumNames, self.__name)
    return (self.__toklist, details)
1164
def __setstate__(self, state):
    """
    Restore the object from the 2-tuple produced by ``__getstate__``,
    re-wrapping a non-``None`` parent in a weak reference.
    """
    self.__toklist = state[0]
    self.__tokdict, parent, accumulated, self.__name = state[1]
    # always start from a fresh dict rather than sharing the pickled one
    self.__accumNames = {}
    self.__accumNames.update(accumulated)
    self.__parent = wkref(parent) if parent is not None else None
1174
def __getnewargs__(self):
    """Return the 4-tuple ``(toklist, name, asList, modal)`` used by pickle when re-creating the object."""
    return (self.__toklist, self.__name, self.__asList, self.__modal)
1177
def __dir__(self):
    """List the class attributes followed by this object's results names."""
    names = dir(type(self))
    names.extend(self.keys())
    return names
1180 1181 @classmethod
def from_dict(cls, other, name=None):
    """
    Helper classmethod building a ParseResults from a mapping, keeping
    each key/value relation as a results name. Mapping values are
    converted recursively. When ``name`` is given, the result is wrapped
    as a single nested ParseResults carrying that name.
    """
    def holds_multiple_items(candidate):
        # iterable, but not a string type
        try:
            iter(candidate)
        except Exception:
            return False
        if PY_3:
            return not isinstance(candidate, (str, bytes))
        return not isinstance(candidate, basestring)

    built = cls([])
    for key, value in other.items():
        if isinstance(value, Mapping):
            built += cls.from_dict(value, name=key)
        else:
            built += cls([value], name=key, asList=holds_multiple_items(value))
    if name is None:
        return built
    return cls([built], name=name)


MutableMapping.register(ParseResults)
def col(loc, strg):
    """Return the column of ``loc`` within ``strg``, treating newlines as line separators.
    The first column is number 1.

    Note: by default the input string has its tabs expanded before
    parsing starts. See :class:`ParserElement.parseString` for details
    on parsing strings that contain TAB characters, and for ways to keep
    the parsed string, the parse location, and the line and column
    positions consistent with one another.
    """
    if 0 < loc < len(strg) and strg[loc - 1] == '\n':
        return 1
    return loc - strg.rfind("\n", 0, loc)
1224
def lineno(loc, strg):
    """Return the line number of ``loc`` within ``strg``, treating newlines as line separators.
    The first line is number 1.

    Note: by default the input string has its tabs expanded before
    parsing starts. See :class:`ParserElement.parseString` for details
    on parsing strings that contain TAB characters, and for ways to keep
    the parsed string, the parse location, and the line and column
    positions consistent with one another.
    """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
1236
def line(loc, strg):
    """Return the line of text containing ``loc`` within ``strg``, treating newlines as line separators.
    """
    start = strg.rfind("\n", 0, loc) + 1
    stop = strg.find("\n", loc)
    if stop < 0:
        # loc is on the last line: run to the end of the string
        return strg[start:]
    return strg[start:stop]
1246
def _defaultStartDebugAction(instring, loc, expr):
    """Default debug action run when a match attempt starts: print the expression and its location."""
    position = "(%d,%d)" % (lineno(loc, instring), col(loc, instring))
    message = "Match " + _ustr(expr) + " at loc " + _ustr(loc) + position
    print(message)
1249
def _defaultSuccessDebugAction(instring, startloc, endloc, expr, toks):
    """Default debug action run after a successful match: print the expression and the matched tokens."""
    matched = str(toks.asList())
    print("Matched " + _ustr(expr) + " -> " + matched)
1252
def _defaultExceptionDebugAction(instring, loc, expr, exc):
    """Default debug action run when a match attempt fails: print the exception."""
    message = "Exception raised:" + _ustr(exc)
    print(message)
1255
def nullDebugAction(*args):
    """'Do-nothing' debug action; pass it to suppress debugging output during parsing."""
    return None
1259 1260 # Only works on Python 3.x - nonlocal is toxic to Python 2 installs 1261 #~ 'decorator to trim function calls to match the arity of the target' 1262 #~ def _trim_arity(func, maxargs=3): 1263 #~ if func in singleArgBuiltins: 1264 #~ return lambda s,l,t: func(t) 1265 #~ limit = 0 1266 #~ foundArity = False 1267 #~ def wrapper(*args): 1268 #~ nonlocal limit,foundArity 1269 #~ while 1: 1270 #~ try: 1271 #~ ret = func(*args[limit:]) 1272 #~ foundArity = True 1273 #~ return ret 1274 #~ except TypeError: 1275 #~ if limit == maxargs or foundArity: 1276 #~ raise 1277 #~ limit += 1 1278 #~ continue 1279 #~ return wrapper 1280 1281 # this version is Python 2.x-3.x cross-compatible 1282 'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """
    Wrap ``func`` so it can always be called with the full parse-action
    argument list, whatever number of arguments ``func`` itself accepts.

    The returned wrapper first calls ``func`` with all arguments. Each
    time that call raises a ``TypeError`` attributed to the call itself,
    one more leading argument is dropped and the call is retried, until
    more than ``maxargs`` arguments have been dropped. Once a call
    succeeds, the number of dropped arguments is kept for later calls
    and any further ``TypeError`` propagates unchanged.

    A ``TypeError`` is attributed to the call (rather than to code
    running inside ``func``) when the innermost traceback entry is the
    file and line of the ``func(...)`` call inside ``wrapper``. That
    location is computed once, here, from the current stack plus the
    fixed line distance ``LINE_DIFF``.

    Callables listed in ``singleArgBuiltins`` are wrapped directly so
    that they receive only the tokens argument.
    """
    if func in singleArgBuiltins:
        return lambda s, l, t: func(t)
    # one-element lists are used as mutable cells that wrapper() can update
    # (no ``nonlocal``, to stay Python 2 compatible)
    limit = [0]
    foundArity = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3, 5):
        def extract_stack(limit=0):
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3, 5, 0) else -2
            frame_summary = traceback.extract_stack(limit=-offset + limit - 1)[offset]
            return [frame_summary[:2]]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [frame_summary[:2]]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1] + LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        # drop the local traceback reference in every case
                        try:
                            del tb
                        except NameError:
                            pass

                # arity mismatch: drop one more leading argument and retry
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper
1350 1351 -class ParserElement(object):
1352 """Abstract base level parser element class.""" 1353 DEFAULT_WHITE_CHARS = " \n\t\r" 1354 verbose_stacktrace = False 1355 1356 @staticmethod
def setDefaultWhitespaceChars(chars):
    r"""
    Replace the class-wide default set of whitespace characters.

    The new value is stored on :class:`ParserElement` itself.

    Example::

        # with the default whitespace chars, newlines are skipped too
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

        # change to just treat newline as significant
        ParserElement.setDefaultWhitespaceChars(" \t")
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
    """
    new_defaults = chars
    ParserElement.DEFAULT_WHITE_CHARS = new_defaults
1371 1372 @staticmethod
def inlineLiteralsUsing(cls):
    """
    Choose the class used to wrap plain string literals that appear
    inside a parser expression.

    Example::

        # default literal class used is Literal
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']


        # change to Suppress
        ParserElement.inlineLiteralsUsing(Suppress)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
    """
    literal_class = cls
    ParserElement._literalStringClass = literal_class
1393
def __init__(self, savelist=False):
    """
    Set every per-element attribute to its default.

    ``savelist`` becomes the initial value of ``saveAsList``.
    """
    # -- actions and debugging
    self.parseAction = []
    self.failAction = None
    self.callDuringTry = False
    self.debug = False
    self.debugActions = (None, None, None)  # custom debug actions

    # -- naming and results
    # ~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
    self.strRepr = None
    self.resultsName = None
    self.saveAsList = savelist
    self.modalResults = True  # used to mark results names as modal (report only last) or cumulative (list all)

    # -- whitespace and ignorable handling
    self.skipWhitespace = True
    self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    self.copyDefaultWhiteChars = True
    self.keepTabs = False
    self.ignoreExprs = []
    self.callPreparse = True  # used to avoid redundant calls to preParse

    # -- parser bookkeeping
    self.mayReturnEmpty = False  # used when checking for left-recursion
    self.mayIndexError = True  # used to optimize exception handling for subclasses that don't advance parse index
    self.streamlined = False
    self.errmsg = ""
    self.re = None
1416
def copy(self):
    """
    Make a copy of this :class:`ParserElement`. Useful for defining
    different parse actions for the same parsing pattern, using copies of
    the original parse element.

    The copy is shallow, except that the parse-action and ignore-expression
    lists are duplicated so that adding to one element does not affect the
    other. When ``copyDefaultWhiteChars`` is set, the copy's whitespace
    characters are reset from the current class-wide default.

    Example::

        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        integerK = integer.copy().addParseAction(lambda toks: toks[0] * 1024) + Suppress("K")
        integerM = integer.copy().addParseAction(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")

        print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))

    prints::

        [5120, 100, 655360, 268435456]

    Equivalent form of ``expr.copy()`` is just ``expr()``::

        integerM = integer().addParseAction(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
    """
    cpy = copy.copy(self)
    cpy.parseAction = self.parseAction[:]
    cpy.ignoreExprs = self.ignoreExprs[:]
    if self.copyDefaultWhiteChars:
        # store a set, exactly as __init__ does, so the attribute has the
        # same type on fresh and copied elements
        cpy.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    return cpy
1445
def setName(self, name):
    """
    Give this expression a name, which makes debugging output and
    exception messages clearer. Returns ``self``.

    Debugging is also switched on for the expression when
    ``__diag__.enable_debug_on_named_expressions`` is set.

    Example::

        Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
        Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.name = name
    self.errmsg = "Expected " + self.name
    debug_named = __diag__.enable_debug_on_named_expressions
    if debug_named:
        self.setDebug()
    return self
1460
def setResultsName(self, name, listAllMatches=False):
    """
    Define a name under which the matching tokens can be referenced as a
    nested attribute of the returned parse results.

    NOTE: this returns a *copy* of the original :class:`ParserElement`
    object, so that a basic element (such as an integer) can be defined
    once and then used in several places under different names.

    The abbreviated syntax ``expr("name")`` may be used in place of
    ``expr.setResultsName("name")`` - see :class:`__call__`.

    Example::

        date_str = (integer.setResultsName("year") + '/'
                    + integer.setResultsName("month") + '/'
                    + integer.setResultsName("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    named_copy = self._setResultsName(name, listAllMatches)
    return named_copy
1483
def _setResultsName(self, name, listAllMatches=False):
    """
    Return a copy of this element carrying the results name ``name``.

    A trailing ``*`` on ``name`` is stripped and has the same effect as
    passing ``listAllMatches=True``.
    """
    accumulate = listAllMatches
    if name.endswith("*"):
        name = name[:-1]
        accumulate = True

    named = self.copy()
    named.resultsName = name
    named.modalResults = not accumulate
    return named
1492
def setBreak(self, breakFlag=True):
    """Invoke the Python pdb debugger when this element is about to be
    parsed. Pass ``breakFlag`` as True to enable, False to disable.
    Returns ``self``.
    """
    if not breakFlag:
        # restore the wrapped method, if a breaker is currently installed
        if hasattr(self._parse, "_originalParseMethod"):
            self._parse = self._parse._originalParseMethod
        return self

    wrapped = self._parse

    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        # this call to pdb.set_trace() is intentional, not a checkin error
        pdb.set_trace()
        return wrapped(instring, loc, doActions, callPreParse)

    breaker._originalParseMethod = wrapped
    self._parse = breaker
    return self
1511
def setParseAction(self, *fns, **kwargs):
    """
    Define one or more actions to perform when this element matches,
    replacing any actions set previously. Returns ``self``.

    A parse action is a callable taking 0-3 arguments, called as
    ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just
    ``fn()`` , where:

    - s = the original string being parsed (see note below)
    - loc = the location of the matching substring
    - toks = a list of the matched tokens, packaged as a :class:`ParseResults` object

    A function that modifies the tokens can return them, and the
    returned value replaces the original tokens. Otherwise it does not
    need to return anything.

    Passing ``None`` as the only argument clears all parse actions of
    this expression. Passing anything else that is not callable raises
    ``TypeError``.

    Optional keyword arguments:
    - callDuringTry = (default= ``False``) indicate if parse action should be run during lookaheads and alternate testing

    Note: by default the input string has its tabs expanded before
    parsing starts. See :class:`parseString` for details on parsing
    strings that contain TAB characters, and for ways to keep the parsed
    string, the parse location, and the line and column positions
    consistent with one another.

    Example::

        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

        # use parse action to convert to ints at parse time
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        date_str = integer + '/' + integer + '/' + integer

        # note that integer fields are now ints, not strings
        date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
    """
    if list(fns) == [None,]:
        self.parseAction = []
        return self

    for fn in fns:
        if not callable(fn):
            raise TypeError("parse actions must be callable")
    self.parseAction = [_trim_arity(fn) for fn in fns]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
1560
def addParseAction(self, *fns, **kwargs):
    """
    Append one or more parse actions to this expression's existing
    actions. See :class:`setParseAction` for the accepted signatures.
    Returns ``self``.

    ``callDuringTry=True`` switches the flag on; it is never switched
    off by this method.

    See examples in :class:`copy`.
    """
    wrapped_actions = [_trim_arity(fn) for fn in fns]
    self.parseAction.extend(wrapped_actions)
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
1570
def addCondition(self, *fns, **kwargs):
    """Append boolean predicate functions to this expression's parse
    actions. See :class:`setParseAction` for function call signatures.
    Unlike ``setParseAction``, functions passed to ``addCondition`` need
    to return boolean success/fail of the condition. Returns ``self``.

    Optional keyword arguments:
    - message = define a custom message to be used in the raised exception
    - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException

    Example::

        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
    """
    message = kwargs.get('message')
    fatal = kwargs.get('fatal', False)
    for predicate in fns:
        self.parseAction.append(conditionAsParseAction(predicate, message=message, fatal=fatal))

    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
1595
def setFailAction(self, fn):
    """Define the action to perform if parsing fails at this expression.
    Returns ``self``.

    The fail action is a callable taking the arguments
    ``fn(s, loc, expr, err)`` where:
    - s = string being parsed
    - loc = location where expression match was attempted and failed
    - expr = the parse expression that failed
    - err = the exception thrown
    The function returns no value. It may throw :class:`ParseFatalException`
    if it is desired to stop parsing immediately."""
    on_failure = fn
    self.failAction = on_failure
    return self
1608
def _skipIgnorables(self, instring, loc):
    """
    Advance ``loc`` past every match of the ignorable expressions.

    All ignorable expressions are tried repeatedly, each as many times
    in a row as it matches, until a full pass over them matches nothing.
    Returns the resulting location.
    """
    while True:
        matched_any = False
        for ignorable in self.ignoreExprs:
            try:
                while True:
                    loc, _tokens = ignorable._parse(instring, loc)
                    matched_any = True
            except ParseException:
                # this expression no longer matches here; try the next one
                pass
        if not matched_any:
            return loc
1621
def preParse(self, instring, loc):
    """
    Return the location at which matching should start: ``loc`` advanced
    past ignorable expressions (if any are defined) and then past
    leading whitespace characters (if ``skipWhitespace`` is set).
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    whitespace = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in whitespace:
        loc += 1
    return loc
1633
def parseImpl(self, instring, loc, doActions=True):
    """Base implementation: match nothing, returning ``loc`` unchanged with an empty token list."""
    no_tokens = []
    return loc, no_tokens
1636
def postParse(self, instring, loc, tokenlist):
    """Base implementation: return ``tokenlist`` unchanged."""
    unchanged = tokenlist
    return unchanged
1639 1640 # ~ @profile
def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True):
    """
    Match this element at ``loc`` without consulting the packrat cache.

    Steps: optionally run ``preParse``, call ``parseImpl``, pass the
    tokens through ``postParse``, wrap them in a ``ParseResults``, then
    run the parse actions. Debug and fail actions are invoked along the
    way when enabled.

    - instring - the string being parsed
    - loc - the location at which to attempt the match
    - doActions - forwarded to ``parseImpl``; parse actions run only when
      this is true or ``self.callDuringTry`` is set
    - callPreParse - ``preParse`` is called only when both this argument
      and ``self.callPreparse`` are true

    Returns the tuple ``(loc, retTokens)``: the location after the match
    and the resulting ``ParseResults``.

    An ``IndexError`` raised by ``parseImpl`` (in the guarded branch) or
    by a parse action is converted to a ``ParseException``.
    """
    # indexes into self.debugActions
    TRY, MATCH, FAIL = 0, 1, 2
    debugging = (self.debug)  # and doActions)

    # The matching code appears twice: the first copy adds the try/except
    # needed to report failures to the debug and fail actions, the second
    # copy is the plain path without that overhead.
    if debugging or self.failAction:
        # ~ print ("Match", self, "at loc", loc, "(%d, %d)" % (lineno(loc, instring), col(loc, instring)))
        if self.debugActions[TRY]:
            self.debugActions[TRY](instring, loc, self)
        try:
            if callPreParse and self.callPreparse:
                preloc = self.preParse(instring, loc)
            else:
                preloc = loc
            tokensStart = preloc
            if self.mayIndexError or preloc >= len(instring):
                try:
                    loc, tokens = self.parseImpl(instring, preloc, doActions)
                except IndexError:
                    raise ParseException(instring, len(instring), self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, preloc, doActions)
        except Exception as err:
            # ~ print ("Exception raised:", err)
            # NOTE(review): tokensStart is assigned only after preParse()
            # returns, so it looks unbound here if preParse() itself
            # raises - confirm whether that can happen.
            if self.debugActions[FAIL]:
                self.debugActions[FAIL](instring, tokensStart, self, err)
            if self.failAction:
                self.failAction(instring, tokensStart, self, err)
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse(instring, loc)
        else:
            preloc = loc
        tokensStart = preloc
        if self.mayIndexError or preloc >= len(instring):
            try:
                loc, tokens = self.parseImpl(instring, preloc, doActions)
            except IndexError:
                raise ParseException(instring, len(instring), self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, preloc, doActions)

    tokens = self.postParse(instring, loc, tokens)

    retTokens = ParseResults(tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults)
    if self.parseAction and (doActions or self.callDuringTry):
        # as above, the action loop appears twice: with and without
        # failure reporting to the FAIL debug action
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokensStart, retTokens)
                    except IndexError as parse_action_exc:
                        # NOTE(review): the message is passed as the first
                        # positional argument here, whereas the other
                        # ParseException calls in this method pass instring
                        # first - confirm against ParseException's signature.
                        exc = ParseException("exception raised in parse action")
                        exc.__cause__ = parse_action_exc
                        raise exc

                    # a parse action that returns a new value replaces the results
                    if tokens is not None and tokens is not retTokens:
                        retTokens = ParseResults(tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
                                                  modal=self.modalResults)
            except Exception as err:
                # ~ print "Exception raised in user parse action:", err
                if self.debugActions[FAIL]:
                    self.debugActions[FAIL](instring, tokensStart, self, err)
                raise
        else:
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokensStart, retTokens)
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    exc.__cause__ = parse_action_exc
                    raise exc

                # a parse action that returns a new value replaces the results
                if tokens is not None and tokens is not retTokens:
                    retTokens = ParseResults(tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
                                              modal=self.modalResults)
    if debugging:
        # ~ print ("Matched", self, "->", retTokens.asList())
        if self.debugActions[MATCH]:
            self.debugActions[MATCH](instring, tokensStart, loc, self, retTokens)

    return loc, retTokens
1727
def tryParse(self, instring, loc):
    """
    Attempt a match at ``loc`` with parse actions disabled and return
    the location following the match.

    A ``ParseFatalException`` is downgraded to an ordinary
    ``ParseException`` located at ``loc``.
    """
    try:
        end_loc = self._parse(instring, loc, doActions=False)[0]
        return end_loc
    except ParseFatalException:
        raise ParseException(instring, loc, self.errmsg, self)
1733
def canParseNext(self, instring, loc):
    """Return True if ``tryParse`` succeeds at ``loc``, False if it raises ``ParseException`` or ``IndexError``."""
    try:
        self.tryParse(instring, loc)
    except (ParseException, IndexError):
        return False
    return True
1741
class _UnboundedCache(object):
    """Packrat cache without any size limit.

    ``get`` returns the ``not_in_cache`` sentinel for unknown keys. The
    storage lives in a closure and the accessors are bound per instance.
    """
    def __init__(self):
        cache = {}
        self.not_in_cache = not_in_cache = object()

        def get(self, key):
            return cache.get(key, not_in_cache)

        def set(self, key, value):
            cache[key] = value

        def clear(self):
            cache.clear()

        def cache_len(self):
            return len(cache)

        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set, self)
        self.clear = types.MethodType(clear, self)
        self.__len__ = types.MethodType(cache_len, self)

if _OrderedDict is not None:
    class _FifoCache(object):
        """Packrat cache holding at most ``size`` entries; the oldest
        entries are evicted first. Backed by an ordered dict."""
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = _OrderedDict()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                cache[key] = value
                # evict from the front (oldest) until back within the limit
                while len(cache) > size:
                    try:
                        cache.popitem(False)
                    except KeyError:
                        pass

            def clear(self):
                cache.clear()

            def cache_len(self):
                return len(cache)

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)
            self.__len__ = types.MethodType(cache_len, self)

else:
    class _FifoCache(object):
        """Packrat cache holding at most ``size`` entries; the oldest
        entries are evicted first. Fallback used when no ordered dict is
        available: a plain dict plus a deque recording insertion order."""
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = {}
            # Deliberately NOT created with maxlen=size: a bounded deque
            # discards its oldest key silently on append, which would leave
            # that key's value in ``cache`` forever and let the dict grow
            # without limit. Eviction is done explicitly in set() instead.
            key_fifo = collections.deque()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                # record insertion order once per key, so that overwriting
                # an existing key keeps its original position (as it does
                # in the ordered-dict implementation)
                if key not in cache:
                    key_fifo.append(key)
                cache[key] = value
                while len(key_fifo) > size:
                    cache.pop(key_fifo.popleft(), None)

            def clear(self):
                cache.clear()
                key_fifo.clear()

            def cache_len(self):
                return len(cache)

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)
            self.__len__ = types.MethodType(cache_len, self)

# argument cache for optimizing repeated calls when backtracking through recursive expressions
packrat_cache = {}  # this is set later by enablePackrat(); this is here so that resetCache() doesn't fail
packrat_cache_lock = RLock()
packrat_cache_stats = [0, 0]

# this method gets repeatedly called during backtracking with the same arguments -
# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
    """
    Memoizing front end to ``_parseNoCache``.

    Results and parse exceptions are stored in the shared packrat cache
    under the key ``(self, instring, loc, callPreParse, doActions)``.
    A hit either re-raises the stored exception or returns the stored
    location with a copy of the stored tokens. The whole lookup runs
    while holding ``packrat_cache_lock``, and hit/miss counters are kept
    in ``packrat_cache_stats``.
    """
    HIT, MISS = 0, 1
    key = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        memo = ParserElement.packrat_cache
        cached = memo.get(key)

        if cached is not memo.not_in_cache:
            ParserElement.packrat_cache_stats[HIT] += 1
            if isinstance(cached, Exception):
                raise cached
            return cached[0], cached[1].copy()

        ParserElement.packrat_cache_stats[MISS] += 1
        try:
            result = self._parseNoCache(instring, loc, doActions, callPreParse)
        except ParseBaseException as pe:
            # cache a copy of the exception, without the traceback
            memo.set(key, pe.__class__(*pe.args))
            raise
        # store a copy of the tokens; the caller gets the original result
        memo.set(key, (result[0], result[1].copy()))
        return result
# default parse entry point is the non-memoizing one; enablePackrat() rebinds it
_parse = _parseNoCache

@staticmethod
def resetCache():
    """
    Empty the packrat cache and zero every packrat statistics counter in place.
    """
    ParserElement.packrat_cache.clear()
    stats = ParserElement.packrat_cache_stats
    stats[:] = [0] * len(stats)
# guards enablePackrat() so that the cache is only installed once
_packratEnabled = False

@staticmethod
def enablePackrat(cache_size_limit=128):
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.
    Repeated parse attempts at the same string location (which happens
    often in many complex grammars) can immediately return a cached value,
    instead of re-executing parsing/validating code.  Both successful
    results and parsing exceptions are memoized.

    Parameters:

    - cache_size_limit - (default= ``128``) - if an integer value is provided
      will limit the size of the packrat cache; if None is passed, then
      the cache size will be unbounded; if 0 is passed, the cache will
      be effectively disabled.

    This speedup may break existing programs that use parse actions that
    have side-effects.  For this reason, packrat parsing is disabled when
    you first import pyparsing.  To activate the packrat feature, your
    program must call the static method :class:`ParserElement.enablePackrat`.
    For best results, call ``enablePackrat()`` immediately after
    importing pyparsing.  Calling it again after it has been enabled has
    no effect.

    Example::

        import pyparsing
        pyparsing.ParserElement.enablePackrat()
    """
    if ParserElement._packratEnabled:
        return

    ParserElement._packratEnabled = True
    if cache_size_limit is not None:
        ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
    else:
        ParserElement.packrat_cache = ParserElement._UnboundedCache()
    ParserElement._parse = ParserElement._parseCache
1894
def parseString(self, instring, parseAll=False):
    """
    Execute the parse expression with the given string.
    This is the main interface to the client code, once the complete
    expression has been built.

    Returns the parsed data as a :class:`ParseResults` object, which may be
    accessed as a list, or as a dict or object with attributes if the given parser
    includes results names.

    If you want the grammar to require that the entire input string be
    successfully parsed, then set ``parseAll`` to True (equivalent to ending
    the grammar with ``StringEnd()``).

    Note: ``parseString`` implicitly calls ``expandtabs()`` on the input string,
    in order to report proper column numbers in parse actions.
    If the input string contains tabs and
    the grammar uses parse actions that use the ``loc`` argument to index into the
    string being parsed, you can ensure you have a consistent view of the input
    string by:

    - calling ``parseWithTabs`` on your grammar before calling ``parseString``
      (see :class:`parseWithTabs`)
    - define your parse action using the full ``(s, loc, toks)`` signature, and
      reference the input string using the parse action's ``s`` argument
    - explicitly expand the tabs in your input string before calling
      ``parseString``

    Example::

        Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
        Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for ignored in self.ignoreExprs:
        ignored.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        end_loc, tokens = self._parse(instring, 0)
        if parseAll:
            # skip trailing whitespace/ignorables, then insist on end of input
            end_loc = self.preParse(instring, end_loc)
            end_check = Empty() + StringEnd()
            end_check._parse(instring, end_loc)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc

    return tokens
1950
def scanString(self, instring, maxMatches=_MAX_INT, overlap=False):
    """
    Scan the input string for expression matches.  Each match is yielded as a
    ``(tokens, start, end)`` tuple.  May be called with optional
    ``maxMatches`` argument, to clip scanning after 'n' matches are found.  If
    ``overlap`` is specified, then overlapping matches will be reported.

    Note that the start and end locations are reported relative to the string
    being parsed.  See :class:`parseString` for more information on parsing
    strings with embedded tabs.

    Example::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scanString(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    if not self.streamlined:
        self.streamline()
    for ignored in self.ignoreExprs:
        ignored.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()

    text_len = len(instring)
    pos = 0
    skip_leading = self.preParse
    parse_at = self._parse
    ParserElement.resetCache()
    found = 0
    try:
        while pos <= text_len and found < maxMatches:
            try:
                start = skip_leading(instring, pos)
                end, tokens = parse_at(instring, start, callPreParse=False)
            except ParseException:
                # no match here; retry one character past the pre-parsed location
                pos = start + 1
                continue

            if end <= pos:
                # match did not advance past the current position
                pos = start + 1
                continue

            found += 1
            yield tokens, start, end

            if not overlap:
                pos = end
                continue

            after_skip = skip_leading(instring, pos)
            if after_skip > pos:
                pos = end
            else:
                pos += 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
2022
def transformString(self, instring):
    """
    Extension to :class:`scanString`, to modify matching text with modified tokens that may
    be returned from a parse action.  To use ``transformString``, define a grammar and
    attach a parse action to it that modifies the returned token list.
    Invoking ``transformString()`` on a target string will then scan for matches,
    and replace the matched text patterns according to the logic in the parse
    action.  ``transformString()`` returns the resulting transformed string.

    Note that this sets ``keepTabs`` to True on this expression.

    Example::

        wd = Word(alphas)
        wd.setParseAction(lambda toks: toks[0].title())

        print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))

    prints::

        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for tokens, start, end in self.scanString(instring):
            # unmatched text between the previous match and this one is kept verbatim
            pieces.append(instring[prev_end:start])
            if tokens:
                if isinstance(tokens, ParseResults):
                    pieces.extend(tokens.asList())
                elif isinstance(tokens, list):
                    pieces.extend(tokens)
                else:
                    pieces.append(tokens)
            prev_end = end
        pieces.append(instring[prev_end:])
        non_empty = [piece for piece in pieces if piece]
        return "".join(map(_ustr, _flatten(non_empty)))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
2068
def searchString(self, instring, maxMatches=_MAX_INT):
    """
    Another extension to :class:`scanString`, simplifying the access to the tokens found
    to match the given parse expression.  May be called with optional
    ``maxMatches`` argument, to clip searching after 'n' matches are found.

    Returns a :class:`ParseResults` holding the tokens of each match.

    Example::

        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))

    prints::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    try:
        found = [tokens for tokens, _start, _end in self.scanString(instring, maxMatches)]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
2098
def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """
    Generator method to split a string using the given expression as a separator.
    May be called with optional ``maxsplit`` argument, to limit the number of splits;
    and the optional ``includeSeparators`` argument (default= ``False``), if the separating
    matching text should be included in the split results.

    Parameters:
     - instring - the string to split
     - maxsplit - passed to :class:`scanString` as ``maxMatches``
     - includeSeparators - (default= ``False``) if True, the first token of each
       separator match is yielded between the surrounding pieces

    Example::

        punc = oneOf(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

    prints::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    last = 0
    for t, s, e in self.scanString(instring, maxMatches=maxsplit):
        # text between the end of the previous separator and the start of this one
        yield instring[last:s]
        if includeSeparators:
            yield t[0]
        last = e
    # whatever follows the final separator (possibly an empty string)
    yield instring[last:]
2123
def __add__(self, other):
    """
    Implementation of + operator - returns :class:`And`. Adding strings to a ParserElement
    converts them to :class:`Literal`s by default.

    Example::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print (hello, "->", greet.parseString(hello))

    prints::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`.

        Literal('start') + ... + Literal('end')

    is equivalent to:

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    Note that the skipped text is returned with '_skipped' as a results name,
    and to support having multiple skips in the same parser, the value returned is
    a list of all skipped text.

    If ``other`` is neither a string nor a ParserElement, a ``SyntaxWarning`` is
    issued and None is returned.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    operand = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(operand, ParserElement):
        return And([self, operand])

    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None
2161
def __radd__(self, other):
    """
    Implementation of + operator when left operand is not a :class:`ParserElement`.

    A leading ``...`` produces ``SkipTo(self)("_skipped*") + self``.  Strings are
    converted with ``_literalStringClass``; any other non-ParserElement operand
    issues a ``SyntaxWarning`` and yields None.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    operand = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(operand, ParserElement):
        return operand + self

    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None
2176
def __sub__(self, other):
    """
    Implementation of - operator, returns :class:`And` with error stop.

    Strings are converted with ``_literalStringClass``; any other
    non-ParserElement operand issues a ``SyntaxWarning`` and yields None.
    """
    operand = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(operand, ParserElement):
        return self + And._ErrorStop() + operand

    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None
2188
def __rsub__(self, other):
    """
    Implementation of - operator when left operand is not a :class:`ParserElement`.

    Strings are converted with ``_literalStringClass``; any other
    non-ParserElement operand issues a ``SyntaxWarning`` and yields None.
    """
    operand = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(operand, ParserElement):
        return operand - self

    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None
2200
def __mul__(self, other):
    """
    Implementation of * operator, allows use of ``expr * 3`` in place of
    ``expr + expr + expr``.  Expressions may also be multiplied by a 2-integer
    tuple, similar to ``{min, max}`` multipliers in regular expressions.  Tuples
    may also include ``None`` (or ``...``) as in:
     - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
          to ``expr*n + ZeroOrMore(expr)``
          (read as "at least n instances of ``expr``")
     - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
          (read as "0 to n instances of ``expr``")
     - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
     - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    ``expr*(None, n) + ~expr``

    Raises:
     - TypeError - if ``other`` is not an int, ``...``, or a tuple of ints/None/``...``
     - ValueError - if the count is negative, if the second tuple value is
       smaller than the first, or if the multiplier is 0 or (0, 0)
    """
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0, ) + other[1:] + (None,))[:2]

    if isinstance(other, int):
        minElements, optElements = other, 0
    elif isinstance(other, tuple):
        # normalize to exactly two entries, with None standing in for '...'
        other = tuple(o if o is not Ellipsis else None for o in other)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0], int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self * other[0] + ZeroOrMore(self)
        elif isinstance(other[0], int) and isinstance(other[1], int):
            minElements, optElements = other
            # optElements becomes the number of optional repetitions beyond the minimum
            optElements -= minElements
        else:
            # the types must be interpolated into the message; passing them as
            # extra exception args leaves the '%s' placeholders unformatted
            raise TypeError("cannot multiply 'ParserElement' and ('%s', '%s') objects"
                            % (type(other[0]).__name__, type(other[1]).__name__))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects"
                        % (type(other).__name__,))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0, 0)")

    if optElements:
        def makeOptionalList(n):
            # nested Optionals: each further repetition is only tried if the previous one matched
            if n > 1:
                return Optional(self + makeOptionalList(n - 1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self] * minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self] * minElements)
    return ret
def __rmul__(self, other):
    """
    Implementation of * operator when the left operand is not a
    :class:`ParserElement`; delegates to ``__mul__`` with the same argument.
    """
    product = self.__mul__(other)
    return product
2277
def __or__(self, other):
    """
    Implementation of | operator - returns :class:`MatchFirst`.

    ``expr | ...`` returns a :class:`_PendingSkip` with ``must_skip=True``.
    Strings are converted with ``_literalStringClass``; any other
    non-ParserElement operand issues a ``SyntaxWarning`` and yields None.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    operand = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(operand, ParserElement):
        return MatchFirst([self, operand])

    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None
2292
def __ror__(self, other):
    """
    Implementation of | operator when left operand is not a :class:`ParserElement`.

    Strings are converted with ``_literalStringClass``; any other
    non-ParserElement operand issues a ``SyntaxWarning`` and yields None.
    """
    operand = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(operand, ParserElement):
        return operand | self

    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None
2304
def __xor__(self, other):
    """
    Implementation of ^ operator - returns :class:`Or`.

    Strings are converted with ``_literalStringClass``; any other
    non-ParserElement operand issues a ``SyntaxWarning`` and yields None.
    """
    operand = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(operand, ParserElement):
        return Or([self, operand])

    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None
2316
def __rxor__(self, other):
    """
    Implementation of ^ operator when left operand is not a :class:`ParserElement`.

    Strings are converted with ``_literalStringClass``; any other
    non-ParserElement operand issues a ``SyntaxWarning`` and yields None.
    """
    operand = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(operand, ParserElement):
        return operand ^ self

    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None
2328
def __and__(self, other):
    """
    Implementation of & operator - returns :class:`Each`.

    Strings are converted with ``_literalStringClass``; any other
    non-ParserElement operand issues a ``SyntaxWarning`` and yields None.
    """
    operand = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(operand, ParserElement):
        return Each([self, operand])

    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None
2340
def __rand__(self, other):
    """
    Implementation of & operator when left operand is not a :class:`ParserElement`.

    Strings are converted with ``_literalStringClass``; any other
    non-ParserElement operand issues a ``SyntaxWarning`` and yields None.
    """
    operand = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(operand, ParserElement):
        return operand & self

    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None
2352
def __invert__(self):
    """
    Implementation of ~ operator - wraps this expression in a :class:`NotAny`.
    """
    negated = NotAny(self)
    return negated
2358
def __iter__(self):
    """
    Always raises ``TypeError``.

    ``__iter__`` must be implemented to override the legacy use of sequential
    access to ``__getitem__`` to iterate over a sequence.
    """
    class_name = self.__class__.__name__
    raise TypeError('%r object is not iterable' % class_name)
2363
def __getitem__(self, key):
    """
    use ``[]`` indexing notation as a short form for expression repetition:
     - ``expr[n]`` is equivalent to ``expr*n``
     - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
     - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
          to ``expr*n + ZeroOrMore(expr)``
          (read as "at least n instances of ``expr``")
     - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
          (read as "0 to n instances of ``expr``")
     - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
     - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``
     ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr``s exist in the input stream.  If this behavior is
    desired, then write ``expr[..., n] + ~expr``.

    A warning is issued if more than two index arguments are given; only the
    first two are used.
    """
    # convert single arg keys to tuples
    if isinstance(key, str):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        key = (key, key)

    arg_count = len(key)
    if arg_count > 2:
        overflow = '... [{0}]'.format(arg_count) if arg_count > 5 else ''
        warnings.warn("only 1 or 2 index arguments supported ({0}{1})".format(key[:5], overflow))

    # clip to 2 elements
    return self * tuple(key[:2])
2399
def __call__(self, name=None):
    """
    Shortcut for :class:`setResultsName`, with ``listAllMatches=False``.

    If ``name`` is given with a trailing ``'*'`` character, then ``listAllMatches`` will be
    passed as ``True``.

    If ``name`` is omitted, same as calling :class:`copy`.

    Example::

        # these are equivalent
        userdata = Word(alphas).setResultsName("name") + Word(nums + "-").setResultsName("socsecno")
        userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
    """
    if name is None:
        return self.copy()
    return self._setResultsName(name)
2419
def suppress(self):
    """
    Suppresses the output of this :class:`ParserElement`; useful to keep punctuation from
    cluttering up returned output.  Returns this expression wrapped in a
    :class:`Suppress`.
    """
    wrapped = Suppress(self)
    return wrapped
2426
def leaveWhitespace(self):
    """
    Disables the skipping of whitespace before matching the characters in the
    :class:`ParserElement`'s defined pattern.  This is normally only used internally by
    the pyparsing module, but may be needed in some whitespace-sensitive grammars.

    Returns ``self`` so that calls can be chained.
    """
    self.skipWhitespace = False
    return self
2435
def setWhitespaceChars(self, chars):
    """
    Overrides the default whitespace chars.

    Turns whitespace skipping on, stores ``chars`` as this expression's
    whitespace characters and clears ``copyDefaultWhiteChars``.
    Returns ``self`` so that calls can be chained.
    """
    self.skipWhitespace = True
    self.whiteChars = chars
    self.copyDefaultWhiteChars = False
    return self
2444
def parseWithTabs(self):
    """
    Overrides default behavior to expand ``<TAB>``s to spaces before parsing the input string.
    Must be called before ``parseString`` when the input grammar contains elements that
    match ``<TAB>`` characters.

    Returns ``self`` so that calls can be chained.
    """
    self.keepTabs = True
    return self
2453
def ignore(self, other):
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    Strings are wrapped in :class:`Suppress`.  A :class:`Suppress` is added only
    if it is not already in ``ignoreExprs``; any other expression is copied and
    wrapped in a new :class:`Suppress`.  Returns ``self``.

    Example::

        patt = OneOrMore(Word(alphas))
        patt.parseString('ablaj /* comment */ lskjd')  # -> ['ablaj']

        patt.ignore(cStyleComment)
        patt.parseString('ablaj /* comment */ lskjd')  # -> ['ablaj', 'lskjd']
    """
    if isinstance(other, basestring):
        other = Suppress(other)

    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self
2477
def setDebugActions(self, startAction, successAction, exceptionAction):
    """
    Install custom debug actions and turn debugging on.

    Any action passed as a false value (e.g. None) is replaced by the
    corresponding default debug action.  Returns ``self``.
    """
    on_start = startAction or _defaultStartDebugAction
    on_success = successAction or _defaultSuccessDebugAction
    on_exception = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (on_start, on_success, on_exception)
    self.debug = True
    return self
2487
def setDebug(self, flag=True):
    """
    Enable display of debugging messages while doing pattern matching.
    Set ``flag`` to True to enable, False to disable.

    Example::

        wd = Word(alphas).setName("alphaword")
        integer = Word(nums).setName("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.setDebug()

        OneOrMore(term).parseString("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using :class:`setDebugActions`.  Prior to attempting
    to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown.  Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
    message is shown.  Also note the use of :class:`setName` to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the :class:`Word` expression without calling ``setName`` is ``"W:(ABCD...)"``.
    """
    if not flag:
        self.debug = False
        return self

    self.setDebugActions(_defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction)
    return self
2530
def __str__(self):
    """Return this expression's ``name`` attribute."""
    label = self.name
    return label
2533
def __repr__(self):
    """Return the result of converting this expression with ``_ustr``."""
    text = _ustr(self)
    return text
2536
def streamline(self):
    """
    Mark this expression as streamlined and discard its cached ``strRepr``.
    Returns ``self``.
    """
    self.streamlined = True
    self.strRepr = None
    return self
2541
def checkRecursion(self, parseElementList):
    """Recursion check hook; this implementation does nothing."""
    return None
2544
def validate(self, validateTrace=None):
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    Runs ``checkRecursion`` starting from an empty element list;
    ``validateTrace`` is not used here.
    """
    seen = []
    self.checkRecursion(seen)
2550
def parseFile(self, file_or_filename, parseAll=False):
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    Returns whatever :class:`parseString` returns for the file's contents.
    """
    try:
        text = file_or_filename.read()
    except AttributeError:
        # not a file-like object - treat it as a path
        with open(file_or_filename, "r") as source:
            text = source.read()

    try:
        return self.parseString(text, parseAll)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
2570
def __eq__(self, other):
    """
    Compare this expression with ``other``.

     - the same object compares equal
     - a string compares equal if this expression ``matches`` it
     - another ParserElement compares equal if both have equal ``vars()``
     - anything else compares unequal
    """
    if self is other:
        return True
    if isinstance(other, basestring):
        return self.matches(other)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False
2579
def __ne__(self, other):
    """Return the negation of ``self == other``."""
    same = (self == other)
    return not same
2582
def __hash__(self):
    """Hash by object identity: returns ``id(self)``."""
    identity = id(self)
    return identity
2585
def __req__(self, other):
    """Return the result of ``self == other``."""
    outcome = (self == other)
    return outcome
2588
def __rne__(self, other):
    """Return the negation of ``self == other``."""
    same = (self == other)
    return not same
2591
def matches(self, testString, parseAll=True):
    """
    Method for quick testing of a parser against a test string.  Good for simple
    inline microtests of sub expressions while building up larger parser.

    Parameters:
     - testString - to test against this expression for a match
     - parseAll - (default= ``True``) - flag to pass to :class:`parseString` when running tests

    Returns True if parsing succeeds, False if a :class:`ParseBaseException`
    is raised.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    try:
        self.parseString(_ustr(testString), parseAll=parseAll)
    except ParseBaseException:
        return False
    return True
2611
def runTests(self, tests, parseAll=True, comment='#',
             fullDump=True, printResults=True, failureTests=False, postParse=None,
             file=None):
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed.  Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default= ``True``) - flag to pass to :class:`parseString` when running tests
     - comment - (default= ``'#'``) - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
     - fullDump - (default= ``True``) - dump results as list followed by results names in nested outline;
          if False, only dump nested list
     - printResults - (default= ``True``) prints test output to stdout
     - failureTests - (default= ``False``) indicates if these tests are expected to fail parsing
     - postParse - (default= ``None``) optional callback for successful parse results; called as
          `fn(test_string, parse_results)` and returns a string to be added to the test output
     - file - (default=``None``) optional file-like object to which test output will be written;
          if None, will default to ``sys.stdout``

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failureTests`` is True), and the results contain a list of
    (test string, result) pairs, the result being the parsed results or the exception raised

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.runTests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.runTests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failureTests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line.  If you want to test a string that spans multiple
    lines, create a test like this::

        expr.runTest(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading 'r'.)
    """
    if isinstance(tests, basestring):
        tests = [str.strip(test_line) for test_line in tests.rstrip().splitlines()]
    if isinstance(comment, basestring):
        comment = Literal(comment)
    if file is None:
        file = sys.stdout
    emit = file.write

    allResults = []
    pending_comments = []
    success = True
    newline_marker = Literal(r'\n').addParseAction(replaceWith('\n')).ignore(quotedString)
    byte_order_mark = u'\ufeff'

    for t in tests:
        # comment lines, and blank lines following comments, are held back and
        # shown as the heading of the next real test
        if (comment is not None and comment.matches(t, False)) or (pending_comments and not t):
            pending_comments.append(t)
            continue
        if not t:
            continue

        out = ['\n'.join(pending_comments), t]
        pending_comments = []
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = newline_marker.transformString(t.lstrip(byte_order_mark))
            result = self.parseString(t, parseAll=parseAll)
        except ParseBaseException as pe:
            if isinstance(pe, ParseFatalException):
                fatal = "(FATAL)"
            else:
                fatal = ""
            if '\n' in t:
                # multi-line test: show the offending line with a caret under the column
                out.append(line(pe.loc, t))
                out.append(' ' * (col(pe.loc, t) - 1) + '^' + fatal)
            else:
                out.append(' ' * pe.loc + '^' + fatal)
            out.append("FAIL: " + str(pe))
            success = success and failureTests
            result = pe
        except Exception as exc:
            out.append("FAIL-EXCEPTION: " + str(exc))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is None:
                out.append(result.dump(full=fullDump))
            else:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is None:
                        out.append(result.dump())
                    elif isinstance(pp_value, ParseResults):
                        out.append(pp_value.dump())
                    else:
                        out.append(str(pp_value))
                except Exception as e:
                    out.append(result.dump(full=fullDump))
                    out.append("{0} failed: {1}: {2}".format(postParse.__name__, type(e).__name__, e))

        if printResults:
            if fullDump:
                out.append('')
            emit('\n'.join(out))

        allResults.append((t, result))

    return success, allResults
2773
class _PendingSkip(ParserElement):
    # internal placeholder class to hold a place where '...' is added to a parser element,
    # once another ParserElement is added, this placeholder will be replaced with a SkipTo
    def __init__(self, expr, must_skip=False):
        """
        Parameters:
         - expr - the expression that ``...`` was combined with; kept as ``anchor``
         - must_skip - (default= ``False``) selects the alternative form built by ``__add__``
        """
        super(_PendingSkip, self).__init__()
        # display the anchor as if it were followed by '...'
        self.strRepr = str(expr + Empty()).replace('Empty', '...')
        self.name = self.strRepr
        self.anchor = expr
        self.must_skip = must_skip

    def __add__(self, other):
        """
        Resolve the pending ``...`` now that its target expression ``other`` is known,
        by inserting a :class:`SkipTo` (results name ``_skipped*``) before ``other``.
        """
        skipper = SkipTo(other).setName("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # nothing was skipped: drop the empty token and its results name
            if not t._skipped or t._skipped.asList() == ['']:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # the anchor did not match and nothing was skipped: record what was missing
            if t._skipped.asList()[-1:] == ['']:
                t.pop('_skipped')
                t['_skipped'] = 'missing <' + repr(self.anchor) + '>'

        with_anchor = self.anchor + skipper().addParseAction(must_skip)
        without_anchor = skipper().addParseAction(show_skip)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        """Return the ``...`` display string built in ``__init__``."""
        return self.strRepr

    def parseImpl(self, *args):
        """A pending skip cannot parse on its own; always raises ``Exception``."""
        raise Exception("use of `...` expression without following SkipTo target expression")
2806
class Token(ParserElement):
    """Abstract base for atomic matching patterns.

    A :class:`ParserElement` subclass whose constructor initializes the
    base class with ``savelist=False``.
    """

    def __init__(self):
        super(Token, self).__init__(savelist=False)
2814
class Empty(Token):
    """A token that matches nothing-at-all, and therefore always matches."""

    def __init__(self):
        super(Empty, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
2824
class NoMatch(Token):
    """A token that can never match; parsing it always fails."""

    def __init__(self):
        super(NoMatch, self).__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "NoMatch"

    def parseImpl(self, instring, loc, doActions=True):
        # Unconditional failure at the current location.
        raise ParseException(instring, loc, self.errmsg, self)
2838
class Literal(Token):
    """Token that matches one exact string.

    Example::

        Literal('blah').parseString('blah')  # -> ['blah']
        Literal('blah').parseString('blahfooblah')  # -> ['blah']
        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

    Use :class:`CaselessLiteral` to ignore case.

    Use :class:`Keyword` or :class:`CaselessKeyword` when the matched
    string must be delimited by word breaks on both sides.
    """

    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # An empty literal is turned into an Empty in place.
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

        # Performance tuning: exact Literal instances (not subclasses) of a
        # single character switch to a parseImpl that only compares that
        # one character.
        is_plain_literal = type(self) is Literal
        if self.matchLen == 1 and is_plain_literal:
            self.__class__ = _SingleCharLiteral

    def parseImpl(self, instring, loc, doActions=True):
        # Compare the first character before attempting the full
        # startswith() comparison.
        if instring[loc] != self.firstMatchChar or not instring.startswith(self.match, loc):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match
2878
class _SingleCharLiteral(Literal):
    # Literal specialization selected for one-character match strings:
    # a single character comparison replaces the startswith() check.
    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match
# Short module-level alias for the Literal class.
_L = Literal
# Register Literal on ParserElement as its _literalStringClass.
# NOTE(review): presumably the class used to wrap bare strings that are
# combined with parser elements - confirm against ParserElement.
ParserElement._literalStringClass = Literal
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is,
    it must be immediately preceded and followed by a non-keyword
    character (or the start/end of the input). Compare with
    :class:`Literal`:

     - ``Literal("if")`` will match the leading ``'if'`` in
       ``'ifAndOnlyIf'``.
     - ``Keyword("if")`` will not; it will only match the leading
       ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

     - ``identChars`` is a string of characters that would be valid
       identifier characters, defaulting to all alphanumerics + "_" and
       "$"
     - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example::

        Keyword("start").parseString("start")  # -> ['start']
        Keyword("start").parseString("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """
    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(self, matchString, identChars=None, caseless=False):
        super(Keyword, self).__init__()
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # Caseless comparison is done entirely in upper case, so both
            # the keyword and the identifier characters are upper-cased.
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        """Match the keyword at ``loc`` only if it is not embedded in a
        longer run of identifier characters.

        Returns ``(new_loc, keyword)`` on success, where ``keyword`` is the
        string given to the constructor; raises :class:`ParseException`
        otherwise.
        """
        if self.caseless:
            if ((instring[loc:loc + self.matchLen].upper() == self.caselessmatch)
                    # keyword ends the input, or next char is not an ident char
                    and (loc >= len(instring) - self.matchLen
                         or instring[loc + self.matchLen].upper() not in self.identChars)
                    # keyword starts the input, or previous char is not an ident char
                    and (loc == 0
                         or instring[loc - 1].upper() not in self.identChars)):
                return loc + self.matchLen, self.match

        else:
            if instring[loc] == self.firstMatchChar:
                if ((self.matchLen == 1 or instring.startswith(self.match, loc))
                        and (loc >= len(instring) - self.matchLen
                             or instring[loc + self.matchLen] not in self.identChars)
                        and (loc == 0 or instring[loc - 1] not in self.identChars)):
                    return loc + self.matchLen, self.match

        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """Return a copy of this keyword that keeps its own ``identChars``.

        The copy receives an independent set holding the same identifier
        characters as this instance, so custom (and, for caseless keywords,
        upper-cased) ``identChars`` survive copying instead of being reset
        to the class-wide default.
        """
        c = super(Keyword, self).copy()
        c.identChars = set(self.identChars)
        return c

    @staticmethod
    def setDefaultKeywordChars(chars):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
2965
class CaselessLiteral(Literal):
    """Token that matches a given string without regard to letter case.

    Note: the returned token is always spelled exactly as the match string
    given to the constructor, NOT as the text found in the input.

    Example::

        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10")  # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, matchString):
        # The base Literal stores the upper-cased form used for comparison.
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal for use as the returned token.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
2988
class CaselessKeyword(Keyword):
    """
    Case-insensitive variant of :class:`Keyword`.

    Example::

        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10")  # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(self, matchString, identChars=None):
        # Delegate to Keyword with caseless matching switched on.
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)
3001
class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

     - ``match_string`` - string to be matched
     - ``maxMismatches`` - (``default=1``) maximum number of
       mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

     - ``mismatches`` - a list of the positions within the
       match_string where mismatches were found
     - ``original`` - the original match_string used to compare
       against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parseString("ATCATCGAAXGGA")  # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parseString("ATCAXCGAAXGGA")  # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parseString("ATCATCGAATGGA")  # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
        patt.parseString("ATCAXCGAAXGGA")  # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(self, match_string, maxMismatches=1):
        super(CloseMatch, self).__init__()
        self.name = match_string
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def parseImpl(self, instring, loc, doActions=True):
        """Compare the input at ``loc`` with ``match_string`` character by
        character, tolerating up to ``maxMismatches`` differences.

        Returns ``(end_loc, results)`` where ``end_loc`` is the absolute
        position just past the compared text and ``results`` carries the
        matched text plus the ``original`` and ``mismatches`` named
        results. Raises :class:`ParseException` (at the starting location)
        when the input is too short or has too many mismatches.
        """
        start = loc
        instrlen = len(instring)
        maxloc = start + len(self.match_string)

        # Only attempt a comparison if enough input remains.
        if maxloc <= instrlen:
            match_string = self.match_string
            mismatches = []
            maxMismatches = self.maxMismatches

            for match_stringloc, (src, mat) in enumerate(zip(instring[start:maxloc], match_string)):
                if src != mat:
                    mismatches.append(match_stringloc)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # The loop ran to completion, so the whole span
                # [start, maxloc) was accepted. The end location must be
                # absolute within instring, not an offset into match_string.
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results['original'] = match_string
                results['mismatches'] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)
3069
class Word(Token):
    """Token for matching words built from sets of allowed characters.

    A Word is defined by a string of all allowed initial characters, an
    optional string of allowed body characters (when omitted, the initial
    character set is used for the body too), and optional minimum,
    maximum, and/or exact lengths. ``min`` defaults to 1 (values < 1 are
    rejected); ``max`` and ``exact`` default to 0, meaning no maximum or
    exact length restriction. The optional ``excludeChars`` parameter
    lists characters to remove from the initial and body character
    strings; this is handy for defining a word of all printables except
    for one or two characters, for instance.

    :class:`srange` is useful for defining custom character set strings
    for ``Word`` expressions, using range notation from regular
    expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

     - :class:`alphas`
     - :class:`nums`
     - :class:`alphanums`
     - :class:`hexnums`
     - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
       - accented, tilded, umlauted, etc.)
     - :class:`punc8bit` (non-alphabetic characters in ASCII range
       128-255 - currency, symbols, superscripts, diacriticals, etc.)
     - :class:`printables` (any non-whitespace character)

    Example::

        # a word composed of digits
        integer = Word(nums)  # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capital_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, excludeChars=",")
    """

    def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None):
        super(Word, self).__init__()

        # Strip excluded characters from both character strings up front.
        if excludeChars:
            excludeChars = set(excludeChars)
            initChars = ''.join(c for c in initChars if c not in excludeChars)
            if bodyChars:
                bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)

        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        # The body falls back to the initial characters when not given.
        body_source = bodyChars if bodyChars else initChars
        self.bodyCharsOrig = body_source
        self.bodyChars = set(body_source)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # An exact length overrides both bounds.
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # With default length limits and no space in the character sets,
        # try to express the word as a regular expression and switch to
        # the regex-based implementation.
        default_lengths = (min == 1 and max == 0 and exact == 0)
        if ' ' not in self.initCharsOrig + self.bodyCharsOrig and default_lengths:
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                self.reString = "%s[%s]*" % (re.escape(self.initCharsOrig),
                                             _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % (_escapeRegexRangeChars(self.initCharsOrig),
                                               _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b" + self.reString + r"\b"

            try:
                self.re = re.compile(self.reString)
            except Exception:
                # Keep the character-by-character implementation.
                self.re = None
            else:
                self.re_match = self.re.match
                self.__class__ = _WordRegex

    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = min(start + self.maxLen, instrlen)
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        # Failure conditions, checked in order: word too short; an explicit
        # maximum was given but more body characters follow; or, in keyword
        # mode, the word abuts body characters on either side.
        too_short = loc - start < self.minLen
        failed = (
            too_short
            or (self.maxSpecified and loc < instrlen and instring[loc] in bodychars)
            or (self.asKeyword
                and (start > 0 and instring[start - 1] in bodychars
                     or loc < instrlen and instring[loc] in bodychars))
        )
        if failed:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        try:
            return super(Word, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                # Abbreviate long character sets to their first 4 characters.
                return s[:4] + "..." if len(s) > 4 else s

            if self.initCharsOrig == self.bodyCharsOrig:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
            else:
                self.strRepr = "W:(%s, %s)" % (charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig))

        return self.strRepr
3229
class _WordRegex(Word):
    # Word variant that matches through the precompiled regular expression
    # bound to self.re_match.
    def parseImpl(self, instring, loc, doActions=True):
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        return found.end(), found.group()
3238
class Char(_WordRegex):
    """Short-cut for ``Word(characters, exact=1)``: matches any single
    character out of a string of characters.
    """

    def __init__(self, charset, asKeyword=False, excludeChars=None):
        super(Char, self).__init__(charset, exact=1, asKeyword=asKeyword, excludeChars=excludeChars)
        # Build the single-character regex explicitly and bind its match
        # method for use by the inherited regex-based parseImpl.
        char_class = "[%s]" % _escapeRegexRangeChars(''.join(self.initChars))
        if asKeyword:
            char_class = r"\b%s\b" % char_class
        self.reString = char_class
        self.re = re.compile(self.reString)
        self.re_match = self.re.match
3252
class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named parse results.

    If instead of the Python stdlib re module you wish to use a different RE module
    (such as the `regex` module), you can do so by building your
    Regex object with a compiled RE that was compiled using regex.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # use regex module instead of stdlib re module to construct a Regex using
        # a compiled regular expression
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))

    """

    def __init__(self, pattern, flags=0, asGroupList=False, asMatch=False):
        """The parameters ``pattern`` and ``flags`` are passed
        to the ``re.compile()`` function as-is. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ for an
        explanation of the acceptable patterns and flags.

        ``pattern`` may also be an already-compiled RE object (anything
        with ``pattern`` and ``match`` attributes), which is used directly.
        ``asGroupList`` makes the token return the match's groups;
        ``asMatch`` makes it return the match object itself (``asMatch``
        takes precedence when both are set).

        Raises ``TypeError`` if ``pattern`` is neither a string nor a
        compiled RE object, and re-raises the ``re`` compile error for an
        invalid pattern string.
        """
        super(Regex, self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                              SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except re.error:
                # re.error is the exception class raised by re.compile();
                # catching it via the re module avoids relying on the
                # deprecated sre_constants module.
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                              SyntaxWarning, stacklevel=2)
                raise

        elif hasattr(pattern, 'pattern') and hasattr(pattern, 'match'):
            # Duck-typed compiled RE object (stdlib re or a compatible module).
            self.re = pattern
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise TypeError("Regex may only be constructed with a string or a compiled RE object")

        self.re_match = self.re.match

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # Select the parse implementation per instance according to the
        # requested result form.
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch

    def parseImpl(self, instring, loc, doActions=True):
        """Match at ``loc``; return the end location and a ParseResults
        holding the matched text, with any named groups added as named
        results."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        d = result.groupdict()
        if d:
            for k, v in d.items():
                ret[k] = v
        return loc, ret

    def parseImplAsGroupList(self, instring, loc, doActions=True):
        """Match at ``loc``; return the end location and the tuple of the
        match's groups."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, doActions=True):
        """Match at ``loc``; return the end location and the match object
        itself."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def __str__(self):
        try:
            return super(Regex, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr

    def sub(self, repl):
        r"""
        Return Regex with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Raises ``SyntaxError`` (after issuing a ``SyntaxWarning``) when used
        with ``asGroupList=True``, or with ``asMatch=True`` and a callable
        ``repl``.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transformString("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            warnings.warn("cannot use sub() with Regex(asGroupList=True)",
                          SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if self.asMatch and callable(repl):
            warnings.warn("cannot use sub() with a callable with Regex(asMatch=True)",
                          SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if self.asMatch:
            # The token is a match object: expand the template against it.
            def pa(tokens):
                return tokens[0].expand(repl)
        else:
            # The token is the matched text: substitute within it.
            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.addParseAction(pa)
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

        - quoteChar - string of one or more characters defining the
          quote delimiting string
        - escChar - character to escape quotes, typically backslash
          (default= ``None``)
        - escQuote - special quote sequence to escape an embedded quote
          string (such as SQL's ``""`` to escape an embedded ``"``)
          (default= ``None``)
        - multiline - boolean indicating whether quotes can span
          multiple lines (default= ``False``)
        - unquoteResults - boolean indicating whether the matched text
          should be unquoted (default= ``True``)
        - endQuoteChar - string of one or more characters defining the
          end of the quote delimited string (default= ``None``  => same as
          quoteChar)
        - convertWhitespaceEscapes - convert escaped whitespace
          (``'\t'``, ``'\n'``, etc.) to actual whitespace
          (default= ``True``)

    Example::

        qs = QuotedString('"')
        print(qs.searchString('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', endQuoteChar='}}')
        print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', escQuote='""')
        print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """

    def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False,
                 unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        """Build the regular expression that recognizes the quoted string.

        Raises ``SyntaxError`` (after a ``SyntaxWarning``) if ``quoteChar``
        or ``endQuoteChar`` is empty once stripped of whitespace, and
        re-raises the ``re`` compile error if the generated pattern is
        invalid.
        """
        super(QuotedString, self).__init__()

        # remove white space from quote chars - won't work anyway
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # The escape character (if any) is excluded from the "ordinary
        # character" class so that it is only consumed by the escape
        # alternative added below.
        if escChar is not None:
            escaped_esc = _escapeRegexRangeChars(escChar) or ''
        else:
            escaped_esc = ''

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % (re.escape(self.quoteChar),
                                              _escapeRegexRangeChars(self.endQuoteChar[0]),
                                              escaped_esc)
        else:
            # Single-line strings additionally exclude newline characters.
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % (re.escape(self.quoteChar),
                                                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                                                  escaped_esc)
        if len(self.endQuoteChar) > 1:
            # For a multi-character end quote, also accept any proper prefix
            # of the end quote that is followed by a character which does
            # not continue it.
            self.pattern += (
                '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                                   _escapeRegexRangeChars(self.endQuoteChar[i]))
                                      for i in range(len(self.endQuoteChar) - 1, 0, -1)) + ')')

        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar) + "(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error:
            # re.error is the exception class raised by re.compile();
            # catching it via the re module avoids relying on the
            # deprecated sre_constants module.
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                          SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string at ``loc``.

        Returns ``(end_loc, text)``; when ``unquoteResults`` is set the
        quotes are stripped and escapes are resolved, otherwise the raw
        matched text (including quotes) is returned. Raises
        :class:`ParseException` if no quoted string starts at ``loc``.
        """
        # Cheap first-character test before running the regex.
        result = None
        if instring[loc] == self.firstQuoteChar:
            result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen: -self.endQuoteCharLen]

            if isinstance(ret, basestring):
                # replace escaped whitespace (done before the escChar
                # replacement below)
                if '\\' in ret and self.convertWhitespaceEscapes:
                    ws_map = {
                        r'\t': '\t',
                        r'\n': '\n',
                        r'\f': '\f',
                        r'\r': '\r',
                    }
                    for wslit, wschar in ws_map.items():
                        ret = ret.replace(wslit, wschar)

                # replace escaped characters
                if self.escChar:
                    ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__(self):
        try:
            return super(QuotedString, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
3543
class CharsNotIn(Token):
    """Token for matching words made of characters *not* in a given set.

    Whitespace is included in the matched characters unless it is listed
    in the exclusion set (see example). Defined with a string containing
    all disallowed characters, and optional minimum, maximum, and/or
    exact lengths. ``min`` defaults to 1 (values < 1 are rejected);
    ``max`` and ``exact`` default to 0, meaning no maximum or exact
    length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(self, notChars, min=1, max=0, exact=0):
        super(CharsNotIn, self).__init__()
        # Leading whitespace is not skipped before matching.
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use "
                             "Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # An exact length overrides both bounds.
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = (self.minLen == 0)
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        notchars = self.notChars
        if instring[loc] in notchars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        # Consume allowed characters up to the length limit or end of input.
        stop = min(start + self.maxLen, len(instring))
        while loc < stop and instring[loc] not in notchars:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            # Abbreviate long exclusion sets to their first 4 characters.
            if len(self.notChars) > 4:
                template, shown = "!W:(%s...)", self.notChars[:4]
            else:
                template, shown = "!W:(%s)", self.notChars
            self.strRepr = template % shown

        return self.strRepr
3619
class White(Token):
    """Special matching class for matching whitespace.  Normally,
    whitespace is ignored by pyparsing grammars.  This class is included
    when some whitespace structures are significant.  Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``.  Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """
    # Display names used to build the token's name from its characters.
    whiteStrs = {
        ' ' : '<SP>',
        '\t': '<TAB>',
        '\n': '<LF>',
        '\r': '<CR>',
        '\f': '<FF>',
        u'\u00A0': '<NBSP>',
        u'\u1680': '<OGHAM_SPACE_MARK>',
        u'\u180E': '<MONGOLIAN_VOWEL_SEPARATOR>',
        u'\u2000': '<EN_QUAD>',
        u'\u2001': '<EM_QUAD>',
        u'\u2002': '<EN_SPACE>',
        u'\u2003': '<EM_SPACE>',
        u'\u2004': '<THREE-PER-EM_SPACE>',
        u'\u2005': '<FOUR-PER-EM_SPACE>',
        u'\u2006': '<SIX-PER-EM_SPACE>',
        u'\u2007': '<FIGURE_SPACE>',
        u'\u2008': '<PUNCTUATION_SPACE>',
        u'\u2009': '<THIN_SPACE>',
        u'\u200A': '<HAIR_SPACE>',
        u'\u200B': '<ZERO_WIDTH_SPACE>',
        u'\u202F': '<NNBSP>',
        u'\u205F': '<MMSP>',
        u'\u3000': '<IDEOGRAPHIC_SPACE>',
        }

    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White, self).__init__()
        self.matchWhite = ws
        # Characters this token must match can no longer be skipped as
        # ignorable whitespace; keep only the remaining ones as skippable.
        still_skippable = "".join(c for c in self.whiteChars if c not in self.matchWhite)
        self.setWhitespaceChars(still_skippable)
        self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite))
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # An exact length overrides both bounds.
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        # Consume matching whitespace up to the length limit or end of input.
        stop = min(start + self.maxLen, len(instring))
        while loc < stop and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
3688
class _PositionToken(Token):
    # Base class for position-related tokens; each instance is named
    # after its concrete class.
    def __init__(self):
        super(_PositionToken, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = self.__class__.__name__
3696
3697 -class GoToColumn(_PositionToken):
3698 """Token to advance to a specific column of input text; useful for 3699 tabular report scraping. 3700 """
3701 - def __init__(self, colno):
3702 super(GoToColumn, self).