Package pyparsing :: Module pyparsing
[hide private]
[frames | no frames]

Source Code for Module pyparsing.pyparsing

   1  # module pyparsing.py 
   2  # 
   3  # Copyright (c) 2003-2008  Paul T. McGuire 
   4  # 
   5  # Permission is hereby granted, free of charge, to any person obtaining 
   6  # a copy of this software and associated documentation files (the 
   7  # "Software"), to deal in the Software without restriction, including 
   8  # without limitation the rights to use, copy, modify, merge, publish, 
   9  # distribute, sublicense, and/or sell copies of the Software, and to 
  10  # permit persons to whom the Software is furnished to do so, subject to 
  11  # the following conditions: 
  12  # 
  13  # The above copyright notice and this permission notice shall be 
  14  # included in all copies or substantial portions of the Software. 
  15  # 
  16  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  17  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  18  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  19  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  20  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  21  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  22  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  23  # 
  24  #from __future__ import generators 
  25   
  26  __doc__ = \ 
  27  """ 
  28  pyparsing module - Classes and methods to define and execute parsing grammars 
  29   
  30  The pyparsing module is an alternative approach to creating and executing simple grammars, 
  31  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you 
  32  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
  33  provides a library of classes that you use to construct the grammar directly in Python. 
  34   
  35  Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!"):: 
  36   
  37      from pyparsing import Word, alphas 
  38   
  39      # define grammar of a greeting 
  40      greet = Word( alphas ) + "," + Word( alphas ) + "!" 
  41   
  42      hello = "Hello, World!" 
  43      print hello, "->", greet.parseString( hello ) 
  44   
  45  The program outputs the following:: 
  46   
  47      Hello, World! -> ['Hello', ',', 'World', '!'] 
  48   
  49  The Python representation of the grammar is quite readable, owing to the self-explanatory 
  50  class names, and the use of '+', '|' and '^' operators. 
  51   
  52  The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an 
  53  object with named attributes. 
  54   
  55  The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 
  56   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.) 
  57   - quoted strings 
  58   - embedded comments 
  59  """ 
  60   
  61  __version__ = "1.5.1" 
  62  __versionTime__ = "2 October 2008 00:44" 
  63  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  64   
  65  import string 
  66  from weakref import ref as wkref 
  67  import copy 
  68  import sys 
  69  import warnings 
  70  import re 
  71  import sre_constants 
  72  #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 
  73   
  74  __all__ = [ 
  75  'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 
  76  'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 
  77  'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 
  78  'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 
  79  'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 
  80  'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', 
  81  'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 
  82  'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 
  83  'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 
  84  'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums', 
  85  'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', 
  86  'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 
  87  'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 
  88  'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',  
  89  'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 
  90  'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 
  91  'indentedBlock', 'originalTextFor', 
  92  ] 
  93   
  94   
  95  """ 
  96  Detect if we are running version 3.X and make appropriate changes 
  97  Robert A. Clark 
  98  """ 
  99  if sys.version_info[0] > 2: 
 100      _PY3K = True 
 101      _MAX_INT = sys.maxsize 
 102      basestring = str 
 103  else: 
 104      _PY3K = False 
 105      _MAX_INT = sys.maxint 
 106   
 107  if not _PY3K: 
108 - def _ustr(obj):
109 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 110 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 111 then < returns the unicode object | encodes it with the default encoding | ... >. 112 """ 113 try: 114 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 115 # it won't break any existing code. 116 return str(obj) 117 118 except UnicodeEncodeError: 119 # The Python docs (http://docs.python.org/ref/customization.html#l2h-182) 120 # state that "The return value must be a string object". However, does a 121 # unicode object (being a subclass of basestring) count as a "string 122 # object"? 123 # If so, then return a unicode object: 124 return unicode(obj)
125 # Else encode it... but how? There are many choices... :) 126 # Replace unprintables with escape codes? 127 #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') 128 # Replace unprintables with question marks? 129 #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') 130 # ... 131 else: 132 _ustr = str 133 unichr = chr 134
135 -def _str2dict(strg):
136 return dict( [(c,0) for c in strg] )
137 #~ return set( [c for c in strg] ) 138
139 -def _xml_escape(data):
140 """Escape &, <, >, ", ', etc. in a string of data.""" 141 142 # ampersand must be replaced first 143 from_symbols = '&><"\'' 144 to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()] 145 for from_,to_ in zip(from_symbols, to_symbols): 146 data = data.replace(from_, to_) 147 return data
148
class _Constants(object):
    """Empty placeholder class; it defines no attributes or methods of its own."""


# Character sets exported by the module.
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
if _PY3K:
    alphas = string.ascii_lowercase + string.ascii_uppercase
else:
    alphas = string.lowercase + string.uppercase
alphanums = alphas + nums
_bslash = chr(92)  # the backslash character
# every printable character that is not whitespace
printables = "".join( [ ch for ch in string.printable if ch not in string.whitespace ] )

class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions

    Instances carry:
     - loc - index into the parsed string where the problem was detected
     - msg - the error message
     - pstr - the string being parsed ("" when only a message was given)
     - parserElement - the element passed as elem (None by default)
    The attributes lineno, col/column and line are computed on demand from
    loc and pstr.
    """
    __slots__ = ( "loc","msg","pstr","parserElement" )

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        self.loc = loc
        if msg is None:
            # single-argument form: the first argument is the message itself
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text
           ("column" is accepted as a synonym for "col"); any other name
           raises AttributeError.
        """
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        elif aname in ("col", "column"):
            return col( self.loc, self.pstr )
        elif aname == "line":
            return line( self.loc, self.pstr )
        else:
            raise AttributeError(aname)

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % \
                ( self.msg, self.loc, self.lineno, self.column )

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
           An empty markerString returns the (stripped) line unmarked.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join( [line_str[:line_column],
                                markerString, line_str[line_column:]])
        return line_str.strip()

    def __dir__(self):
        # The method is spelled markInputline (lower-case "l"); advertise it
        # under the name that actually resolves.
        return "loc msg pstr parserElement lineno col line " \
               "markInputline __str__ __repr__".split()
210
class ParseException(ParseBaseException):
    """exception thrown when parse expressions don't match class;
       supported attributes by name are:
        - lineno - returns the line number of the exception text
        - col - returns the column number of the exception text
        - line - returns the line containing the exception text
       All behaviour is inherited unchanged from ParseBaseException.
    """
219
class ParseFatalException(ParseBaseException):
    """user-throwable exception thrown when inconsistent parse content
       is found; stops all parsing immediately.
       Adds nothing to ParseBaseException beyond its distinct type."""
224
class ParseSyntaxException(ParseFatalException):
    """just like ParseFatalException, but thrown internally when an
       ErrorStop indicates that parsing is to stop immediately because
       an unbacktrackable syntax error has been found"""
    def __init__(self, pe):
        # Re-use the details (string, location, message, element) of the
        # exception being promoted.
        details = ( pe.pstr, pe.loc, pe.msg, pe.parserElement )
        super(ParseSyntaxException, self).__init__( *details )

#~ class ReparseException(ParseBaseException):
    #~ """Experimental class - parse actions can raise this exception to cause
       #~ pyparsing to reparse the input string:
        #~ - with a modified input string, and/or
        #~ - with a modified start location
       #~ Set the values of the ReparseException in the constructor, and raise the
       #~ exception in a parse action to cause pyparsing to use the new string/location.
       #~ Setting the values as None causes no change to be made.
       #~ """
    #~ def __init_( self, newstring, restartLoc ):
        #~ self.newParseText = newstring
        #~ self.reparseLoc = restartLoc

class RecursiveGrammarException(Exception):
    """exception thrown by validate() if the grammar could be improperly recursive

    The sequence of parse elements handed to the constructor is kept in the
    parseElementTrace attribute and shown by str().
    """
    def __init__( self, parseElementList ):
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # Wrap the trace in a 1-tuple so that a tuple-valued trace is
        # formatted as a single value instead of being unpacked by "%".
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
253
254 -class _ParseResultsWithOffset(object):
255 - def __init__(self,p1,p2):
256 self.tup = (p1,p2)
257 - def __getitem__(self,i):
258 return self.tup[i]
259 - def __repr__(self):
260 return repr(self.tup)
261 - def setOffset(self,i):
262 self.tup = (self.tup[0],i)
263
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (len(results))
       - by list index (results[0], results[1], etc.)
       - by attribute (results.<resultsName>)

       Internally the tokens live in a list, and named results live in a dict
       mapping each name to a list of (value, position) pairs.
       """
    __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )

    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        # Wrapping an existing ParseResults returns that same object; the
        # __doinit flag tells __init__ whether the fields still need creating.
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True ):
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name:
            if not modal:
                # non-modal names accumulate every match instead of the last one
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if not toklist in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError):
                        self[name] = toklist

    def __getitem__( self, i ):
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                # modal name: report the most recent value only
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v ):
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            # weak reference, so that a child does not keep its parent alive
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name in self.__tokdict:
                occurrences = self.__tokdict[name]
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return len( self.__toklist ) > 0
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( reversed(self.__toklist) )

    def keys( self ):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def pop( self, index=-1 ):
        """Removes and returns item at specified index (default=last).
           Will work with either numeric indices or dict-key indices."""
        ret = self[index]
        del self[index]
        return ret

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given defaultValue or None if no
           defaultValue is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """Inserts insStr into the token list at index, shifting the recorded
           positions of named results that lie after the insertion point."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name in self.__tokdict:
            occurrences = self.__tokdict[name]
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
        return [(k,self[k]) for k in self.__tokdict]

    def values( self ):
        """Returns all named result values."""
        return [ v[-1][0] for v in self.__tokdict.values() ]

    def __getattr__( self, name ):
        # Only reached when normal attribute lookup fails: results names are
        # served from the token dictionary, unknown names yield "".
        if name not in self.__slots__:
            if name in self.__tokdict:
                if name not in self.__accumNames:
                    return self.__tokdict[name][-1][0]
                else:
                    return ParseResults([ v[0] for v in self.__tokdict[name] ])
            else:
                return ""
        return None

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        if other.__tokdict:
            offset = len(self.__toklist)

            def addoffset(a):
                # A negative position maps to offset itself; written as an
                # explicit test because the former "(a<0 and offset) or
                # (a+offset)" returned the negative value when offset was 0.
                if a < 0:
                    return offset
                return a + offset

            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        parts = []
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                parts.append( _ustr(i) )
            else:
                parts.append( repr(i) )
        return "[" + ", ".join(parts) + "]"

    def _asStringList( self, sep='' ):
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens.
           Nested ParseResults become nested lists; the tokens themselves are
           returned as stored, without conversion."""
        out = []
        for res in self.__toklist:
            if isinstance(res,ParseResults):
                out.append( res.asList() )
            else:
                out.append( res )
        return out

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        return dict( self.items() )

    def copy( self ):
        """Returns a new copy of a ParseResults object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist ] )
        nextLevelIndent = indent + "  "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        worklist = self.__toklist
        for i,res in enumerate(worklist):
            if isinstance(res,ParseResults):
                # nested results render themselves, tagged with their name if any
                out += [ res.asXML(namedItems.get(i),
                                    namedItemsOnly and doctag is None,
                                    nextLevelIndent,
                                    formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which the object sub is stored, if any
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               # list() so that indexing also works on Python 3 dict views
               list(self.__tokdict.values())[0][0][1] in (0,-1)):
            return list(self.__tokdict.keys())[0]
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a ParseResults.
           Accepts an optional indent argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        out.append( indent+_ustr(self.asList()) )
        keys = self.items()
        keys.sort()
        for k,v in keys:
            if out:
                out.append('\n')
            out.append( "%s%s- %s: " % (indent,('  '*depth), k) )
            if isinstance(v,ParseResults):
                if v.keys():
                    out.append( v.dump(indent,depth+1) )
                else:
                    out.append(_ustr(v))
            else:
                out.append(_ustr(v))
        return "".join(out)

    # add support for pickle protocol
    def __getstate__(self):
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        self.__tokdict, \
        par, \
        inAccumNames, \
        self.__name = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __dir__(self):
        # list() so that the concatenation also works on Python 3, where
        # dict.keys() is a view rather than a list
        return dir(super(ParseResults,self)) + list(self.keys())
614
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.

   A loc that points at a newline character itself is reported as column 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # rfind gives the index of the preceding newline, or -1 on the first line,
    # so the difference is the 1-based column.
    return loc - strg.rfind("\n", 0, loc)
626
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
   The first line is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # every newline strictly before loc starts another line
    newlines_before = strg.count("\n", 0, loc)
    return 1 + newlines_before
638
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       The returned text does not include the terminating newline.
       """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() signals "no newline" with -1; index 0 is a legitimate hit (the
    # string starts with a newline), so test with >= rather than >.
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    else:
        return strg[lastCR+1:]
648
def _defaultStartDebugAction( instring, loc, expr ):
    """Print the expression about to be matched, with loc and its (line,column)."""
    message = "Match " + _ustr(expr) + " at loc " + _ustr(loc)
    message += "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    print (message)
651
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Print the matched expression followed by its tokens as a list."""
    matched = _ustr(expr)
    print ("Matched " + matched + " -> " + str(toks.asList()))
654
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Print the exception that was raised; the other arguments are not used."""
    description = _ustr(exc)
    print ("Exception raised:" + description)
657
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

    Accepts any positional arguments, ignores them and returns None."""
    return None
661
662 -class ParserElement(object):
663 """Abstract base level parser element class.""" 664 DEFAULT_WHITE_CHARS = " \n\t\r" 665
666 - def setDefaultWhitespaceChars( chars ):
667 """Overrides the default whitespace chars 668 """ 669 ParserElement.DEFAULT_WHITE_CHARS = chars
670 setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) 671
672 - def __init__( self, savelist=False ):
673 self.parseAction = list() 674 self.failAction = None 675 #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall 676 self.strRepr = None 677 self.resultsName = None 678 self.saveAsList = savelist 679 self.skipWhitespace = True 680 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 681 self.copyDefaultWhiteChars = True 682 self.mayReturnEmpty = False # used when checking for left-recursion 683 self.keepTabs = False 684 self.ignoreExprs = list() 685 self.debug = False 686 self.streamlined = False 687 self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index 688 self.errmsg = "" 689 self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) 690 self.debugActions = ( None, None, None ) #custom debug actions 691 self.re = None 692 self.callPreparse = True # used to avoid redundant calls to preParse 693 self.callDuringTry = False
694
695 - def copy( self ):
696 """Make a copy of this ParserElement. Useful for defining different parse actions 697 for the same parsing pattern, using copies of the original parse element.""" 698 cpy = copy.copy( self ) 699 cpy.parseAction = self.parseAction[:] 700 cpy.ignoreExprs = self.ignoreExprs[:] 701 if self.copyDefaultWhiteChars: 702 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 703 return cpy
704
705 - def setName( self, name ):
706 """Define name for this expression, for use in debugging.""" 707 self.name = name 708 self.errmsg = "Expected " + self.name 709 if hasattr(self,"exception"): 710 self.exception.msg = self.errmsg 711 return self
712
713 - def setResultsName( self, name, listAllMatches=False ):
714 """Define name for referencing matching tokens as a nested attribute 715 of the returned parse results. 716 NOTE: this returns a *copy* of the original ParserElement object; 717 this is so that the client can define a basic element, such as an 718 integer, and reference it in multiple places with different names. 719 """ 720 newself = self.copy() 721 newself.resultsName = name 722 newself.modalResults = not listAllMatches 723 return newself
724
725 - def setBreak(self,breakFlag = True):
726 """Method to invoke the Python pdb debugger when this element is 727 about to be parsed. Set breakFlag to True to enable, False to 728 disable. 729 """ 730 if breakFlag: 731 _parseMethod = self._parse 732 def breaker(instring, loc, doActions=True, callPreParse=True): 733 import pdb 734 pdb.set_trace() 735 return _parseMethod( instring, loc, doActions, callPreParse )
736 breaker._originalParseMethod = _parseMethod 737 self._parse = breaker 738 else: 739 if hasattr(self._parse,"_originalParseMethod"): 740 self._parse = self._parse._originalParseMethod 741 return self
742
    def _normalizeParseActionArgs( f ):
        """Internal method used to decorate parse actions that take fewer than 3 arguments,
           so that all parse actions can be called as f(s,l,t).

           The number of positional arguments f accepts is read from its code
           object. f is returned unchanged when it accepts *args or exactly
           three arguments; otherwise a wrapper tmp(s,l,t) is returned that
           calls f with the trailing arguments it takes (f(l,t), f(t) or f()).
           The wrapper copies f's __name__, __doc__ and __dict__ when they
           are available.
           """
        # bit 0x04 of co_flags marks a code object that accepts *args
        STAR_ARGS = 4

        try:
            restore = None
            if isinstance(f,type):
                # a class was passed: inspect its __init__, put the class back below
                restore = f
                f = f.__init__
            if not _PY3K:
                codeObj = f.func_code
            else:
                # NOTE(review): Python 3 functions expose __code__, not "code";
                # as written this looks like it always raises AttributeError on
                # Python 3 and falls through to the handler below - confirm.
                codeObj = f.code
            if codeObj.co_flags & STAR_ARGS:
                return f
            numargs = codeObj.co_argcount
            # a method's argument count includes self, which the caller never passes
            if not _PY3K:
                if hasattr(f,"im_self"):
                    numargs -= 1
            else:
                if hasattr(f,"__self__"):
                    numargs -= 1
            if restore:
                f = restore
        except AttributeError:
            # NOTE(review): if the AttributeError came from a class's __init__,
            # f is still that __init__ here (restore has not been applied).
            try:
                if not _PY3K:
                    call_im_func_code = f.__call__.im_func.func_code
                else:
                    call_im_func_code = f.__code__

                # not a function, must be a callable object, get info from the
                # im_func binding of its bound __call__ method
                if call_im_func_code.co_flags & STAR_ARGS:
                    return f
                numargs = call_im_func_code.co_argcount
                if not _PY3K:
                    if hasattr(f.__call__,"im_self"):
                        numargs -= 1
                else:
                    if hasattr(f.__call__,"__self__"):
                        # NOTE(review): subtracting 0 is a no-op; the sibling
                        # branches subtract 1 - verify whether this is intended.
                        numargs -= 0
            except AttributeError:
                if not _PY3K:
                    call_func_code = f.__call__.func_code
                else:
                    call_func_code = f.__call__.__code__
                # not a bound method, get info directly from __call__ method
                if call_func_code.co_flags & STAR_ARGS:
                    return f
                numargs = call_func_code.co_argcount
                if not _PY3K:
                    if hasattr(f.__call__,"im_self"):
                        numargs -= 1
                else:
                    if hasattr(f.__call__,"__self__"):
                        numargs -= 1


        #~ print ("adding function %s with %d args" % (f.func_name,numargs))
        if numargs == 3:
            return f
        else:
            # NOTE(review): as laid out here (indentation reconstructed), the
            # next "if" is separate from the if/elif/else chain after it, so
            # for numargs > 3 the tmp defined first is replaced by the final
            # else branch (f()) - confirm whether "elif" was intended.
            if numargs > 3:
                def tmp(s,l,t):
                    return f(f.__call__.__self__, s,l,t)
            if numargs == 2:
                def tmp(s,l,t):
                    return f(l,t)
            elif numargs == 1:
                def tmp(s,l,t):
                    return f(t)
            else: #~ numargs == 0:
                def tmp(s,l,t):
                    return f()
            # make the wrapper look like the wrapped callable where possible
            try:
                tmp.__name__ = f.__name__
            except (AttributeError,TypeError):
                # no need for special handling if attribute doesnt exist
                pass
            try:
                tmp.__doc__ = f.__doc__
            except (AttributeError,TypeError):
                # no need for special handling if attribute doesnt exist
                pass
            try:
                tmp.__dict__.update(f.__dict__)
            except (AttributeError,TypeError):
                # no need for special handling if attribute doesnt exist
                pass
            return tmp
    _normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)

837 - def setParseAction( self, *fns, **kwargs ):
838 """Define action to perform when successfully matching parse element definition. 839 Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks), 840 fn(loc,toks), fn(toks), or just fn(), where: 841 - s = the original string being parsed (see note below) 842 - loc = the location of the matching substring 843 - toks = a list of the matched tokens, packaged as a ParseResults object 844 If the functions in fns modify the tokens, they can return them as the return 845 value from fn, and the modified list of tokens will replace the original. 846 Otherwise, fn does not need to return any value. 847 848 Note: the default parsing behavior is to expand tabs in the input string 849 before starting the parsing process. See L{I{parseString}<parseString>} for more information 850 on parsing strings containing <TAB>s, and suggested methods to maintain a 851 consistent view of the parsed string, the parse location, and line and column 852 positions within the parsed string. 853 """ 854 self.parseAction = list(map(self._normalizeParseActionArgs, list(fns))) 855 self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"]) 856 return self
857
858 - def addParseAction( self, *fns, **kwargs ):
859 """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.""" 860 self.parseAction += list(map(self._normalizeParseActionArgs, list(fns))) 861 self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"]) 862 return self
863
864 - def setFailAction( self, fn ):
865 """Define action to perform if parsing fails at this expression. 866 Fail acton fn is a callable function that takes the arguments 867 fn(s,loc,expr,err) where: 868 - s = string being parsed 869 - loc = location where expression match was attempted and failed 870 - expr = the parse expression that failed 871 - err = the exception thrown 872 The function returns no value. It may throw ParseFatalException 873 if it is desired to stop parsing immediately.""" 874 self.failAction = fn 875 return self
876
877 - def _skipIgnorables( self, instring, loc ):
878 exprsFound = True 879 while exprsFound: 880 exprsFound = False 881 for e in self.ignoreExprs: 882 try: 883 while 1: 884 loc,dummy = e._parse( instring, loc ) 885 exprsFound = True 886 except ParseException: 887 pass 888 return loc
889
890 - def preParse( self, instring, loc ):
891 if self.ignoreExprs: 892 loc = self._skipIgnorables( instring, loc ) 893 894 if self.skipWhitespace: 895 wt = self.whiteChars 896 instrlen = len(instring) 897 while loc < instrlen and instring[loc] in wt: 898 loc += 1 899 900 return loc
901
def parseImpl( self, instring, loc, doActions=True ):
    """Default matching routine: consumes nothing and yields no tokens.
       Returns the (loc, tokens) pair, here with loc unchanged and an
       empty token list."""
    no_tokens = []
    return loc, no_tokens
904
def postParse( self, instring, loc, tokenlist ):
    """Hook called with the tokens produced by parseImpl; this default
       version hands tokenlist back unchanged."""
    return tokenlist
907 908 #~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Match this expression at loc, without consulting the packrat cache.

       Performs, in order: preParse (unless suppressed), parseImpl, postParse,
       packaging of the tokens as a ParseResults, and the parse actions.
        - instring = the string being parsed
        - loc = location at which to attempt the match
        - doActions = if False, parse actions are run only when callDuringTry is set
        - callPreParse = if False, preParse is not called here
       Returns a (loc, ParseResults) tuple, loc being the location following
       the match.  An IndexError escaping parseImpl is reported as a
       ParseException located at the end of instring.

       Parse actions, the fail action and the exception debug action are given
       the location at which the match starts, that is, after preParse has run.
       """
    debugging = ( self.debug ) #and doActions )

    if debugging or self.failAction:
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        # report the start of the match itself, not the text skipped by preParse
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException:
            # sys.exc_info() is valid syntax on every Python version,
            # unlike the "except X, err" / "except X as err" forms
            import sys
            err = sys.exc_info()[1]
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            # fast path: this expression cannot run off the end of the string here
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        try:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                # an action returning None leaves the current results in place
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )
        except ParseBaseException:
            # failures inside user parse actions are reported only when debugging
            if debugging and self.debugActions[2]:
                import sys
                self.debugActions[2]( instring, tokensStart, self, sys.exc_info()[1] )
            raise

    if debugging:
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
980
def tryParse( self, instring, loc ):
    """Attempt a match at loc with doActions=False and return only the
       location following the match.  A ParseFatalException is converted
       into an ordinary ParseException at loc."""
    try:
        endloc = self._parse( instring, loc, doActions=False )[0]
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
    return endloc
986 987 # this method gets repeatedly called during backtracking with the same arguments - 988 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing ("packrat") front end to _parseNoCache.

       Results are stored in ParserElement._exprArgCache under the key
       (self,instring,loc,callPreParse,doActions).  Both successful
       (loc, tokens) results and raised ParseBaseExceptions are cached; a
       cached exception is raised again on a later lookup.

       The cache always holds its own copy of the tokens, and a cache hit
       hands out a fresh copy, so callers that modify the returned tokens
       cannot alter what later lookups receive.
       """
    lookup = (self,instring,loc,callPreParse,doActions)
    cache = ParserElement._exprArgCache
    if lookup in cache:
        value = cache[ lookup ]
        if isinstance(value,Exception):
            raise value
        # never expose the cached ParseResults object itself
        return (value[0],value[1].copy())

    try:
        value = self._parseNoCache( instring, loc, doActions, callPreParse )
    except ParseBaseException:
        # sys.exc_info() is valid syntax on every Python version
        import sys
        cache[ lookup ] = sys.exc_info()[1]
        raise
    cache[ lookup ] = (value[0],value[1].copy())
    return value
1004 1005 _parse = _parseNoCache 1006 1007 # argument cache for optimizing repeated calls when backtracking through recursive expressions 1008 _exprArgCache = {}
def resetCache():
    """Empty the argument cache shared by all parser elements
       (ParserElement._exprArgCache)."""
    cache = ParserElement._exprArgCache
    cache.clear()
1011 resetCache = staticmethod(resetCache) 1012 1013 _packratEnabled = False
def enablePackrat():
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.
       Repeated parse attempts at the same string location (which happens
       often in many complex grammars) can immediately return a cached value,
       instead of re-executing parsing/validating code.  Both valid results
       and parsing exceptions are memoized.

       This speedup may break existing programs that use parse actions that
       have side-effects.  For this reason, packrat parsing is disabled when
       you first import pyparsing.  To activate the packrat feature, your
       program must call the class method ParserElement.enablePackrat().  If
       your program uses psyco to "compile as you go", you must call
       enablePackrat before calling psyco.full().  If you do not do this,
       Python will crash.  For best results, call enablePackrat() immediately
       after importing pyparsing.

       Calling this method again once packrat parsing is enabled does nothing.
       """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    # route every _parse call through the memoizing implementation
    ParserElement._parse = ParserElement._parseCache
1033 enablePackrat = staticmethod(enablePackrat) 1034
def parseString( self, instring, parseAll=False ):
    """Execute the parse expression with the given string.
       This is the main interface to the client code, once the complete
       expression has been built.

       If you want the grammar to require that the entire input string be
       successfully parsed, then set parseAll to True (equivalent to ending
       the grammar with StringEnd()).

       Note: parseString implicitly calls expandtabs() on the input string,
       in order to report proper column numbers in parse actions.
       If the input string contains tabs and the grammar uses parse actions
       that use the loc argument to index into the string being parsed, you
       can ensure you have a consistent view of the input string by:
        - calling parseWithTabs on your grammar before calling parseString
          (see L{I{parseWithTabs}<parseWithTabs>})
        - defining your parse action using the full (s,loc,toks) signature, and
          referencing the input string using the parse action's s argument
        - explicitly expanding the tabs in your input string before calling
          parseString
       """
    # results cached for a previous input string must not be reused
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    endloc, tokens = self._parse( instring, 0 )
    if parseAll:
        # StringEnd raises unless only skippable text remains after the match
        StringEnd()._parse( instring, self.preParse( instring, endloc ) )
    return tokens
1070
def scanString( self, instring, maxMatches=_MAX_INT ):
    """Scan the input string for expression matches.  Each match will return the
       matching tokens, start location, and end location.  May be called with optional
       maxMatches argument, to clip scanning after 'n' matches are found.

       This is a generator yielding (tokens, start, end) tuples.  A match that
       makes no progress (a zero-length match with no text skipped in front of
       it) is not reported; scanning simply resumes one character further on,
       so the scan always terminates.

       Note that the start and end locations are reported relative to the string
       being parsed.  See L{I{parseString}<parseString>} for more information on parsing
       strings with embedded tabs."""
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bound methods are looked up once, outside the scanning loop
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    while loc <= instrlen and matches < maxMatches:
        # keeps preloc defined if preParse itself raises ParseException
        preloc = loc
        try:
            preloc = preparseFn( instring, loc )
            nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
        except ParseException:
            loc = preloc+1
        else:
            if nextLoc > loc:
                matches += 1
                yield tokens, preloc, nextLoc
                loc = nextLoc
            else:
                # no progress was made; step forward instead of re-matching
                # the same empty span forever
                loc = preloc+1
1102
def transformString( self, instring ):
    """Extension to scanString, to modify matching text with modified tokens that may
       be returned from a parse action.  To use transformString, define a grammar and
       attach a parse action to it that modifies the returned token list.
       Invoking transformString() on a target string will then scan for matches,
       and replace the matched text patterns according to the logic in the parse
       action.  transformString() returns the resulting transformed string.

       Note: calling this method sets keepTabs on this expression."""
    pieces = []
    prevEnd = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    for toks, start, end in self.scanString( instring ):
        # unmatched text between two matches is carried over untouched
        pieces.append( instring[prevEnd:start] )
        if toks:
            if isinstance(toks,ParseResults):
                pieces.extend( toks.asList() )
            elif isinstance(toks,list):
                pieces.extend( toks )
            else:
                pieces.append(toks)
        prevEnd = end
    pieces.append(instring[prevEnd:])
    return "".join([ _ustr(piece) for piece in pieces ])
1127
def searchString( self, instring, maxMatches=_MAX_INT ):
    """Another extension to scanString, simplifying the access to the tokens found
       to match the given parse expression.  The tokens of each match are
       collected, in order, into a single ParseResults.  May be called with
       optional maxMatches argument, to clip searching after 'n' matches are found.
    """
    found = []
    for tokens, _start, _end in self.scanString( instring, maxMatches ):
        found.append( tokens )
    return ParseResults( found )
1134
def __add__(self, other ):
    """Implementation of + operator - returns And.
       A string operand is converted to a Literal; any other operand that
       is not a ParserElement produces a SyntaxWarning and a None result."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return And( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1144
def __radd__(self, other ):
    """Implementation of + operator when left operand is not a ParserElement.
       A string operand is converted to a Literal; any other operand that
       is not a ParserElement produces a SyntaxWarning and a None result."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1154
def __sub__(self, other):
    """Implementation of - operator, returns And with error stop.
       The resulting And holds self, an And._ErrorStop() marker, and other.
       A string operand is converted to a Literal; any other operand that
       is not a ParserElement produces a SyntaxWarning and a None result."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return And( [ self, And._ErrorStop(), other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1164
def __rsub__(self, other ):
    """Implementation of - operator when left operand is not a ParserElement.
       A string operand is converted to a Literal; any other operand that
       is not a ParserElement produces a SyntaxWarning and a None result."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1174
def __mul__(self,other):
    """Implementation of * operator - repeats this expression.
       The right operand may be:
        - an integer n: exactly n repetitions
        - a tuple (min,max) of integers: between min and max repetitions
        - a tuple (min,None) or (min,): min or more repetitions
        - a tuple (None,max): zero to max repetitions
       Raises TypeError for any other operand, and ValueError for a
       negative count, for max less than min, and for 0 or (0,0)."""
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # pad short tuples with None, and read a missing minimum as 0
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0],int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0],int) and isinstance(other[1],int):
            minElements, optElements = other
            # from here on optElements counts only the optional repetitions
            optElements -= minElements
        else:
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects" % (type(other[0]),type(other[1])))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % (type(other),))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # nested Optionals: each further repetition is tried only if
            # the one before it matched
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
def __rmul__(self, other):
    """Implementation of * operator when the left operand is not a
       ParserElement; gives the same result as self * other."""
    product = self.__mul__(other)
    return product
1226
def __or__(self, other ):
    """Implementation of | operator - returns MatchFirst.
       A string operand is converted to a Literal; any other operand that
       is not a ParserElement produces a SyntaxWarning and a None result."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return MatchFirst( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1236
def __ror__(self, other ):
    """Implementation of | operator when left operand is not a ParserElement.
       A string operand is converted to a Literal; any other operand that
       is not a ParserElement produces a SyntaxWarning and a None result."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1246
def __xor__(self, other ):
    """Implementation of ^ operator - returns Or.
       A string operand is converted to a Literal; any other operand that
       is not a ParserElement produces a SyntaxWarning and a None result."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return Or( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1256
def __rxor__(self, other ):
    """Implementation of ^ operator when left operand is not a ParserElement.
       A string operand is converted to a Literal; any other operand that
       is not a ParserElement produces a SyntaxWarning and a None result."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1266
def __and__(self, other ):
    """Implementation of & operator - returns Each.
       A string operand is converted to a Literal; any other operand that
       is not a ParserElement produces a SyntaxWarning and a None result."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return Each( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1276
def __rand__(self, other ):
    """Implementation of & operator when left operand is not a ParserElement.
       A string operand is converted to a Literal; any other operand that
       is not a ParserElement produces a SyntaxWarning and a None result."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1286
def __invert__( self ):
    """Implementation of ~ operator - returns NotAny wrapping this expression."""
    negated = NotAny( self )
    return negated
1290
def __call__(self, name):
    """Shortcut for setResultsName, with listAllMatches=default::
         userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
       could be written as::
         userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
       Returns whatever setResultsName returns.
       """
    named = self.setResultsName(name)
    return named
1298
def suppress( self ):
    """Suppresses the output of this ParserElement; useful to keep punctuation from
       cluttering up returned output.  Returns a Suppress wrapping this expression.
    """
    wrapped = Suppress( self )
    return wrapped
1304
def leaveWhitespace( self ):
    """Disables the skipping of whitespace before matching the characters in the
       ParserElement's defined pattern.  This is normally only used internally by
       the pyparsing module, but may be needed in some whitespace-sensitive grammars.
       Returns self.
    """
    self.skipWhitespace = False
    return self
1312
def setWhitespaceChars( self, chars ):
    """Overrides the default whitespace chars for this expression, and turns
       whitespace skipping on.  Returns self.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    # records that whiteChars no longer follows the default set
    self.copyDefaultWhiteChars = False
    return self
1320
def parseWithTabs( self ):
    """Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
       Must be called before parseString when the input grammar contains elements that
       match <TAB> characters.  Returns self."""
    self.keepTabs = True
    return self
1327
def ignore( self, other ):
    """Define expression to be ignored (e.g., comments) while doing pattern
       matching; may be called repeatedly, to define multiple comment or other
       ignorable patterns.  An expression that is not already a Suppress is
       wrapped in one; a Suppress is added only if it is not already in the
       list.  Returns self.
    """
    if not isinstance( other, Suppress ):
        self.ignoreExprs.append( Suppress( other ) )
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append( other )
    return self
1339
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Enable display of debugging messages while doing pattern matching,
       using the given callables for the start, success and exception events.
       Any action given as None (or another false value) is replaced by the
       corresponding default debug action.  Returns self."""
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
1347
def setDebug( self, flag=True ):
    """Enable display of debugging messages while doing pattern matching.
       Set flag to True to enable (installing the default debug actions),
       False to disable.  Returns self."""
    if not flag:
        self.debug = False
        return self
    self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
    return self
1356
def __str__( self ):
    """Return the name of this expression."""
    label = self.name
    return label
1359
def __repr__( self ):
    """Return the same text as the string conversion of this expression."""
    text = _ustr(self)
    return text
1362
def streamline( self ):
    """Mark this expression as streamlined and discard its cached string
       representation.  Returns self."""
    self.strRepr = None
    self.streamlined = True
    return self
1367
def checkRecursion( self, parseElementList ):
    """Recursion check hook; this base version does nothing."""
    return None
1370
def validate( self, validateTrace=[] ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.
       This version starts checkRecursion with an empty list and does not
       use validateTrace."""
    seen = []
    self.checkRecursion( seen )
1374
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.
       If a filename is specified (instead of a file object),
       the entire file is opened, read, and closed before parsing.
       The file is closed even if reading it fails.
        - file_or_filename = an object with a read() method, or a file name
        - parseAll = passed through to parseString
       Returns the result of parseString on the file contents.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # no read() method: treat the argument as a file name
        f = open(file_or_filename, "rb")
        try:
            file_contents = f.read()
        finally:
            f.close()
    return self.parseString(file_contents, parseAll)
1387
def getException(self):
    """Build a new ParseException for this expression, carrying its errmsg,
       an empty string and location 0."""
    message = self.errmsg
    return ParseException("",0,message,self)
1390
def __getattr__(self,aname):
    """Create the myException attribute on first access, using getException(),
       and store it on the instance.  Any other missing attribute raises
       AttributeError."""
    if aname != "myException":
        raise AttributeError("no such attribute " + aname)
    exc = self.getException()
    self.myException = exc
    return exc
1397
def __eq__(self,other):
    """Implementation of == operator.
       Compared with a string, the result is True if this expression matches
       the entire string, and False if parsing it raises a ParseBaseException.
       Compared with anything else, the result is True only for the very same
       object, which keeps equality consistent with the identity-based
       __hash__."""
    if isinstance(other, basestring):
        try:
            (self + StringEnd()).parseString(_ustr(other))
            return True
        except ParseBaseException:
            return False
    else:
        # comparing through super() would compare the super proxy object,
        # which is never equal to anything, not even to self
        return self is other
1407
def __ne__(self,other):
    """Implementation of != operator - the negation of self == other."""
    same = (self == other)
    return not same
1410
def __hash__(self):
    """Hash this expression by object identity."""
    identity = id(self)
    return hash(identity)
1413
def __req__(self,other):
    """Return the result of self == other."""
    result = (self == other)
    return result
1416
def __rne__(self,other):
    """Return the negation of self == other."""
    same = (self == other)
    return not same
1419 1420
class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""
    def __init__( self ):
        super(Token,self).__init__( savelist=False )

    def setName(self, name):
        """Set the name through the base class, then rebuild errmsg as
           "Expected " followed by the new name.  Returns what the base
           class setName returned."""
        renamed = super(Token,self).setName(name)
        self.errmsg = "Expected " + self.name
        return renamed
1432 1433
class Empty(Token):
    """An empty token, will always match."""
    def __init__( self ):
        super(Empty,self).__init__()
        self.name = "Empty"
        # matching consumes no input and never indexes into the string
        self.mayIndexError = False
        self.mayReturnEmpty = True
1441 1442
class NoMatch(Token):
    """A token that will never match."""
    def __init__( self ):
        super(NoMatch,self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Always fail: raise this element's exception object, updated
           with the current location and string."""
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1458 1459
class Literal(Token):
    """Token to exactly match a specified string."""
    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        if self.matchLen:
            self.firstMatchChar = matchString[0]
        else:
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            # an empty literal is turned into an Empty in place
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, match string) if instring has the match
           string at loc; otherwise raise this element's exception object,
           updated with the current location and string."""
        if instring[loc] == self.firstMatchChar:
            if self.matchLen==1 or instring.startswith(self.match,loc):
                return loc+self.matchLen, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1491 _L = Literal 1492
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with Literal::
         Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
         Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
       Accepts two optional constructor arguments in addition to the keyword string:
       identChars is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$" (or to the set most recently
       given to setDefaultKeywordChars); caseless allows case-insensitive
       matching, default is False.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=None, caseless=False ):
        super(Keyword,self).__init__()
        # resolve the default at call time, so that setDefaultKeywordChars
        # affects Keywords created after it is called
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless matching compares upper-cased text on both sides
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, match string) if the keyword is found at loc
           and is neither preceded nor followed by an identifier character;
           otherwise raise this element's exception object, updated with the
           current location and string."""
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def copy(self):
        """Return a copy of this Keyword.  The copy keeps the identifier
           characters of the original, including any custom set given to
           the constructor."""
        return super(Keyword,self).copy()

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
1553
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """
    def __init__( self, matchString ):
        # the base Literal holds the upper-cased form used for comparisons
        super(CaselessLiteral,self).__init__( matchString.upper() )
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, defining literal) if the upper-cased text
           at loc equals the upper-cased match string; otherwise raise this
           element's exception object, updated with the current location
           and string."""
        candidate = instring[ loc:loc+self.matchLen ]
        if candidate.upper() == self.match:
            return loc+self.matchLen, self.returnString
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1575
class CaselessKeyword(Keyword):
    """Caseless version of Keyword: the keyword is matched ignoring the case
       of letters.  identChars defaults to the current
       Keyword.DEFAULT_KEYWORD_CHARS."""
    def __init__( self, matchString, identChars=None ):
        # resolve the default at call time, so that
        # Keyword.setDefaultKeywordChars affects elements created afterwards
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, match string) if the keyword is found at loc,
           ignoring case, and is not followed by an identifier character;
           otherwise raise this element's exception object.  Note that the
           character in front of the match is not examined here."""
        if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
             (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ):
            return loc+self.matchLen, self.match
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
1589
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for min is 1 (a
       minimum value < 1 is not valid); the default values for max and exact
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
        super(Word,self).__init__()
        self.initCharsOrig = initChars
        self.initChars = _str2dict(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = _str2dict(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = _str2dict(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both the minimum and the maximum
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # words with no length restriction and no space in their character
        # sets are matched with a precompiled regular expression
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # a single initial character can be written as an escaped
                # literal; a longer initChars string must stay a character set
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # fall back to the character-by-character matcher
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, matched word) for a word starting at loc,
           or raise this element's exception object, updated with the
           current location and string."""
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc,result.group()

        if not(instring[ loc ] in self.initChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit maximum, a word that continues past it is rejected
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the assigned name if the base class can supply one, else a
           generated "W:(...)" description of the character sets."""
        try:
            return super(Word,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                # long character sets are abbreviated to their first 4 characters
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
1716 1717
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    def __init__( self, pattern, flags=0):
        """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags."""
        super(Regex,self).__init__()

        if len(pattern) == 0:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            # warn, then let the original compile error propagate
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, ParseResults) for a regular expression match
           at loc; each named group of the match is also stored in the results
           under its group name.  On failure, raise this element's exception
           object, updated with the current location and string."""
        result = self.re.match(instring,loc)
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        d = result.groupdict()
        ret = ParseResults(result.group())
        if d:
            for k in d:
                ret[k] = d[k]
        return loc,ret

    def __str__( self ):
        """Return the assigned name if the base class can supply one, else a
           generated "Re:(...)" description of the pattern."""
        try:
            return super(Regex,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
1773 1774
1775 -class QuotedString(Token):
1776 """Token for matching strings that are delimited by quoting characters. 1777 """
def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
    """
       Defined with the following parameters:
        - quoteChar - string of one or more characters defining the quote delimiting string
        - escChar - character to escape quotes, typically backslash (default=None)
        - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
        - multiline - boolean indicating whether quotes can span multiple lines (default=False)
        - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
        - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

       A SyntaxWarning is issued and SyntaxError raised when either quote
       string is empty after stripping whitespace.  If the generated regular
       expression does not compile, a SyntaxWarning is issued and the
       sre_constants.error is re-raised.
    """
    super(QuotedString,self).__init__()

    # whitespace around the delimiters is dropped before they are validated
    quoteChar = quoteChar.strip()
    if not quoteChar:
        warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
        raise SyntaxError()

    if endQuoteChar is not None:
        endQuoteChar = endQuoteChar.strip()
        if not endQuoteChar:
            warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()
    else:
        # closing delimiter defaults to the opening one
        endQuoteChar = quoteChar

    self.quoteChar = quoteChar
    self.quoteCharLen = len(quoteChar)
    self.firstQuoteChar = quoteChar[0]
    self.endQuoteChar = endQuoteChar
    self.endQuoteCharLen = len(endQuoteChar)
    self.escChar = escChar
    self.escQuote = escQuote
    self.unquoteResults = unquoteResults

    # characters that a plain body character may NOT be: the first char of
    # the end quote, line breaks (single-line mode only) and the escape char
    if escChar is not None:
        escInRange = _escapeRegexRangeChars(escChar)
    else:
        escInRange = ''
    if multiline:
        self.flags = re.MULTILINE | re.DOTALL
        lineBreaks = ''
    else:
        self.flags = 0
        lineBreaks = r'\n\r'

    bodyAlternatives = [ '[^%s%s%s]' % ( _escapeRegexRangeChars(endQuoteChar[0]),
                                         lineBreaks,
                                         escInRange ) ]

    # for a multi-character end quote, also accept every proper prefix of it
    # that is followed by a character which breaks the end-quote sequence
    for i in range(len(endQuoteChar)-1, 0, -1):
        bodyAlternatives.append( "(?:%s[^%s])" % ( re.escape(endQuoteChar[:i]),
                                                   _escapeRegexRangeChars(endQuoteChar[i]) ) )
    if escQuote:
        bodyAlternatives.append( r'(?:%s)' % re.escape(escQuote) )
    if escChar:
        bodyAlternatives.append( r'(?:%s.)' % re.escape(escChar) )
        self.escCharReplacePattern = re.escape(self.escChar)+"(.)"

    self.pattern = "%s(?:%s)*%s" % ( re.escape(quoteChar),
                                     "|".join(bodyAlternatives),
                                     re.escape(endQuoteChar) )

    try:
        self.re = re.compile(self.pattern, self.flags)
        self.reString = self.pattern
    except sre_constants.error:
        warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
            SyntaxWarning, stacklevel=2)
        raise

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    #self.myException.msg = self.errmsg
    self.mayIndexError = False
    self.mayReturnEmpty = True
1851
def parseImpl( self, instring, loc, doActions=True ):
    """Match a quoted string starting at loc.

       Returns (new location, matched text).  When self.unquoteResults is
       set, the delimiters are stripped and, for string results, escape
       characters and escaped quotes are replaced.  On failure the element's
       exception object (self.myException) is stamped with loc and the input
       string and raised.
    """
    # cheap first-character test before running the compiled regex
    match = None
    if instring[loc] == self.firstQuoteChar:
        match = self.re.match(instring,loc)
    if not match:
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    loc = match.end()
    ret = match.group()

    if self.unquoteResults:

        # strip off quotes
        ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

        if isinstance(ret,basestring):
            # replace escaped characters; the template is a raw string so
            # that \g is not treated as a (invalid) string-literal escape
            if self.escChar:
                ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

            # replace escaped quotes
            if self.escQuote:
                ret = ret.replace(self.escQuote, self.endQuoteChar)

    return loc, ret
1878
def __str__( self ):
    """Return the base-class string form if it can be produced, otherwise a
       cached description built from the opening and closing quote strings.
    """
    try:
        return super(QuotedString,self).__str__()
    except:
        # any failure in the base implementation falls through to the
        # locally generated description below
        pass

    description = self.strRepr
    if description is None:
        description = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
        self.strRepr = description

    return description
1889 1890
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
       Defined with string containing all disallowed characters, and an optional
       minimum, maximum, and/or exact length.  The default value for min is 1 (a
       minimum value < 1 is not valid); the default values for max and exact
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        """Store the disallowed characters and the length limits.

           Raises ValueError if min < 1.  A positive exact overrides both
           min and max.
        """
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        if exact > 0:
            # an exact length pins both bounds
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            if max > 0:
                self.maxLen = max
            else:
                self.maxLen = _MAX_INT

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        #self.myException.msg = self.errmsg
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of characters not in self.notChars, starting at loc.

           Returns (end location, matched text).  Raises self.myException
           (stamped with location and input string) when the first character
           is disallowed or the run is shorter than self.minLen.
        """
        forbidden = self.notChars
        if instring[loc] in forbidden:
            #~ raise ParseException( instring, loc, self.errmsg )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        start = loc
        # never scan past the maximum length or the end of the input
        stop = min( start+self.maxLen, len(instring) )
        loc = start + 1
        while loc < stop and instring[loc] not in forbidden:
            loc += 1

        if loc - start < self.minLen:
            #~ raise ParseException( instring, loc, self.errmsg )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the base-class string form if available, else a cached
           "!W:(...)" description showing at most four disallowed characters.
        """
        try:
            return super(CharsNotIn, self).__str__()
        except:
            pass

        if self.strRepr is None:
            shown = self.notChars
            if len(shown) > 4:
                self.strRepr = "!W:(%s...)" % shown[:4]
            else:
                self.strRepr = "!W:(%s)" % shown

        return self.strRepr
1961
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
       by pyparsing grammars.  This class is included when some whitespace structures
       are significant.  Define with a string containing the whitespace characters to be
       matched; default is " \\t\\r\\n".  Also takes optional min, max, and exact arguments,
       as defined for the Word class."""
    # display names used to build the element name from the matched characters
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """Store the characters to match and the length limits.

           The characters being matched are removed from this element's own
           skippable whitespace.  A positive exact overrides both min and max.
        """
        super(White,self).__init__()
        self.matchWhite = ws
        skippable = [ c for c in self.whiteChars if c not in ws ]
        self.setWhitespaceChars( "".join(skippable) )
        #~ self.leaveWhitespace()
        self.name = "".join([ White.whiteStrs[c] for c in ws ])
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name
        #self.myException.msg = self.errmsg

        if exact > 0:
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            if max > 0:
                self.maxLen = max
            else:
                self.maxLen = _MAX_INT

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of characters from self.matchWhite, starting at loc.

           Returns (end location, matched text).  Raises self.myException
           (stamped with location and input string) when the first character
           does not match or the run is shorter than self.minLen.
        """
        wanted = self.matchWhite
        if instring[ loc ] not in wanted:
            #~ raise ParseException( instring, loc, self.errmsg )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        start = loc
        # never scan past the maximum length or the end of the input
        limit = min( start + self.maxLen, len(instring) )
        loc = start + 1
        while loc < limit and instring[loc] in wanted:
            loc += 1

        if loc - start < self.minLen:
            #~ raise ParseException( instring, loc, self.errmsg )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]
2018 2019
class _PositionToken(Token):
    """Common base for the position-testing tokens defined below; names the
       element after its concrete class and flags it as able to return empty.
    """
    def __init__( self ):
        super(_PositionToken,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        # the element is named after whichever subclass is instantiated
        self.name = type(self).__name__
2026
class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""
    def __init__( self, colno ):
        """Remember the target column number colno."""
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorable expressions and whitespace until the target column
           (as reported by col()) is reached, and return the new location.
        """
        if col(loc,instring) == self.col:
            # already in position
            return loc

        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        end = len(instring)
        while loc < end and instring[loc].isspace() and col( loc, instring ) != self.col:
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the text between loc and the target column.

           Raises ParseException if loc is already past the target column.
        """
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - current
        return target, instring[ loc: target ]
2049
class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__( self ):
        super(LineStart,self).__init__()
        # newline is significant here, so it is removed from the skippable whitespace
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( nonNewlineWhite )
        self.errmsg = "Expected start of line"
        #self.myException.msg = self.errmsg

    def preParse( self, instring, loc ):
        """Return loc advanced by one if the character found after the
           inherited pre-parse step is a newline; otherwise return loc unchanged.
        """
        skipped = super(LineStart,self).preParse(instring,loc)
        if instring[skipped] == "\n":
            return loc + 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with an empty token list when loc is 0, equals the
           pre-parsed start of the string, or directly follows a newline;
           otherwise raise self.myException stamped with loc and instring.
        """
        if ( loc != 0 and
             loc != self.preParse( instring, 0 ) and
             instring[loc-1] != "\n" ):
            #~ raise ParseException( instring, loc, "Expected start of line" )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
2074
class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""
    def __init__( self ):
        super(LineEnd,self).__init__()
        # newline is significant here, so it is removed from the skippable whitespace
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( nonNewlineWhite )
        self.errmsg = "Expected end of line"
        #self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a newline at loc (returning it as the token) or the exact end
           of the input (returning an empty token list); in both cases the
           returned location is loc+1.  Anything else raises self.myException
           stamped with loc and instring.
        """
        instrlen = len(instring)
        if loc == instrlen:
            return loc+1, []
        if loc < instrlen and instring[loc] == "\n":
            return loc+1, "\n"

        #~ raise ParseException( instring, loc, "Expected end of line" )
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2100
class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse string"""
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"
        #self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with an empty token list when loc is 0 or when everything
           before loc is skipped by preParse (whitespace and ignorables);
           otherwise raise self.myException stamped with loc and instring.
        """
        if loc != 0 and loc != self.preParse( instring, 0 ):
            #~ raise ParseException( instring, loc, "Expected start of text" )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
2118
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"
        #self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with an empty token list when loc is at or past the end of
           instring; raise self.myException (stamped with loc and instring)
           when input remains.

           At the exact end the returned location is loc+1; past the end it
           is loc unchanged.
        """
        instrlen = len(instring)
        if loc < instrlen:
            #~ raise ParseException( instring, loc, "Expected end of text" )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        if loc == instrlen:
            return loc+1, []
        # only loc > instrlen remains; the former trailing "else" branch
        # could never execute and has been removed
        return loc, []
2142
class WordStart(_PositionToken):
    """Matches if the current position is at the beginning of a Word, and
       is not preceded by any character in a given set of wordChars
       (default=printables). To emulate the \\b behavior of regular expressions,
       use WordStart(alphanums). WordStart will also match at the beginning of
       the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        """Store wordChars (converted with _str2dict) for membership tests."""
        super(WordStart,self).__init__()
        self.wordChars = _str2dict(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with an empty token list at loc 0, or when the previous
           character is not a word character and the current one is;
           otherwise raise self.myException stamped with loc and instring.
        """
        if loc != 0:
            if (instring[loc-1] in self.wordChars or
                instring[loc] not in self.wordChars):
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc
        return loc, []
2164
class WordEnd(_PositionToken):
    """Matches if the current position is at the end of a Word, and
       is not followed by any character in a given set of wordChars
       (default=printables). To emulate the \\b behavior of regular expressions,
       use WordEnd(alphanums). WordEnd will also match at the end of
       the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        """Store wordChars (converted with _str2dict) for membership tests;
           leading whitespace is not skipped by this element.
        """
        super(WordEnd,self).__init__()
        self.wordChars = _str2dict(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with an empty token list at or past the end of the input,
           or when the previous character is a word character and the current
           one is not; otherwise raise self.myException stamped with loc and
           instring.
        """
        instrlen = len(instring)
        if instrlen>0 and loc<instrlen:
            # loc == 0 has no preceding character: without the explicit test,
            # instring[loc-1] would wrap around to the LAST character of the
            # input and make the result depend on unrelated text
            if (instring[loc] in self.wordChars or
                loc == 0 or
                instring[loc-1] not in self.wordChars):
                #~ raise ParseException( instring, loc, "Expected end of word" )
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc
        return loc, []
2189 2190
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__( self, exprs, savelist = False ):
        """Store the contained expressions in self.exprs.

           A list is used as given (not copied), a string is wrapped in a
           Literal, and any other single object becomes a one-element list.
        """
        super(ParseExpression,self).__init__(savelist)
        if isinstance( exprs, list ):
            contained = exprs
        elif isinstance( exprs, basestring ):
            contained = [ Literal( exprs ) ]
        else:
            contained = [ exprs ]
        self.exprs = contained
        self.callPreparse = False

    def __getitem__( self, i ):
        """Return the i-th contained expression."""
        return self.exprs[i]

    def append( self, other ):
        """Add other to the contained expressions, discard the cached string
           form, and return self.
        """
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the change is applied to this element's own
        # list of expressions
        copies = [ e.copy() for e in self.exprs ]
        self.exprs = copies
        for e in copies:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register other as ignorable on this element and pass the newly
           registered ignore expression to every contained expression.
           A Suppress that is already registered is not added again.
           Returns self.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self

        super( ParseExpression, self).ignore( other )
        for e in self.exprs:
            e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """Return the base-class string form if available, else a cached
           "ClassName:(exprs)" description.
        """
        try:
            return super(ParseExpression,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline this element and its contents, then flatten a nested
           expression of the same class into this one where that is safe.
           Returns self.
        """
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        def _canCollapse( nested ):
            # a nested expression is merged only if it has the same class and
            # carries no parse actions, results name or debug flag
            return ( isinstance( nested, self.__class__ ) and
                     not(nested.parseAction) and
                     nested.resultsName is None and
                     not nested.debug )

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if len(self.exprs) == 2:
            head = self.exprs[0]
            if _canCollapse( head ):
                self.exprs = head.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= head.mayReturnEmpty
                self.mayIndexError  |= head.mayIndexError

            tail = self.exprs[-1]
            if _canCollapse( tail ):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= tail.mayReturnEmpty
                self.mayIndexError  |= tail.mayIndexError

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the base-class setResultsName and return its result."""
        return super(ParseExpression,self).setResultsName(name,listAllMatches)

    def validate( self, validateTrace=[] ):
        """Validate every contained expression, passing along the trace
           extended with self, then run checkRecursion on this element.
        """
        trace = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(trace)
        self.checkRecursion( [] )
2283
2284 -class And(ParseExpression):
2285 """Requires all given ParseExpressions to be found in the given order. 2286 Expressions may be separated by whitespace. 2287 May be constructed using the '+' operator. 2288 """ 2289
2290 - class _ErrorStop(Empty):
2291 - def __init__(self, *args, **kwargs):
2292 super(Empty,self).__init__(*args, **kwargs) 2293 self.leaveWhitespace()
2294
2295 - def __init__( self, exprs, savelist = True ):
2296 super(And,self).__init__(exprs, savelist) 2297 self.mayReturnEmpty = True 2298 for e in self.exprs: 2299 if not e.mayReturnEmpty: 2300 self.mayReturnEmpty = False 2301 break 2302 self.setWhitespaceChars( exprs[0].whiteChars ) 2303 self.skipWhitespace = exprs[0].skipWhitespace 2304 self.callPreparse = True
2305
2306 - def parseImpl( self, instring, loc, doActions=True ):
2307 # pass False as last arg to _parse for first element, since we already 2308 # pre-parsed the string as part of our And pre-parsing 2309 loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False ) 2310 errorStop = False 2311 for e in self.exprs[1:]: 2312 if isinstance(e, And._ErrorStop): 2313 errorStop = True 2314 continue 2315 if errorStop: 2316 try: 2317 loc, exprtokens = e._parse( instring, loc, doActions ) 2318 except ParseSyntaxException: 2319 raise 2320 except ParseBaseException, pe: 2321 raise ParseSyntaxException(pe) 2322 except IndexError, ie: 2323 raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) ) 2324 else: 2325 loc, exprtokens = e._parse( instring, loc, doActions ) 2326 if exprtokens or exprtokens.keys(): 2327 resultlist += exprtokens 2328 return loc, resultlist
2329
2330 - def __iadd__(self, other ):
2331 if isinstance( other, basestring ): 2332 other = Literal( other ) 2333 return self.append( other ) #And( [ self, other ] )
2334
2335 - def checkRecursion( self, parseElementList ):
2336 subRecCheckList = parseElementList[:] + [ self ] 2337 for e in self.exprs: 2338 e.checkRecursion( subRecCheckList ) 2339 if not e.mayReturnEmpty: 2340 break
2341
2342 - def __str__( self ):
2343 if hasattr(self,"name"): 2344 return self.name 2345 2346 if self.strRepr is None: 2347 self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 2348 2349 return self.strRepr
2350 2351
2352 -class Or(ParseExpression):
2353 """Requires that at least one ParseExpression is found. 2354 If two expressions match, the expression that matches the longest string will be used. 2355 May be constructed using the '^' operator. 2356 """
2357 - def __init__( self, exprs, savelist = False ):
2358 super(Or,self).__init__(exprs, savelist) 2359 self.mayReturnEmpty = False 2360 for e in self.exprs: 2361 if e.mayReturnEmpty: 2362 self.mayReturnEmpty = True 2363 break
2364
2365 - def parseImpl( self, instring, loc, doActions=True ):
2366 maxExcLoc = -1 2367 maxMatchLoc = -1 2368 maxException = None 2369 for e in self.exprs: 2370 try: 2371 loc2 = e.tryParse( instring, loc ) 2372 except ParseException, err: 2373 if err.loc > maxExcLoc: 2374 maxException = err 2375 maxExcLoc = err.loc 2376 except IndexError: 2377 if len(instring) > maxExcLoc: 2378 maxException = ParseException(instring,len(instring),e.errmsg,self) 2379 maxExcLoc = len(instring) 2380 else: 2381 if loc2 > maxMatchLoc: 2382 maxMatchLoc = loc2 2383 maxMatchExp = e 2384 2385 if maxMatchLoc < 0: 2386 if maxException is not None: 2387 raise maxException 2388 else: 2389 raise ParseException(instring, loc, "no defined alternatives to match", self) 2390 2391 return maxMatchExp._parse( instring, loc, doActions )
2392
2393 - def __ixor__(self, other ):
2394 if isinstance( other, basestring ): 2395 other = Literal( other ) 2396 return self.append( other ) #Or( [ self, other ] )
2397
2398 - def __str__( self ):
2399 if hasattr(self,"name"): 2400 return self.name 2401 2402 if self.strRepr is None: 2403 self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 2404 2405 return self.strRepr
2406
2407 - def checkRecursion( self, parseElementList ):
2408 subRecCheckList = parseElementList[:] + [ self ] 2409 for e in self.exprs: 2410 e.checkRecursion( subRecCheckList )
2411 2412
2413 -class MatchFirst(ParseExpression):
2414 """Requires that at least one ParseExpression is found. 2415 If two expressions match, the first one listed is the one that will match. 2416 May be constructed using the '|' operator. 2417 """
2418 - def __init__( self, exprs, savelist = False ):
2419 super(MatchFirst,self).__init__(exprs, savelist) 2420 if exprs: 2421 self.mayReturnEmpty = False 2422 for e in self.exprs: 2423 if e.mayReturnEmpty: 2424 self.mayReturnEmpty = True 2425 break 2426 else: 2427 self.mayReturnEmpty = True
2428
2429 - def parseImpl( self, instring, loc, doActions=True ):
2430 maxExcLoc = -1 2431 maxException = None 2432 for e in self.exprs: 2433 try: 2434 ret = e._parse( instring, loc, doActions ) 2435 return ret 2436 except ParseException, err: 2437 if err.loc > maxExcLoc: 2438 maxException = err 2439 maxExcLoc = err.loc 2440 except IndexError: 2441 if len(instring) > maxExcLoc: 2442 maxException = ParseException(instring,len(instring),e.errmsg,self) 2443 maxExcLoc = len(instring) 2444 2445 # only got here if no expression matched, raise exception for match that made it the furthest 2446 else: 2447 if maxException is not None: 2448 raise maxException 2449 else: 2450 raise ParseException(instring, loc, "no defined alternatives to match", self)
2451
2452 - def __ior__(self, other ):
2453 if isinstance( other, basestring ): 2454 other = Literal( other ) 2455 return self.append( other ) #MatchFirst( [ self, other ] )
2456
2457 - def __str__( self ):
2458 if hasattr(self,"name"): 2459 return self.name 2460 2461 if self.strRepr is None: 2462 self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 2463 2464 return self.strRepr
2465
2466 - def checkRecursion( self, parseElementList ):
2467 subRecCheckList = parseElementList[:] + [ self ] 2468 for e in self.exprs: 2469 e.checkRecursion( subRecCheckList )
2470 2471
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the '&' operator.
    """
    def __init__( self, exprs, savelist = True ):
        """Store the expressions; the Each may return empty only if every
           contained expression may.
        """
        super(Each,self).__init__(exprs, savelist)
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        else:
            self.mayReturnEmpty = True
        self.skipWhitespace = True
        # expression groups are built on the first call to parseImpl
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Find an order in which the contained expressions match, then parse
           them in that order and return (location, merged ParseResults).

           Raises ParseException if a required expression never matched.
        """
        if self.initExprGroups:
            # sort the contained expressions by kind, once
            self.optionals = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
            self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
            self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
            plain = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
            self.required = plain + self.multirequired
            self.initExprGroups = False

        scanLoc = loc
        pendingRequired = self.required[:]
        pendingOptional = self.optionals[:]
        matchOrder = []

        # keep sweeping over the candidates until a full sweep matches nothing
        while True:
            candidates = pendingRequired + pendingOptional + self.multioptionals + self.multirequired
            failures = 0
            for e in candidates:
                try:
                    scanLoc = e.tryParse( instring, scanLoc )
                except ParseException:
                    failures += 1
                else:
                    matchOrder.append(e)
                    if e in pendingRequired:
                        pendingRequired.remove(e)
                    elif e in pendingOptional:
                        pendingOptional.remove(e)
            if failures == len(candidates):
                break

        if pendingRequired:
            missing = ", ".join( [ _ustr(e) for e in pendingRequired ] )
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [ e for e in self.exprs if isinstance(e,Optional) and e.expr in pendingOptional ]

        # real parse, in the order discovered above
        parsed = []
        for e in matchOrder:
            loc, tokens = e._parse(instring,loc,doActions)
            parsed.append(tokens)

        # merge; a results name seen more than once accumulates its values
        finalResults = ParseResults([])
        for r in parsed:
            repeated = {}
            for key in r.keys():
                if key in finalResults.keys():
                    combined = ParseResults(finalResults[key])
                    combined += ParseResults(r[key])
                    repeated[key] = combined
            finalResults += ParseResults(r)
            for key, value in repeated.items():
                finalResults[key] = value
        return loc, finalResults

    def __str__( self ):
        """Return self.name if set, else a cached "{a & b}" description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [ _ustr(e) for e in self.exprs ]
            self.strRepr = "{" + " & ".join( parts ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run checkRecursion on every contained expression with self added to the list."""
        visited = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( visited )
2556 2557
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__( self, expr, savelist=False ):
        """Wrap a single expression (a string is converted to a Literal).

           When expr is not None, its index-error/empty flags, whitespace
           settings, saveAsList, callPreparse and ignore expressions are
           copied onto this element.
        """
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate to the wrapped expression; raise ParseException if no
           expression has been set.
        """
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Stop skipping whitespace on this element and on a copy of the
           wrapped expression (if any).  Returns self.
        """
        self.skipWhitespace = False
        # copy only when there is something to copy; previously copy() was
        # called before the None check and failed on an unset expression
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register other as ignorable on this element and pass the newly
           registered ignore expression to the wrapped expression (if any).
           A Suppress that is already registered is not added again.
           Returns self.
        """
        if isinstance( other, Suppress ):
            if other not in self.ignoreExprs:
                super( ParseElementEnhance, self).ignore( other )
                if self.expr is not None:
                    self.expr.ignore( self.ignoreExprs[-1] )
        else:
            super( ParseElementEnhance, self).ignore( other )
            if self.expr is not None:
                self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and the wrapped expression.  Returns self."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise RecursiveGrammarException if self already appears in
           parseElementList; otherwise continue the check in the wrapped
           expression.
        """
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression, passing along the trace extended
           with self, then run checkRecursion on this element.
        """
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """Return the base-class string form if available, else a cached
           "ClassName:(expr)" description (built only when an expression is set).
        """
        try:
            return super(ParseElementEnhance,self).__str__()
        except:
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
2628 2629
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.  FollowedBy
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  FollowedBy always returns a null token list."""
    def __init__( self, expr ):
        super(FollowedBy,self).__init__(expr)
        # a lookahead consumes nothing, so it can always "return empty"
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Run tryParse on the wrapped expression at loc; any exception it
           raises propagates.  On success return loc unchanged with no tokens.
        """
        lookahead = self.expr
        lookahead.tryParse( instring, loc )
        return loc, []
2642 2643
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.  NotAny
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the current
    position.  Also, NotAny does *not* skip over leading whitespace. NotAny
    always returns a null token list.  May be constructed using the '~' operator."""
    def __init__( self, expr ):
        super(NotAny,self).__init__(expr)
        #~ self.leaveWhitespace()
        self.skipWhitespace = False  # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)
        #self.myException = ParseException("",0,self.errmsg,self)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return loc unchanged with no tokens when the wrapped expression
           fails (ParseException or IndexError) at loc; if it matches, raise
           self.myException stamped with loc and instring.
        """
        try:
            self.expr.tryParse( instring, loc )
        except (ParseException,IndexError):
            # the unwanted expression is absent: that is the success case
            return loc, []

        #~ raise ParseException(instring, loc, self.errmsg )
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def __str__( self ):
        """Return self.name if set, else a cached "~{expr}" description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"

        return self.strRepr
2679 2680
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""
    def __init__( self, expr ):
        super(ZeroOrMore,self).__init__(expr)
        # zero repetitions is a valid match
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse the wrapped expression as many times as it matches and
           return (location after the last match, accumulated tokens).

           A ParseException or IndexError ends the repetition; if even the
           first attempt fails, loc is returned unchanged with an empty list.
        """
        tokens = []
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
            skipIgnorables = ( len(self.ignoreExprs) > 0 )
            while True:
                nextLoc = loc
                if skipIgnorables:
                    nextLoc = self._skipIgnorables( instring, loc )
                loc, moreTokens = self.expr._parse( instring, nextLoc, doActions )
                # keep results that have tokens or named results
                if moreTokens or moreTokens.keys():
                    tokens += moreTokens
        except (ParseException,IndexError):
            pass

        return loc, tokens

    def __str__( self ):
        """Return self.name if set, else a cached "[expr]..." description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]..."

        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the base-class setResultsName, then mark the returned
           element as saveAsList and return it.
        """
        named = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2718 2719
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""
    def parseImpl( self, instring, loc, doActions=True ):
        """Parse the wrapped expression once (a failure here propagates) and
           then as many further times as it matches; return (location after
           the last match, accumulated tokens).
        """
        # must be at least one
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        try:
            skipIgnorables = ( len(self.ignoreExprs) > 0 )
            while True:
                nextLoc = loc
                if skipIgnorables:
                    nextLoc = self._skipIgnorables( instring, loc )
                loc, moreTokens = self.expr._parse( instring, nextLoc, doActions )
                # keep results that have tokens or named results
                if moreTokens or moreTokens.keys():
                    tokens += moreTokens
        except (ParseException,IndexError):
            # the first failed repetition ends the match
            pass

        return loc, tokens

    def __str__( self ):
        """Return self.name if set, else a cached "{expr}..." description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + _ustr(self.expr) + "}..."

        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the base-class setResultsName, then mark the returned
           element as saveAsList and return it.
        """
        named = super(OneOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2753
class _NullToken(object):
    """Placeholder object that is always false and renders as an empty string."""
    def __bool__(self):
        # truth value under the Python 3 protocol name
        return False
    # the same method under the Python 2 protocol name
    __nonzero__ = __bool__
    def __str__(self):
        return ""

# module-level sentinel instance; Optional uses it as its "no default given" marker
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       A default return string can also be specified, if the optional expression
       is not found.
    """
    def __init__( self, exprs, default=_optionalNotMatched ):
        """Wrap exprs and remember the default value to report when it does
           not match (the _optionalNotMatched sentinel means "no default").
        """
        super(Optional,self).__init__( exprs, savelist=False )
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the wrapped expression's result if it matches.

           On ParseException or IndexError, return loc unchanged together with
           the default value (also stored under the wrapped expression's
           results name, if it has one) or an empty list when no default was
           given.
        """
        try:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            pass

        fallback = self.defaultValue
        if fallback is _optionalNotMatched:
            return loc, []

        resultsName = self.expr.resultsName
        if resultsName:
            tokens = ParseResults([ fallback ])
            tokens[resultsName] = fallback
        else:
            tokens = [ fallback ]
        return loc, tokens

    def __str__( self ):
        """Return self.name if set, else a cached "[expr]" description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]"

        return self.strRepr
2794 2795
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If include is set to true, the matched expression is also consumed.  The ignore
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.  If failOn is given and it matches at a position
       reached before the target expression is found, the skip fails.
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        """Store the target expression and options.

           - other - expression (or string) to skip to
           - include - also consume the target expression when True
           - ignore - expression registered as ignorable on a copy of the target
           - failOn - expression (or string, converted to a Literal) whose
             match aborts the skip
        """
        super( SkipTo, self ).__init__( other )
        if ignore is not None:
            # work on a copy so the caller's expression is not modified
            self.expr = self.expr.copy()
            self.expr.ignore(ignore)
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        if failOn is not None and isinstance(failOn, basestring):
            self.failOn = Literal(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)
        #self.myException = ParseException("",0,self.errmsg,self)

    def parseImpl( self, instring, loc, doActions=True ):
        """Advance one position at a time until the target expression matches.

           Returns (location, [skipped text]); with includeMatch the target is
           consumed too and, if it produced tokens, they are appended to the
           skipped text in a ParseResults.  Raises self.myException (stamped
           with the location reached and instring) when the input is exhausted
           or when failOn matches first.
        """
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
        while loc <= instrlen:
            if self.failOn:
                # The skip must be abandoned when failOn DOES match here.
                # (The previous flag-based logic had this inverted: it aborted
                # when failOn did not match and carried on when it did.)
                try:
                    self.failOn.tryParse(instring, loc)
                except (ParseException,IndexError):
                    pass
                else:
                    break
            try:
                loc = expr._skipIgnorables( instring, loc )
                # probe for the target without running parse actions
                expr._parse( instring, loc, doActions=False, callPreParse=False )
                skipText = instring[startLoc:loc]
                if self.includeMatch:
                    loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
                    if mat:
                        skipRes = ParseResults( skipText )
                        skipRes += mat
                        return loc, [ skipRes ]
                    else:
                        return loc, [ skipText ]
                else:
                    return loc, [ skipText ]
            except (ParseException,IndexError):
                # target not found here, try the next position
                loc += 1
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2851
class Forward(ParseElementEnhance):
    """Placeholder for an expression whose definition is supplied later -
       needed for recursive grammars, such as algebraic infix notation.
       Once the real expression is known, attach it to the Forward with the '<<' operator.

       Note: mind operator precedence when assigning to a Forward.
       '|' binds more loosely than '<<', so::
          fwdExpr << a | b | c
       is actually evaluated as::
          (fwdExpr << a) | b | c
       which leaves b and c out as parseable alternatives.  Group the alternatives
       explicitly when inserting them into the Forward::
          fwdExpr << (a | b | c)
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Install other (a string is wrapped in a Literal) as the contained
           expression and mirror its parsing attributes onto this element.
           Returns None.
        """
        if isinstance( other, basestring ):
            other = Literal(other)
        self.expr = other
        self.mayReturnEmpty = other.mayReturnEmpty
        # any cached string form describes the previous contents
        self.strRepr = None
        inner = self.expr
        self.mayIndexError = inner.mayIndexError
        self.mayReturnEmpty = inner.mayReturnEmpty
        # setWhitespaceChars must run before skipWhitespace is copied below
        self.setWhitespaceChars( inner.whiteChars )
        self.skipWhitespace = inner.skipWhitespace
        self.saveAsList = inner.saveAsList
        self.ignoreExprs.extend(inner.ignoreExprs)
        return None

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element only; returns self."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the contained expression once; returns self."""
        if self.streamlined:
            return self
        # set the flag first so a recursive grammar does not loop back here
        self.streamlined = True
        if self.expr is not None:
            self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """Validate the contained expression, unless this element is already
           in validateTrace, then run checkRecursion.  The shared default
           list is never modified; a copy is extended instead.
        """
        if self not in validateTrace and self.expr is not None:
            self.expr.validate(validateTrace[:]+[self])
        self.checkRecursion([])

    def __str__( self ):
        """Return the assigned name, or "<ClassName>: <contained expr>"."""
        if hasattr(self,"name"):
            return self.name

        # Temporarily become a _ForwardNoRecurse so that a recursive reference
        # back to this element renders as "..." instead of recursing forever.
        self._revertClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is None:
                retString = "None"
            else:
                retString = _ustr(self.expr)
        finally:
            self.__class__ = self._revertClass
        return self.__class__.__name__ + ": " + retString

    def copy(self):
        """Copy a defined Forward normally; for an undefined one, return a
           new Forward that refers back to this element.
        """
        if self.expr is not None:
            return super(Forward,self).copy()
        duplicate = Forward()
        duplicate << self
        return duplicate
2923
class _ForwardNoRecurse(Forward):
    """Stand-in class that Forward.__str__ switches to while rendering, so a
       self-referencing Forward prints as "..." rather than recursing.
    """
    def __str__( self ):
        return "..."
2927
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results."""
    def __init__( self, expr, savelist=False ):
        # savelist is accepted for signature compatibility but is not passed on
        super(TokenConverter,self).__init__( expr )
        self.saveAsList = False
2933
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens.
       Deprecated: constructing one emits a DeprecationWarning; use the
       upcaseTokens parse action instead.
    """
    def __init__(self, *args):
        super(Upcase,self).__init__(*args)
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                       DeprecationWarning,stacklevel=2)

    def postParse( self, instring, loc, tokenlist ):
        """Return a new list holding the upper-cased form of every token."""
        # str.upper() replaces the deprecated string.upper() module function,
        # which is not available on Python 3; the result is the same.
        return [ tok.upper() for tok in tokenlist ]
2943 2944
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       By default the matched pieces must be contiguous in the input string;
       pass 'adjacent=False' to the constructor to allow gaps between them.
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        self.adjacent = adjacent
        self.joinString = joinString
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.skipWhitespace = True

    def ignore( self, other ):
        """Register an ignorable expression; returns self.  When adjacent is
           set, only ParserElement.ignore is applied to this element.
        """
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        """Replace the matched tokens with a single joined string."""
        # start from a copy, emptied of its list items, then append the joined text
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([ joined ], modal=self.modalResults)

        if self.resultsName and len(combined.keys()) > 0:
            return [ combined ]
        return combined
2975
class Group(TokenConverter):
    """Converter that returns the matched tokens nested inside a list - handy for
       keeping the tokens of ZeroOrMore and OneOrMore expressions together.
    """
    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Wrap the whole token list in a one-element list."""
        grouped = [ tokenlist ]
        return grouped
2984
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be looked up using the first token of that element as its key.
       Useful for tabular report scraping when the first column can serve as an item key.
    """
    def __init__( self, exprs ):
        super(Dict,self).__init__( exprs )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Assign each non-empty entry of tokenlist under a key taken from its
           first token, then return the token list (wrapped in a list when this
           element has a results name).
        """
        for position,entry in enumerate(tokenlist):
            size = len(entry)
            if size == 0:
                continue
            key = entry[0]
            if isinstance(key,int):
                # integer keys are stored under their stripped string form
                key = _ustr(entry[0]).strip()
            if size == 1:
                # key only, no value
                value = ""
            elif size == 2 and not isinstance(entry[1],ParseResults):
                # simple key/value pair
                value = entry[1]
            else:
                remainder = entry.copy()
                del remainder[0]
                if len(remainder) != 1 or (isinstance(remainder,ParseResults) and remainder.keys()):
                    value = remainder
                else:
                    value = remainder[0]
            tokenlist[key] = _ParseResultsWithOffset(value,position)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
3017 3018
class Suppress(TokenConverter):
    """Converter that discards the tokens of a parsed expression."""
    def suppress( self ):
        """Already suppressing; return this element unchanged."""
        return self

    def postParse( self, instring, loc, tokenlist ):
        """Drop every matched token."""
        return []
3026 3027
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once.
       Later calls raise ParseException until reset() is called.
    """
    def __init__(self, methodCall):
        self.called = False
        self.callable = ParserElement._normalizeParseActionArgs(methodCall)

    def __call__(self,s,l,t):
        if self.called:
            raise ParseException(s,l,"")
        results = self.callable(s,l,t)
        # only mark as used once the wrapped action has returned
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped parse action to be called one more time."""
        self.called = False
3041
def traceParseAction(f):
    """Decorator for debugging parse actions.

       The returned wrapper writes an ">>entering" line to sys.stderr before
       calling the parse action, and a "<<leaving" line afterwards showing
       either the return value or the exception raised (which is re-raised).
    """
    f = ParserElement._normalizeParseActionArgs(f)
    def z(*paArgs):
        # __name__ exists on functions in both Python 2 and 3 (func_name is
        # Python 2 only); fall back to the class name for other callables
        thisFunc = getattr(f, "__name__", f.__class__.__name__)
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # an extra leading argument is the instance of a bound method
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception:
            # sys.exc_info() avoids the version-specific "except X, e" / "except X as e" syntax
            exc = sys.exc_info()[1]
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z

#
# global helpers
#
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default the list elements and delimiters may be separated by whitespace and
       comments; passing 'combine=True' overrides this.
       With combine set to True, the matching tokens are returned as one token string
       that includes the delimiters; otherwise they are returned as a list of tokens
       with the delimiters suppressed.
    """
    exprText = _ustr(expr)
    dlName = "%s [%s %s]..." % (exprText, _ustr(delim), exprText)
    if not combine:
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    else:
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    return listExpr.setName(dlName)
3080
def countedArray( expr ):
    """Helper to define a counted list of expressions.
       The pattern defined has the form::
           integer expr expr expr...
       where the leading integer says how many expr expressions follow.
       The matched tokens are the array of expr tokens as a list - the leading count token is suppressed.
    """
    itemsExpr = Forward()
    def countFieldParseAction(s,l,t):
        n = int(t[0])
        # redefine the array part now that the count is known
        itemsExpr << (n and Group(And([expr]*n)) or Group(empty))
        # returning an empty list drops the count token from the results
        return []
    countExpr = Word(nums).setName("arrayLen").setParseAction(countFieldParseAction, callDuringTry=True)
    return ( countExpr + itemsExpr )
3095 -def _flatten(L):
3096 if type(L) is not list: return [L] 3097 if L == []: return L 3098 return _flatten(L[0]) + _flatten(L[1:])
3099
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because the repeat is matched as a
       literal, it will also match the leading "1:1" in "1:10".
       If this is not desired, use matchPreviousExpr.
       Do *not* use with packrat parsing enabled.
    """
    repeater = Forward()
    def copyTokenToRepeater(s,l,t):
        if not t:
            repeater << Empty()
            return
        if len(t) == 1:
            repeater << t[0]
            return
        # several (possibly nested) tokens: flatten them and match each in sequence
        literals = [ Literal(tt) for tt in _flatten(t.asList()) ]
        repeater << And( literals )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return repeater
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because this matches by
       expressions, it will *not* match the leading "1:1" in "1:10";
       the expressions are evaluated first, and then compared, so
       "1" is compared with "10".
       Do *not* use with packrat parsing enabled.
    """
    repeater = Forward()
    # the repeat parses with a copy of the original expression
    repeater << expr.copy()
    def copyTokenToRepeater(s,l,t):
        matchTokens = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            # reject the repeat unless it produced the same flattened tokens
            if _flatten(t.asList()) != matchTokens:
                raise ParseException("",0,"")
        repeater.setParseAction( mustMatchTheseTokens, callDuringTry=True )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return repeater
def _escapeRegexRangeChars(s):
    """Escape s for use inside a regex [] character class: backslash-escape
       \\ ^ - ] and spell newline and tab as the two-character forms \\n and \\t.
    """
    # the backslash must be handled first so later escapes are not doubled
    substitutions = [ (ch, _bslash+ch) for ch in r"\^-]" ]
    substitutions.append( ("\n", r"\n") )
    substitutions.append( ("\t", r"\t") )
    for old,new in substitutions:
        s = s.replace(old,new)
    return _ustr(s)
3159
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a MatchFirst for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list or tuple of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a MatchFirst object (if caseless=True, or
          if creating a Regex raises an exception)

       Any other type for strs emits a SyntaxWarning and is treated as an
       empty set of literals.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        # always work on a real list: the loop below deletes and inserts
        # items, which a tuple (previously kept as-is) does not support
        symbols = list(strs)
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # keep going with no symbols instead of failing on an unbound name
        symbols = []

    # drop duplicates, and move any symbol that is masked by an earlier,
    # shorter prefix in front of that prefix so the longer one is tried first
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: a character class is enough
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)


    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
3218
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by giving the patterns for
       the key and the value.  Takes care of nesting the Dict, ZeroOrMore, and Group
       tokens in the proper order.  The key pattern may include delimiting markers or
       punctuation, as long as they are suppressed so that only the significant key
       text remains.  The value pattern may include named results, so that the Dict
       results can include named token fields.
    """
    entry = Group ( key + value )
    entries = ZeroOrMore( entry )
    return Dict( entries )
3228
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text. Simpler to use than the parse action keepOriginalText, and does not
       require the inspect module to chase up the call stack.  By default, returns a
       string containing the original parsed text.

       If the optional asString argument is passed as False, then the return value is a
       ParseResults containing any results names that were originally matched, and a
       single token containing the original matched text from the input string.  So if
       the expression passed to originalTextFor contains expressions with defined
       results names, you must set asString to False if you want to preserve those
       results name values."""
    # zero-width marker whose only token is the location at which it matched
    locMarker = Empty().setParseAction(lambda s,loc,t: loc)
    # bracket expr with markers so the start and end offsets are available by name
    matchExpr = locMarker("_original_start") + expr + locMarker("_original_end")
    if asString:
        # replace all tokens with the slice of the input between the two markers
        extractText = lambda s,l,t: s[t._original_start:t._original_end]
    else:
        def extractText(s,l,t):
            # clear the token list, insert the original text as the only token,
            # then remove the two helper names
            del t[:]
            t.insert(0, s[t._original_start:t._original_end])
            del t["_original_start"]
            del t["_original_end"]
    matchExpr.setParseAction(extractText)
    return matchExpr

# convenience constants for positional expressions
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")

# grammar for the "[...]" character-set strings accepted by srange
# a backslash followed by one punctuation character; yields that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
# backslash + "0x" + hex digits; yields the character with that code
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
# backslash + "0" + octal digits; yields the character with that code
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
# "a-z" style range, grouped as a [first, last] pair
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

# expand a grouped range into every character from first to last; pass single characters through
_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)

def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
       If the string cannot be parsed or expanded, an empty string is returned.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # best effort: a malformed range yields "" - but do not swallow
        # KeyboardInterrupt/SystemExit the way a bare except would
        return ""
3293
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
       column in the input text.  The returned parse action raises ParseException
       when the match location is not at column n.
    """
    def verifyCol(strg,locn,toks):
        if col(locn,strg) == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
       useful when used with transformString().  The returned action ignores its
       arguments and returns a new one-element list holding replStr on every call.
    """
    def _replFunc(*args):
        replacement = [replStr]
        return replacement
    return _replFunc
def removeQuotes(s,l,t):
    """Helper parse action for removing quotation marks from parsed quoted strings.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
       Returns the first token with its first and last characters removed.
    """
    quoted = t[0]
    return quoted[1:-1]
3317
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case.
       Each token is converted to a string with _ustr first.
    """
    return [ _ustr(tok).upper() for tok in t ]
3321
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case.
       Each token is converted to a string with _ustr first.
    """
    return [ _ustr(tok).lower() for tok in t ]
3325
def keepOriginalText(s,startLoc,t):
    """Helper parse action to preserve original parsed text,
       overriding any nested parse actions.  The tokens in t are replaced
       in place by the input text from startLoc to the end location
       reported by getTokensEndLoc, and t is returned."""
    try:
        stopLoc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    originalText = s[startLoc:stopLoc]
    del t[:]
    t += ParseResults(originalText)
    return t
3336
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens.  Raises ParseFatalException when no
       "_parseNoCache" frame is found on the call stack."""
    import inspect
    frames = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for correct calling routine
        for frameRecord in frames[2:]:
            if frameRecord[3] != "_parseNoCache":
                continue
            # the end location is the "loc" local variable of that frame
            return frameRecord[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the reference to the frame records
        del frames
3352
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

       tagStr is either a string (turned into a Keyword, caseless unless xml is true)
       or an existing expression; xml selects the XML or the HTML attribute rules.
       Returns the tuple (openTag, closeTag).
    """
    if isinstance(tagStr,basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        # NOTE(review): relies on the expression having a "name" attribute - confirm with callers
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if (xml):
        # XML: every attribute is name="value" with a double-quoted value
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        openTag = Suppress("<") + tagStr + \
                Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
    else:
        # HTML: values may be quoted or a bare run of printables other than ">",
        # the "=value" part is optional, and attribute names are lower-cased
        printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        openTag = Suppress("<") + tagStr + \
                Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
                Optional( Suppress("=") + tagAttrValue ) ))) + \
                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names are "start"/"end" + the tag name, title-cased with ":" and spaces removed
    openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("</%s>" % tagStr)

    return openTag, closeTag
3380
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name.
       Returns the (openTag, closeTag) pair built by _makeTags with xml disabled."""
    htmlTags = _makeTags( tagStr, False )
    return htmlTags
3384
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name.
       Returns the (openTag, closeTag) pair built by _makeTags with xml enabled."""
    xmlTags = _makeTags( tagStr, True )
    return xmlTags
3388
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with makeXMLTags or makeHTMLTags.  Use withAttribute to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       <TD> or <DIV>.

       Call withAttribute with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in (class="Customer",align="right"), or
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second form.  Attribute
       names are matched insensitive to upper/lower case.

       When positional arguments are given, the keyword arguments are ignored.

       To verify that the attribute exists, but without specifying a value, pass
       withAttribute.ANY_VALUE as the value.
    """
    required = [ (attrName,attrValue) for attrName,attrValue in (args or attrDict.items()) ]
    def pa(s,l,tokens):
        for attrName,attrValue in required:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue == withAttribute.ANY_VALUE:
                # presence alone is enough for this attribute
                continue
            if tokens[attrName] != attrValue:
                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                            (attrName, tokens[attrName], attrValue))
    return pa
# unique marker meaning "attribute must be present, any value accepted"
withAttribute.ANY_VALUE = object()

# associativity markers used by operatorPrecedence
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()

def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic element (operand) of the
          nested expression grammar; a parenthesized full expression is also
          accepted wherever baseExpr is
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)

       Raises ValueError for an arity other than 1, 2 or 3, for an unknown
       associativity, or when numTerms is 3 and opExpr is not a pair.
    """
    ret = Forward()
    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for i,operDef in enumerate(opList):
        # pad with None so the parse action member may be omitted from the tuple
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()#.setName("expr%d" % i)
        # each matchExpr is a FollowedBy lookahead followed by the Group that
        # actually collects the tokens for this precedence level
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # opExpr of None: operands simply follow one another
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            # right-associative forms recurse through thisExpr on the right-hand side
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # fall back to the previous level when no operator of this level is present
        thisExpr << ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret << lastExpr
    return ret

# Quoted-string expressions.  Inside the quotes each pattern accepts: any character
# other than the quote, newline, carriage return or backslash; a doubled quote
# character; a backslash-x hex escape; or a backslash followed by any one character.
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
# either of the two forms above
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
# a quoted string immediately preceded by the letter u
unicodeString = Combine(_L('u') + quotedString.copy())

def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       If an expression is not provided for the content argument, the nested
       expression will capture all whitespace-delimited content between delimiters
       as a list of separate values.

       Use the ignoreExpr argument to define expressions that may contain
       opening or closing characters that should not be treated as opening
       or closing characters for nesting, such as quotedString or a comment
       expression.  Specify multiple expressions using an Or or MatchFirst.
       The default is quotedString, but if no expressions are to be ignored,
       then pass None for this argument.

       Raises ValueError if opener equals closer, or if content is None and
       opener and closer are not both strings.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # a default content expression can only be derived from string delimiters
        if isinstance(opener,basestring) and isinstance(closer,basestring):
            if len(opener) == 1 and len(closer)==1:
                # single-character delimiters: exclude them via CharsNotIn
                if ignoreExpr is not None:
                    content = (Combine(OneOrMore(~ignoreExpr +
                                    CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                ).setParseAction(lambda t:t[0].strip()))
                else:
                    content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
                                ).setParseAction(lambda t:t[0].strip()))
            else:
                # multi-character delimiters: use negative lookahead on each delimiter
                if ignoreExpr is not None:
                    content = (Combine(OneOrMore(~ignoreExpr +
                                    ~Literal(opener) + ~Literal(closer) +
                                    CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                ).setParseAction(lambda t:t[0].strip()))
                else:
                    content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                                    CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                ).setParseAction(lambda t:t[0].strip()))
        else:
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
    # recursive definition: a group may contain ignorable text, nested groups, or content
    ret = Forward()
    if ignoreExpr is not None:
        ret << Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
    else:
        ret << Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
    return ret
3551
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one blockStatement.

       Note that blockStatementExpr itself is modified: a backslash followed by
       a line end is added to its ignorable expressions.
    """
    def checkPeerIndent(s,l,t):
        # a statement at the same level must start in the column on top of the stack
        if l >= len(s): return
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseFatalException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # entering a block: the column must be deeper than the current level; push it
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    def checkUnindent(s,l,t):
        # leaving a block: the column must fall back to an enclosing level; pop the stack
        if l >= len(s): return
        curCol = col(l,s)
        if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    # one or more line ends; only tab and space are skipped as whitespace before each
    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)
    if indent:
        smExpr = Group( Optional(NL) +
            FollowedBy(blockStatementExpr) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr

# character sets built with srange from 8-bit character code ranges
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
# parse action: map a matched entity name to its character, or None if unknown
replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->")
restOfLine = Regex(r".*").leaveWhitespace()
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
_noncomma = "".join( [ c for c in printables if c != "," ] )
_commasepitem = Combine(OneOrMore(Word(_noncomma) +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
commaSeparatedList = delimitedList( Optional( quotedString | _commasepitem, default="") ).setName("commaSeparatedList")


if __name__ == "__main__":
3631 - def test( teststring ):
3632 try: 3633 tokens = simpleSQL.parseString( teststring ) 3634 tokenlist = tokens.asList() 3635 print (teststring + "->" + str(tokenlist)) 3636 print ("tokens = " + str(tokens)) 3637 print ("tokens.columns = " + str(tokens.columns)) 3638 print ("tokens.tables = " + str(tokens.tables)) 3639 print (tokens.asXML("SQL",True)) 3640 except ParseBaseException,err: 3641 print (teststring + "->") 3642 print (err.line) 3643 print (" "*(err.column-1) + "^") 3644 print (err) 3645 print()
3646 3647 selectToken = CaselessLiteral( "select" ) 3648 fromToken = CaselessLiteral( "from" ) 3649 3650 ident = Word( alphas, alphanums + "_$" ) 3651 columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens ) 3652 columnNameList = Group( delimitedList( columnName ) )#.setName("columns") 3653 tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens ) 3654 tableNameList = Group( delimitedList( tableName ) )#.setName("tables") 3655 simpleSQL = ( selectToken + \ 3656 ( '*' | columnNameList ).setResultsName( "columns" ) + \ 3657 fromToken + \ 3658 tableNameList.setResultsName( "tables" ) ) 3659 3660 test( "SELECT * from XYZZY, ABC" ) 3661 test( "select * from SYS.XYZZY" ) 3662 test( "Select A from Sys.dual" ) 3663 test( "Select AA,BB,CC from Sys.dual" ) 3664 test( "Select A, B, C from Sys.dual" ) 3665 test( "Select A, B, C from Sys.dual" ) 3666 test( "Xelect A, B, C from Sys.dual" ) 3667 test( "Select A, B, C frox Sys.dual" ) 3668 test( "Select" ) 3669 test( "Select ^^^ frox Sys.dual" ) 3670 test( "Select A, B, C from Sys.dual, Table2 " ) 3671