Package libxyz :: Package parser :: Module block
[hide private]
[frames | no frames]

Source Code for Module libxyz.parser.block

  1  # -*- coding: utf8 -*- 
  2  # 
  3  # Max E. Kuznecov ~syhpoon <syhpoon@syhpoon.name> 2008 
  4  # 
  5  # This file is part of XYZCommander. 
  6  # XYZCommander is free software: you can redistribute it and/or modify 
  7  # it under the terms of the GNU Lesser Public License as published by 
  8  # the Free Software Foundation, either version 3 of the License, or 
  9  # (at your option) any later version. 
 10  # XYZCommander is distributed in the hope that it will be useful, 
 11  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 12  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 
 13  # GNU Lesser Public License for more details. 
 14  # You should have received a copy of the GNU Lesser Public License 
 15  # along with XYZCommander. If not, see <http://www.gnu.org/licenses/>. 
 16   
 17  import re 
 18   
 19  from libxyz.parser import BaseParser 
 20  from libxyz.parser import Lexer 
 21  from libxyz.parser import ParsedData 
 22  from libxyz.exceptions import XYZValueError 
 23  from libxyz.exceptions import LexerError 
 24   
 25  import libxyz 
 26   
class BlockParser(BaseParser):
    """
    BlockParser is used to parse block structures.
    Format:

    name {
        var1 <assign> val1 <delimiter>
        var2 <assign> val2 [<list_separator>val3...] <delimiter>
        ...
    }

    Parsing is driven by a small state machine: every token returned by the
    lexer is dispatched to the handler registered for the current state in
    C{self._parse_table}.
    """

    # State machine states
    STATE_INIT = 0          # Expecting a block name
    STATE_BLOCK_OPEN = 1    # Expecting the block opening token
    STATE_VARIABLE = 2      # Expecting a variable/macro name or block close
    STATE_ASSIGN = 3        # Expecting the assign character
    STATE_VALUE = 4         # Expecting a value (or a macro reference)
    STATE_DELIM = 5         # Expecting a delimiter or block close
    STATE_LIST_VALUE = 6    # Expecting a list separator or end of value

    DEFAULT_OPT = {
        u"comment": u"#",
        u"varre": re.compile(r"^[\w-]+$"),
        u"assignchar": u"=",
        u"delimiter": u"\n",
        u"validvars": (),
        u"value_validator": None,
        u"count": 0,
        u"list_separator": u",",
        u"macrochar": u"&",
        u"var_transform": None,
    }

    def __init__(self, opt=None):
        """
        @param opt: Parser options.
        @type opt: dict

        Available options:
           - comment: Comment character.
             Everything else ignored until EOL.
             Type: I{string (single char)}
             Default: #
           - delimiter: Character to use as delimiter between statements.
             Type: I{string (single char)}
             Default: \\n
           - varre: Valid variable name regular expression.
             Type: I{Compiled re object (L{re.compile})}
             Default: ^[\\w-]+$
           - assignchar: Variable-value split character.
             Type: I{string (single char)}
             Default: =
           - validvars: List of variables valid within block.
             Type: I{sequence}
             Default: ()
           - value_validator: Value validator
             Type: A function that takes three args:
             current block name, var and value and validates them.
             In case value is invalid, XYZValueError must be raised.
             Otherwise function must return required value, possibly modified.
             Default: None
           - count: How many blocks to parse. If count <= 0 - will parse
             all available.
             Type: integer
             Default: 0
           - list_separator: Character to separate elements in list
             Type: I{string (single char)}
             Default: ,
           - macrochar: Macro character (None to disable macros)
             Type: I{string (single char)}
             Default: &
           - var_transform: A function which is called with variable name
             as single argument, and which returns new variable object
             or raises XYZValueError

        @raise XYZValueError: If a non-empty opt is not a dictionary.
        """

        super(BlockParser, self).__init__()

        if opt and not isinstance(opt, dict):
            raise XYZValueError(_(u"Invalid opt type: %s. "
                                  u"Dictionary expected.") % type(opt))

        self.opt = opt or self.DEFAULT_OPT
        # set_opt() is inherited and not visible here; the option names are
        # later read as attributes (self.comment, self.delimiter, ...), so
        # presumably set_opt() publishes them -- confirm in BaseParser.
        self.set_opt(self.DEFAULT_OPT, self.opt)

        self._default_data = None
        self._state = self.STATE_INIT
        self._parsed_obj = None
        self._varname = None
        self._macroname = None
        self._sdata = None
        self._result = {}
        self._current_list = []
        self._macros = {}
        self._lexer = None
        self._openblock = u"{"
        self._closeblock = u"}"
        self._parsed = 0

        # Initialised here as well as in _reset_block_state() so the
        # attributes always exist, even before the first parse() call.
        self._in_comment = False
        self._in_quote = False

        self._tok_type = None

        # State -> handler dispatch table
        self._parse_table = {
            self.STATE_INIT: self._process_state_init,
            self.STATE_BLOCK_OPEN: self._process_state_block_open,
            self.STATE_VARIABLE: self._process_state_variable,
            self.STATE_ASSIGN: self._process_state_assign,
            self.STATE_VALUE: self._process_state_value,
            self.STATE_LIST_VALUE: self._process_state_list_value,
            self.STATE_DELIM: self._process_state_delim,
        }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def parse(self, source, default_data=None):
        """
        Parse blocks of text and return a dict of L{ParsedData} objects
        or raise L{libxyz.exceptions.ParseError} exception

        @param source: Source data
        @param default_data: Dictionary containing L{libxyz.parser.ParsedData}
                             objects with default values. Note that a
                             non-empty dictionary is used as the result
                             container itself, i.e. it is updated in place
                             and returned.
        @return: Dictionary mapping block names to parsed objects.
        """

        self._cleanup()

        self._result = default_data or {}
        self._parsed = 0
        self._default_data = default_data

        _tokens = (self._openblock,
                   self._closeblock,
                   self.assignchar,
                   self.delimiter,
                   self.list_separator,
                   )

        self._lexer = Lexer(source, _tokens, self.comment, self.macrochar)
        self._sdata = self._lexer.sdata

        # Newlines are only meaningful where a statement may end
        _lf_states = (self.STATE_DELIM, self.STATE_LIST_VALUE)

        try:
            while True:
                _res = self._lexer.lexer()

                if _res is None:
                    break

                self._tok_type, _val = _res

                # We're only interested in LF in DELIM or LIST_VALUE
                # states
                if _val == "\n" and self._state not in _lf_states:
                    continue

                self._parse_table[self._state](_val)
        except LexerError as e:
            # unicode() rather than str(): messages may be non-ASCII
            self.error(unicode(e))

        self._check_complete()

        return self._result

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def _process_state_init(self, word):
        """
        Handle a block name: start from the default object registered
        under that name if there is one, otherwise from a fresh
        L{ParsedData}.
        """

        if self._default_data and word in self._default_data:
            self._parsed_obj = self._default_data[word]
        else:
            self._parsed_obj = ParsedData(word)

        self._state = self.STATE_BLOCK_OPEN

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def _process_state_block_open(self, word):
        """
        Expect the block opening token right after the block name.
        """

        if word != self._openblock:
            self.error(msg=(word, self._openblock),
                       etype=self.error_unexpected)
        else:
            self._state = self.STATE_VARIABLE

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def _process_state_variable(self, word):
        """
        Handle a token at the start of a statement: either the block
        closing token, a macro name or a variable name.
        """

        if word == self._closeblock:
            self._complete_block()
            return

        if self._tok_type == self._lexer.TOKEN_MACRO:
            # Macro definition: the value gets stored in self._macros later
            self._macroname = word
        else:
            if self.validvars and word not in self.validvars:
                self.error(_(u"Unknown variable %s") % word)
            elif self.varre.match(word) is None:
                self.error(_(u"Invalid variable name: %s") % word)

            if callable(self.var_transform):
                try:
                    self._varname = self.var_transform(word)
                except XYZValueError as e:
                    self.error(_(u"Variable transformation error: %s") %
                               unicode(e))
            else:
                self._varname = word

        self._state = self.STATE_ASSIGN

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def _process_state_assign(self, word):
        """
        Expect the assign character; on success switch the lexer escaping
        on for the value that follows.
        """

        if word != self.assignchar:
            self.error(msg=(word, self.assignchar),
                       etype=self.error_unexpected)
        else:
            self._state = self.STATE_VALUE
            self._lexer.escaping_on()

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def _process_state_list_value(self, word):
        """
        Handle the token following a value: a list separator means another
        value follows, anything else finishes the statement and the
        collected value is stored as a macro or as a variable.
        """

        if word == self.list_separator:
            self._state = self.STATE_VALUE
            return

        # A single value is stored as is, several values as a tuple
        if len(self._current_list) == 1:
            _value = self._current_list[0]
        else:
            _value = tuple(self._current_list)

        # Macro
        if self._macroname:
            self._macros[self._macroname] = _value
            self._macroname = None
        # Variable
        else:
            if self.value_validator:
                try:
                    _value = self.value_validator(self._parsed_obj.name,
                                                  self._varname, _value)
                except XYZValueError as e:
                    # unicode() rather than str(): messages may be non-ASCII
                    self.error(_(u"Invalid value: %s") % unicode(e))

            self._parsed_obj[self._varname] = _value
            self._varname = None

        self._current_list = []
        self._lexer.escaping_off()
        self._state = self.STATE_DELIM
        # The token was not consumed here; hand it back to the lexer so
        # that it gets processed in the DELIM state.
        self._lexer.unget(word)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def _process_state_value(self, word):
        """
        Collect a value; macro references are replaced with the value of
        a previously defined macro.
        """

        if self._tok_type == self._lexer.TOKEN_MACRO:
            try:
                self._current_list.append(self._macros[word])
            except KeyError:
                self.error(_(u"Undefined macro %s") % word)
        else:
            self._current_list.append(word)

        self._state = self.STATE_LIST_VALUE

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def _process_state_delim(self, word):
        """
        Expect either the block closing token or a statement delimiter.
        """

        if word == self._closeblock:
            self._complete_block()
            return

        if word != self.delimiter:
            self.error(msg=(word, self.delimiter),
                       etype=self.error_unexpected)
        else:
            self._state = self.STATE_VARIABLE

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def _complete_block(self):
        """
        Store the block just parsed in the result and get ready for the
        next one. Tell the lexer to stop once the requested number of
        blocks has been parsed.
        """

        if self._parsed_obj:
            self._result[self._parsed_obj.name] = self._parsed_obj
            self._parsed += 1

        # Only the per-block state is reset here. A full _cleanup() would
        # also drop self._default_data, so default values would be applied
        # to the first block only.
        self._reset_block_state()

        if self.count > 0 and self.count == self._parsed:
            self._lexer.done()

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def _reset_block_state(self):
        """
        Reset the state which is local to a single block
        """

        self._parsed_obj = None
        self._varname = None
        self._macroname = None
        self._state = self.STATE_INIT
        self._in_comment = False
        self._in_quote = False
        self._current_list = []
        self._macros = {}

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def _cleanup(self):
        """
        Set all necessary variables to initial state
        """

        self._reset_block_state()
        self._default_data = None

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def _check_complete(self):
        """
        Check state after source reaches EOF for consistency
        """

        _err = False
        _msg = None

        if self._in_quote:
            _err, _msg = True, _(u"Unterminated quote")

        if self._state != self.STATE_INIT:
            if self._state != self.STATE_BLOCK_OPEN:
                _err, _msg = True, _(u"Unclosed block")
            else:
                # Block name seen but the block was never opened
                _err, _msg = True, None

        # Later checks deliberately override the message of earlier ones
        if self._lexer.get_idt():
            _err, _msg = True, None

        if _err:
            self.error(_msg)
355