View Javadoc

1   package org.codehaus.groovy.syntax.lexer;
2   
3   import org.codehaus.groovy.syntax.ReadException;
4   import org.codehaus.groovy.syntax.Types;
5   import org.codehaus.groovy.syntax.Token;
6   import org.codehaus.groovy.GroovyBugError;
7   
8   
9   /***
10   *  A base class for all other lexers.
11   *
12   *  @author Bob Mcwhirter
13   *  @author James Strachan
14   *  @author John Wilson
15   *  @author Chris Poirier
16   */
17  
18  public class LexerBase implements Lexer
19  {
20  
21      protected int startLine;                 // the start line of the current token
22      protected int startColumn;               // the start column of the current token
23  
24      protected Lexer delegate = null;         // another lexer currently satisfying our requests
25      protected Lexer source   = null;         // a lexer we are obtaining data from
26  
27  
28  
29     /***
30      *  Initializes the <code>LexerBase</code>.
31      */
32  
33      public LexerBase( )
34      {
35      }
36  
37  
38  
39     /***
40      *  Gets the lexer that is actually doing the <code>nextToken()</code>
41      *  work, if it isn't us.
42      */
43  
44      public Lexer getDelegate()
45      {
46          return delegate;
47      }
48  
49  
50  
51     /***
52      *  Gets the lexer from which this lexer is obtaining characters.
53      */
54  
55      public Lexer getSource()
56      {
57          return source;
58      }
59  
60  
61  
62     /***
63      *  Finds and returns (consuming) the next token from the underlying stream.
64      *  Returns null when out of tokens.  This implementation correctly handles
65      *  delegation, and subclasses implement undelegatedNextToken(), which will
66      *  be called by this routine when appropriate.
67      */
68  
69      public Token nextToken() throws ReadException, LexerException
70      {
71          //
72          // If we are delegated, use it until it returns null.
73  
74          if( delegate != null )
75          {
76              Token next = delegate.nextToken();
77  
78              if( next == null )
79              {
80                  undelegate();
81              }
82              else
83              {
84                  return next;
85              }
86  
87          }
88  
89          mark();
90          return undelegatedNextToken();
91      }
92  
93  
94  
95     /***
96      *  Does undelegated nextToken() operations.  You supply your
97      *  lexer-specific nextToken() code by overriding this method.
98      */
99  
100     protected Token undelegatedNextToken() throws ReadException, LexerException
101     {
102         return null;
103     }
104 
105 
106 
107 
108   //---------------------------------------------------------------------------
109   // SPECIAL HANDLERS
110 
111 
112    /***
113     *  Process an end-of-line marker and returns a NEWLINE token.
114     *  Returns null if not at an end-of-line.
115     */
116 
117     protected Token tokenizeEOL() throws LexerException, ReadException
118     {
119         Token token = null;
120 
121         char c = la();
122         switch( c )
123         {
124             case '\r':
125             case '\n':
126                 token = symbol( Types.NEWLINE );
127 
128                 consume();
129                 if (c == '\r' && la() == '\n')
130                 {
131                     consume();
132                 }
133         }
134 
135         return token;
136     }
137 
138 
139 
140    /***
141     *  Reads an end-of-line marker and writes the text into the
142     *  specified buffer, if supplied.
143     */
144 
145     protected boolean readEOL( StringBuffer destination ) throws LexerException, ReadException
146     {
147         boolean read = false;
148 
149         char c = la();
150         switch( c )
151         {
152             case '\r':
153             case '\n':
154                 if( destination == null )
155                 {
156                     consume();
157                     if (c == '\r' && la() == '\n')
158                     {
159                         consume();
160                     }
161                 }
162                 else
163                 {
164                     destination.append( consume() );
165                     if (c == '\r' && la() == '\n')
166                     {
167                         destination.append( consume() );
168                     }
169                 }
170 
171                 read = true;
172         }
173 
174         return read;
175     }
176 
177 
178 
179    /***
180     *  Synonym for <code>readEOL(null)</code>.
181     */
182 
183     protected void readEOL() throws LexerException, ReadException
184     {
185         readEOL( null );
186     }
187 
188 
189 
190 
191 
192   //---------------------------------------------------------------------------
193   // DELEGATION
194 
195 
196    /***
197     *  Resets a lexer for reuse.
198     */
199 
200     public void reset()
201     {
202         delegate = null;
203         source   = null;
204     }
205 
206 
207 
208    /***
209     *  Delegates our duties to another Lexer.
210     */
211 
212     public void delegate( Lexer to )
213     {
214         this.delegate = to;
215         to.setSource( this );
216     }
217 
218 
219 
220    /***
221     *  Retakes responsibility for our duties.
222     */
223 
224     public void undelegate()
225     {
226         if( delegate != null )
227         {
228             delegate.unsetSource( );
229             delegate = null;
230         }
231     }
232 
233 
234 
235    /***
236     *  Sets the source lexer.
237     */
238 
239     public void setSource( Lexer source )
240     {
241         if( source == null )
242         {
243             throw new GroovyBugError( "use unsetSource() to remove a source from a lexer" );
244         }
245         this.source = source;
246     }
247 
248 
249 
250    /***
251     *  Unsets the source lexer.
252     */
253 
254     public void unsetSource()
255     {
256         this.source = null;
257     }
258 
259 
260 
261    /***
262     *  Returns true if we are delegated to another lexer.
263     */
264 
265     public boolean isDelegated()
266     {
267         return delegate != null;
268     }
269 
270 
271 
272    /***
273     *  Returns true if we are obtaining our characters
274     *  from another lexer.
275     */
276 
277     public boolean isExternallySourced()
278     {
279         return source != null;
280     }
281 
282 
283 
284 
285   //---------------------------------------------------------------------------
286   // ERROR HANDLING
287 
288 
289    /***
290     *  Creates and throws a new <code>UnexpectedCharacterException</code>.
291     */
292 
293     protected void unexpected( char c, int offset, String message ) throws UnexpectedCharacterException
294     {
295         throw new UnexpectedCharacterException( getStartLine(), getStartColumn() + offset, c, message );
296     }
297 
298 
299 
300    /***
301     *  Creates and throws a new <code>UnexpectedCharacterException</code>.
302     */
303 
304     protected void unexpected( char c, char[] expected, int offset ) throws UnexpectedCharacterException
305     {
306         throw new UnexpectedCharacterException( getStartLine(), getStartColumn() + offset, c, expected );
307     }
308 
309 
310 
311    /***
312     *  Synonym for <code>unexpected( c, null, offset )</code>.
313     */
314 
315     protected void unexpected( char c, int offset ) throws UnexpectedCharacterException
316     {
317         unexpected( c, null, offset );
318     }
319 
320 
321 
322 
323   //---------------------------------------------------------------------------
324   // SUPPORT ROUTINES
325 
326 
327    /***
328     *  Creates a new symbol token, and allows you to alter the starting
329     *  column.
330     */
331 
332     protected Token symbol( int type, int columnOffset )
333     {
334         return Token.newSymbol( type, getStartLine(), getStartColumn() - columnOffset );
335     }
336 
337 
338 
339    /***
340     *  Creates a new symbol token.
341     */
342 
343     protected Token symbol( int type )
344     {
345         return Token.newSymbol( type, getStartLine(), getStartColumn() );
346     }
347 
348 
349 
350 
351   //---------------------------------------------------------------------------
352   // STREAM ROUTINES
353 
354 
355    /***
356     *  Returns the current line number.
357     */
358 
359     public int getLine()
360     {
361         if( source != null )
362         {
363             return source.getLine();
364         }
365 
366         return -1;
367     }
368 
369 
370 
371    /***
372     *  Returns the current column within that line.
373     */
374 
375     public int getColumn()
376     {
377         if( source != null )
378         {
379             return source.getColumn();
380         }
381 
382         return -1;
383     }
384 
385 
386 
387    /***
388     *  Saves information about the current position, for tracking token extents.
389     */
390 
391     protected void mark()
392     {
393         startLine   = getLine();
394         startColumn = getColumn();
395     }
396 
397 
398 
399    /***
400     *  Returns the starting line of the current token.
401     */
402 
403     protected int getStartLine()
404     {
405         return this.startLine;
406     }
407 
408 
409 
410    /***
411     *  Returns the starting column of the current token.
412     */
413 
414     protected int getStartColumn()
415     {
416         return this.startColumn;
417     }
418 
419 
420 
421    /***
422     *  Returns the next character, without consuming it.
423     */
424 
425     public char la() throws LexerException, ReadException
426     {
427         return la(1);
428     }
429 
430 
431 
432    /***
433     *  Returns the next <code>k</code>th character, without consuming any.
434     */
435 
436     public char la(int k) throws LexerException, ReadException
437     {
438         if( source != null )
439         {
440             return source.la(k);
441         }
442         else
443         {
444             return CharStream.EOS;
445         }
446     }
447 
448 
449 
450    /***
451     *  Eats a character from the input stream.
452     */
453 
454     public char consume() throws LexerException, ReadException
455     {
456         if( source != null )
457         {
458             return source.consume();
459         }
460         else
461         {
462             return CharStream.EOS;
463         }
464     }
465 
466 
467 }