View Javadoc

1   package org.codehaus.groovy.syntax.lexer;
2   
3   //{{{ imports
4   import org.codehaus.groovy.syntax.ReadException;
5   import org.codehaus.groovy.syntax.Token;
6   import org.codehaus.groovy.GroovyBugError;
7   //}}}
8   
9   /***
10   *  A Lexer for processing standard strings.
11   *
12   *  @author Chris Poirier
13   */
14  
15  public class StringLexer extends TextLexerBase
16  {
17  
18      protected String  delimiter = null;
19      protected char    watchFor;
20      protected boolean allowGStrings = false;
21      protected boolean emptyString   = true;   // If set, we need to send an empty string
22  
23  
24     /***
25      *  If set true, the filter will allow // and \$ to pass through unchanged.
26      *  You should set this appropriately BEFORE setting source!
27      */
28  
29      public void allowGStrings( boolean allow )
30      {
31          allowGStrings = allow;
32      }
33  
34  
35  
36     /***
37      *  Returns a single STRING, then null.   The STRING is all of the processed
38      *  input.  Backslashes are stripped, with the \r, \n, and \t converted
39      *  appropriately.
40      */
41  
42      public Token undelegatedNextToken( ) throws ReadException, LexerException
43      {
44          if( emptyString )
45          {
46              emptyString = false;
47              return Token.newString( "", getStartLine(), getStartColumn() );
48          }
49          else if( finished )
50          {
51              return null;
52          }
53          else
54          {
55              StringBuffer string = new StringBuffer();
56  
57              while( la(1) != CharStream.EOS )
58              {
59                  string.append( consume() );
60              }
61              
62              if( la(1) == CharStream.EOS && string.length() == 0 )
63              {
64                  finished = true;
65              }
66  
67              return Token.newString( string.toString(), getStartLine(), getStartColumn() );
68          }
69      }
70  
71  
72  
73     /***
74      *  Controls delimiter search.  When turned on, the first thing we do
75      *  is check for and eat our delimiter.
76      */
77  
78      public void delimit( boolean delimit )
79      {
80          super.delimit( delimit );
81  
82          if( delimit )
83          {
84              try
85              {
86                  if( !finished && la(1) == CharStream.EOS )
87                  {
88                      finishUp();
89  
90                      //
91                      // The GStringLexer will correctly handle the empty string.
92                      // We don't.  In order to ensure that an empty string is
93                      // supplied, we set a flag that is checked during
94                      // undelegatedNextToken().
95  
96                      if( !allowGStrings )
97                      {
98                          emptyString = true;
99                      }
100                 }
101             }
102             catch( Exception e )
103             {
104                 finished = true;
105             }
106         }
107     }
108 
109 
110 
111 
112    /***
113     *  Sets the source lexer and identifies and consumes the opening delimiter.
114     */
115 
116     public void setSource( Lexer source )
117     {
118         super.setSource( source );
119 
120         emptyString = false;
121 
122         try
123         {
124             char c = source.la();
125             switch( c )
126             {
127                 case '\'':
128                 case '"':
129                     mark();
130                     source.consume();
131 
132                     if( source.la() == c && source.la(2) == c )
133                     {
134                         source.consume(); source.consume();
135                         delimiter = new StringBuffer().append(c).append(c).append(c).toString();
136                     }
137                     else
138                     {
139                         delimiter = new StringBuffer().append(c).toString();
140                     }
141 
142                     watchFor = delimiter.charAt(0);
143                     break;
144 
145 
146                 default:
147                 {
148                     throw new GroovyBugError( "at the time of StringLexer.setSource(), the source must be on a single or double quote" );
149                 }
150             }
151 
152             restart();
153             delimit( true );
154         }
155         catch( Exception e )
156         {
157             //
158             // If we couldn't read our delimiter, we'll just
159             // cancel our source.  nextToken() will return null.
160 
161             e.printStackTrace();
162             unsetSource( );
163         }
164     }
165 
166 
167 
168    /***
169     *  Unsets our source.
170     */
171 
172     public void unsetSource()
173     {
174         super.unsetSource();
175         delimiter   = null;
176         finished    = true;
177         emptyString = false;
178     }
179 
180 
181 
182 
183   //---------------------------------------------------------------------------
184   // STREAM ROUTINES
185 
186     private int    lookahead  = 0;             // the number of characters identified
187     private char[] characters = new char[3];   // the next characters identified by la()
188     private int[]  widths     = new int[3];    // the source widths of the next characters
189 
190 
191 
192    /***
193     *  Returns the next <code>k</code>th character, without consuming any.
194     */
195 
196     public char la(int k) throws LexerException, ReadException
197     {
198 
199         if( !finished && source != null )
200         {
201 
202             if( delimited )
203             {
204 
205                 if( k > characters.length )
206                 {
207                     throw new GroovyBugError( "StringLexer lookahead tolerance exceeded" );
208                 }
209 
210                 if( lookahead >= k )
211                 {
212                     return characters[k-1];
213                 }
214 
215                 lookahead = 0;
216 
217                 char c = ' ', c1 = ' ', c2 = ' ';
218                 int offset = 1, width = 0;
219                 for( int i = 1; i <= k; i++ )
220                 {
221                     c1 = source.la(offset);
222                     C1_SWITCH: switch( c1 )
223                     {
224                         case CharStream.EOS:
225                         {
226                             return c1;
227                         }
228 
229                         case '//':
230                         {
231                             c2 = source.la( offset + 1 );
232 
233                             ESCAPE_SWITCH: switch( c2 )
234                             {
235 
236                                 case CharStream.EOS:
237                                     return c2;
238 
239                                 case '//':
240                                     c = '//';
241                                     width = 2;
242                                     break ESCAPE_SWITCH;
243                                 case '$':
244                                 {
245                                     if( allowGStrings )
246                                     {
247                                         c = c1;
248                                         width = 1;
249                                     }
250                                     else
251                                     {
252                                         c = c2;
253                                         width = 2;
254                                     }
255                                     break ESCAPE_SWITCH;
256                                 }
257 
258                                 case 'r':
259                                     c = '\r';
260                                     width = 2;
261                                     break ESCAPE_SWITCH;
262 
263                                 case 't':
264                                     c = '\t';
265                                     width = 2;
266                                     break ESCAPE_SWITCH;
267 
268                                 case 'n':
269                                     c = '\n';
270                                     width = 2;
271                                     break ESCAPE_SWITCH;
272 
273 
274                                 default:
275                                     c = c2;
276                                     width = 2;
277                                     break ESCAPE_SWITCH;
278                             }
279                             break C1_SWITCH;
280                         }
281 
282                         default:
283                         {
284                             if( c1 == watchFor )
285                             {
286                                 boolean atEnd = true;
287                                 for( int j = 1; j < delimiter.length(); j++ )
288                                 {
289                                     if( source.la(offset+j) != delimiter.charAt(j) )
290                                     {
291                                         atEnd = false;
292                                         break;
293                                     }
294                                 }
295 
296                                 if( atEnd )
297                                 {
298                                     return CharStream.EOS;
299                                 }
300                             }
301 
302                             c = c1;
303                             width = 1;
304                             break C1_SWITCH;
305                         }
306                     }
307 
308 
309                     characters[lookahead] = c;
310                     widths[lookahead]     = width;
311 
312                     offset += width;
313                     lookahead += 1;
314                 }
315 
316                 return c;                                         // <<< FLOW CONTROL <<<<<<<<<
317             }
318 
319             lookahead = 0;
320             return source.la(k);
321         }
322 
323         return CharStream.EOS;
324 
325     }
326 
327 
328 
329    /***
330     *  Eats a character from the input stream.  Searches for the delimiter if
331     *  delimited.  Note that turning delimiting on also checks if we are at the
332     *  delimiter, so if we aren't finished, there is something to consume.
333     */
334 
335     public char consume() throws LexerException, ReadException
336     {
337         if( !finished && source != null )
338         {
339             char c = CharStream.EOS;
340 
341             if( delimited )
342             {
343                 if( lookahead < 1 )
344                 {
345                     la( 1 );
346                 }
347 
348                 if( lookahead >= 1 )
349                 {
350                     c = characters[0];
351                     for( int i = 0; i < widths[0]; i++ )
352                     {
353                         source.consume();
354                     }
355 
356                     lookahead = 0;
357                 }
358 
359                 if( la(1) == CharStream.EOS )
360                 {
361                     finishUp();
362                 }
363             }
364             else
365             {
366                 c = source.consume();
367             }
368 
369             lookahead = 0;
370             return c;
371         }
372 
373         return CharStream.EOS;
374     }
375 
376 
377 
378    /***
379     *  Eats our delimiter from the stream and marks us finished.
380     */
381 
382     protected void finishUp() throws LexerException, ReadException
383     {
384         for( int i = 0; i < delimiter.length(); i++ )
385         {
386             char c = source.la(1);
387             if( c == CharStream.EOS )
388             {
389                 throw new UnterminatedStringLiteralException(getStartLine(), getStartColumn());
390             }
391             else if( c == delimiter.charAt(i) )
392             {
393                 source.consume();
394             }
395             else
396             {
397                 throw new GroovyBugError( "la() said delimiter [" + delimiter + "], finishUp() found [" + c + "]" );
398             }
399         }
400 
401         finish();
402     }
403 
404 }