1 package org.codehaus.groovy.syntax.lexer;
2
3
4 import org.codehaus.groovy.syntax.ReadException;
5 import org.codehaus.groovy.syntax.Token;
6 import org.codehaus.groovy.GroovyBugError;
7
8
9 /***
10 * A Lexer for processing standard strings.
11 *
12 * @author Chris Poirier
13 */
14
15 public class StringLexer extends TextLexerBase
16 {
17
18 protected String delimiter = null;
19 protected char watchFor;
20 protected boolean allowGStrings = false;
21 protected boolean emptyString = true;
22
23
24 /***
25 * If set true, the filter will allow // and \$ to pass through unchanged.
26 * You should set this appropriately BEFORE setting source!
27 */
28
29 public void allowGStrings( boolean allow )
30 {
31 allowGStrings = allow;
32 }
33
34
35
36 /***
37 * Returns a single STRING, then null. The STRING is all of the processed
38 * input. Backslashes are stripped, with the \r, \n, and \t converted
39 * appropriately.
40 */
41
42 public Token undelegatedNextToken( ) throws ReadException, LexerException
43 {
44 if( emptyString )
45 {
46 emptyString = false;
47 return Token.newString( "", getStartLine(), getStartColumn() );
48 }
49 else if( finished )
50 {
51 return null;
52 }
53 else
54 {
55 StringBuffer string = new StringBuffer();
56
57 while( la(1) != CharStream.EOS )
58 {
59 string.append( consume() );
60 }
61
62 if( la(1) == CharStream.EOS && string.length() == 0 )
63 {
64 finished = true;
65 }
66
67 return Token.newString( string.toString(), getStartLine(), getStartColumn() );
68 }
69 }
70
71
72
73 /***
74 * Controls delimiter search. When turned on, the first thing we do
75 * is check for and eat our delimiter.
76 */
77
78 public void delimit( boolean delimit )
79 {
80 super.delimit( delimit );
81
82 if( delimit )
83 {
84 try
85 {
86 if( !finished && la(1) == CharStream.EOS )
87 {
88 finishUp();
89
90
91
92
93
94
95
96 if( !allowGStrings )
97 {
98 emptyString = true;
99 }
100 }
101 }
102 catch( Exception e )
103 {
104 finished = true;
105 }
106 }
107 }
108
109
110
111
112 /***
113 * Sets the source lexer and identifies and consumes the opening delimiter.
114 */
115
116 public void setSource( Lexer source )
117 {
118 super.setSource( source );
119
120 emptyString = false;
121
122 try
123 {
124 char c = source.la();
125 switch( c )
126 {
127 case '\'':
128 case '"':
129 mark();
130 source.consume();
131
132 if( source.la() == c && source.la(2) == c )
133 {
134 source.consume(); source.consume();
135 delimiter = new StringBuffer().append(c).append(c).append(c).toString();
136 }
137 else
138 {
139 delimiter = new StringBuffer().append(c).toString();
140 }
141
142 watchFor = delimiter.charAt(0);
143 break;
144
145
146 default:
147 {
148 throw new GroovyBugError( "at the time of StringLexer.setSource(), the source must be on a single or double quote" );
149 }
150 }
151
152 restart();
153 delimit( true );
154 }
155 catch( Exception e )
156 {
157
158
159
160
161 e.printStackTrace();
162 unsetSource( );
163 }
164 }
165
166
167
168 /***
169 * Unsets our source.
170 */
171
172 public void unsetSource()
173 {
174 super.unsetSource();
175 delimiter = null;
176 finished = true;
177 emptyString = false;
178 }
179
180
181
182
183
184
185
186 private int lookahead = 0;
187 private char[] characters = new char[3];
188 private int[] widths = new int[3];
189
190
191
192 /***
193 * Returns the next <code>k</code>th character, without consuming any.
194 */
195
196 public char la(int k) throws LexerException, ReadException
197 {
198
199 if( !finished && source != null )
200 {
201
202 if( delimited )
203 {
204
205 if( k > characters.length )
206 {
207 throw new GroovyBugError( "StringLexer lookahead tolerance exceeded" );
208 }
209
210 if( lookahead >= k )
211 {
212 return characters[k-1];
213 }
214
215 lookahead = 0;
216
217 char c = ' ', c1 = ' ', c2 = ' ';
218 int offset = 1, width = 0;
219 for( int i = 1; i <= k; i++ )
220 {
221 c1 = source.la(offset);
222 C1_SWITCH: switch( c1 )
223 {
224 case CharStream.EOS:
225 {
226 return c1;
227 }
228
229 case '//':
230 {
231 c2 = source.la( offset + 1 );
232
233 ESCAPE_SWITCH: switch( c2 )
234 {
235
236 case CharStream.EOS:
237 return c2;
238
239 case '//':
240 c = '//';
241 width = 2;
242 break ESCAPE_SWITCH;
243 case '$':
244 {
245 if( allowGStrings )
246 {
247 c = c1;
248 width = 1;
249 }
250 else
251 {
252 c = c2;
253 width = 2;
254 }
255 break ESCAPE_SWITCH;
256 }
257
258 case 'r':
259 c = '\r';
260 width = 2;
261 break ESCAPE_SWITCH;
262
263 case 't':
264 c = '\t';
265 width = 2;
266 break ESCAPE_SWITCH;
267
268 case 'n':
269 c = '\n';
270 width = 2;
271 break ESCAPE_SWITCH;
272
273
274 default:
275 c = c2;
276 width = 2;
277 break ESCAPE_SWITCH;
278 }
279 break C1_SWITCH;
280 }
281
282 default:
283 {
284 if( c1 == watchFor )
285 {
286 boolean atEnd = true;
287 for( int j = 1; j < delimiter.length(); j++ )
288 {
289 if( source.la(offset+j) != delimiter.charAt(j) )
290 {
291 atEnd = false;
292 break;
293 }
294 }
295
296 if( atEnd )
297 {
298 return CharStream.EOS;
299 }
300 }
301
302 c = c1;
303 width = 1;
304 break C1_SWITCH;
305 }
306 }
307
308
309 characters[lookahead] = c;
310 widths[lookahead] = width;
311
312 offset += width;
313 lookahead += 1;
314 }
315
316 return c;
317 }
318
319 lookahead = 0;
320 return source.la(k);
321 }
322
323 return CharStream.EOS;
324
325 }
326
327
328
329 /***
330 * Eats a character from the input stream. Searches for the delimiter if
331 * delimited. Note that turning delimiting on also checks if we are at the
332 * delimiter, so if we aren't finished, there is something to consume.
333 */
334
335 public char consume() throws LexerException, ReadException
336 {
337 if( !finished && source != null )
338 {
339 char c = CharStream.EOS;
340
341 if( delimited )
342 {
343 if( lookahead < 1 )
344 {
345 la( 1 );
346 }
347
348 if( lookahead >= 1 )
349 {
350 c = characters[0];
351 for( int i = 0; i < widths[0]; i++ )
352 {
353 source.consume();
354 }
355
356 lookahead = 0;
357 }
358
359 if( la(1) == CharStream.EOS )
360 {
361 finishUp();
362 }
363 }
364 else
365 {
366 c = source.consume();
367 }
368
369 lookahead = 0;
370 return c;
371 }
372
373 return CharStream.EOS;
374 }
375
376
377
378 /***
379 * Eats our delimiter from the stream and marks us finished.
380 */
381
382 protected void finishUp() throws LexerException, ReadException
383 {
384 for( int i = 0; i < delimiter.length(); i++ )
385 {
386 char c = source.la(1);
387 if( c == CharStream.EOS )
388 {
389 throw new UnterminatedStringLiteralException(getStartLine(), getStartColumn());
390 }
391 else if( c == delimiter.charAt(i) )
392 {
393 source.consume();
394 }
395 else
396 {
397 throw new GroovyBugError( "la() said delimiter [" + delimiter + "], finishUp() found [" + c + "]" );
398 }
399 }
400
401 finish();
402 }
403
404 }