View Javadoc

1   package org.codehaus.groovy.syntax.lexer;
2   
3   import org.codehaus.groovy.syntax.ReadException;
4   import org.codehaus.groovy.GroovyBugError;
5   
6   
7   /***
8    *  Identifies and returns tokens from a source text.  <code>nextToken()</code>
9    *  is the primary entry point.  This is the primary lexer for the Groovy language.
10   *  It can delegate operations, but will not accept being delegated to.
11   *
12   *  @author Bob Mcwhirter
13   *  @author James Strachan
14   *  @author John Wilson
15   *  @author Chris Poirier
16   */
17  
18  public class GroovyLexer extends GroovyLexerBase
19  {
20  
21      private CharStream charStream;           // the source of data for the lexer
22      protected int line;                      // the current line in the source
23      protected int column;                    // the current column in the source
24  
25  
26  
27     /***
28      *  Initializes the <code>Lexer</code> from an opened <code>CharStream</code>.
29      */
30  
31      public GroovyLexer(CharStream charStream)
32      {
33          this.charStream = charStream;
34          this.line = 1;
35          this.column = 1;
36      }
37  
38  
39     /***
40      *  Returns the underlying <code>CharStream</code>.
41      */
42  
43      public CharStream getCharStream()
44      {
45          return this.charStream;
46      }
47  
48  
49  
50     /***
51      *  Refuses to set a source.
52      */
53  
54      public void setSource( Lexer source )
55      {
56          throw new GroovyBugError( "you can't set a source on the GroovyLexer" );
57      }
58  
59  
60  
61     /***
62      *  Similarly refuses to clear a source.
63      */
64  
65      public void unsetSource()
66      {
67          throw new GroovyBugError( "you can't unset a source on the GroovyLexer" );
68      }
69  
70  
71  
72  
73    //---------------------------------------------------------------------------
74    // STREAM PROCESSING
75  
76      private final char[] buf       = new char[5];          // ??
77      private final int[]  charWidth = new int[buf.length];  // ??
78  
79      private int cur = 0;                                   // ??
80      private int charsInBuffer = 0;                         // ??
81      private boolean eosRead = false;                       // ??
82      private boolean escapeLookahead = false;               // ??
83      private char escapeLookaheadChar;                      // ??
84  
85      private boolean boundary = false;                      // set true when the lexer is on a line boundary
86  
87  
88  
89     /***
90      *  Returns the current line number.
91      */
92  
93      public int getLine()
94      {
95          return line;
96      }
97  
98  
99  
100    /***
101     *  Returns the current column within that line.
102     */
103 
104     public int getColumn()
105     {
106         return column;
107     }
108 
109 
110 
111    /***
112     *  Returns the next <code>k</code>th character, without consuming any.
113     */
114 
115     public char la(int k) throws LexerException, ReadException
116     {
117         if (k > this.charsInBuffer)
118         {
119             if( k > this.buf.length )
120             {
121                 throw new GroovyBugError( "Could not look ahead for character: " + k + " due to buffer exhaustion" );
122             }
123 
124             for (int i = 0; i != this.charsInBuffer; i++, this.cur++)
125             {
126                this.buf[i] = this.buf[this.cur];
127                this.charWidth[i] = this.charWidth[this.cur];
128             }
129 
130             fillBuffer();
131         }
132 
133         return this.buf[this.cur + k - 1];
134     }
135 
136 
137 
138    /***
139     *  Eats a character from the input stream.  We don't
140     *  support sources here, as we own the CharStream on which
141     *  we are working.
142     */
143 
144     public char consume() throws LexerException, ReadException
145     {
146         if (this.charsInBuffer == 0)
147         {
148             fillBuffer();
149         }
150 
151 
152         //
153         // Consume the next character
154 
155         this.charsInBuffer--;
156 
157         int  width   = this.charWidth[this.cur];
158         char c       = this.buf[this.cur++];
159         this.column += width;
160 
161 
162         //
163         // Mark line boundaries as necessary.  Only relevant
164         // non-manufactured tokens need apply.
165 
166         if( boundary || (c == '\n' && width == 1) )
167         {
168             boundary = false;
169             line++;
170             column = 1;
171         }
172         else if( c == '\r' && width == 1 )
173         {
174             if( la(1) != '\n' )
175             {
176                 line++;
177                 column = 1;
178             }
179             else /* it is '\n' and */ if( this.charWidth[this.cur] == 1 )
180             {
181                 boundary = true;
182             }
183         }
184 
185 
186         return c;
187     }
188 
189 
190 
191    /***
192     *  Fills the lookahead buffer from the stream.
193     */
194 
195     private void fillBuffer() throws ReadException, LexerException
196     {
197         this.cur = 0;
198 
199         do
200         {
201             if( this.eosRead )
202             {
203                 this.buf[this.charsInBuffer] = CharStream.EOS;
204             }
205             else
206             {
207                 char c = this.escapeLookahead ? this.escapeLookaheadChar : charStream.consume();
208 
209                 this.escapeLookahead = false;
210                 this.charWidth[this.charsInBuffer] = 1;
211 
212                 if(c == CharStream.EOS)
213                 {
214                     this.eosRead = true;
215                 }
216 
217                 if( c == '//' )
218                 {
219                     c = charStream.consume();
220 
221                     if( c == 'u' )
222                     {
223                         do
224                         {
225                             this.charWidth[this.charsInBuffer]++;
226                             c = charStream.consume();
227                         }
228                         while (c == 'u'); // the spec allows any number of u characters after the \	
229 
230                         try
231                         {
232                             c =
233                                 (char) Integer.parseInt(
234                                     new String(
235                                         new char[] {
236                                             c,
237                                             charStream.consume(),
238                                             charStream.consume(),
239                                             charStream.consume()    }),
240                                     16);
241                             this.charWidth[this.charsInBuffer] += 4;
242                         }
243                         catch (NumberFormatException e)
244                         {
245                             throw new UnexpectedCharacterException(
246                                 getStartLine(),
247                                 getStartColumn() + 1,
248                                 c,
249                                 new char[] {
250                             });
251                         }
252                     }
253                     else
254                     {
255                         this.escapeLookahead = true;
256                         this.escapeLookaheadChar = c;
257                         c = '//';
258                     }
259                 }
260 
261                 this.buf[this.charsInBuffer] = c;
262             }
263         }
264         while (++this.charsInBuffer != this.buf.length);
265     }
266 
267 }