klee
Lexer.cpp
Go to the documentation of this file.
1//===-- Lexer.cpp ---------------------------------------------------------===//
2//
3// The KLEE Symbolic Virtual Machine
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
11
12#include "llvm/Support/MemoryBuffer.h"
13#include "llvm/Support/raw_ostream.h"
14
15#include <iomanip>
16#include <string.h>
17
18using namespace llvm;
19using namespace klee;
20using namespace klee::expr;
21
23
24const char *Token::getKindName() const {
25 switch (kind) {
26 default:
27 case Unknown: return "Unknown";
28 case Arrow: return "Arrow";
29 case At: return "At";
30 case Colon: return "Colon";
31 case Comma: return "Comma";
32 case Comment: return "Comment";
33 case EndOfFile: return "EndOfFile";
34 case Equals: return "Equals";
35 case Identifier: return "Identifier";
36 case KWArray: return "KWArray";
37 case KWFalse: return "KWFalse";
38 case KWQuery: return "KWQuery";
39 case KWReserved: return "KWReserved";
40 case KWSymbolic: return "KWSymbolic";
41 case KWTrue: return "KWTrue";
42 case KWWidth: return "KWWidth";
43 case LBrace: return "LBrace";
44 case LParen: return "LParen";
45 case LSquare: return "LSquare";
46 case Number: return "Number";
47 case RBrace: return "RBrace";
48 case RParen: return "RParen";
49 case RSquare: return "RSquare";
50 case Semicolon: return "Semicolon";
51 }
52}
53
55 llvm::errs() << "(Token \"" << getKindName() << "\" "
56 << (const void*) start << " " << length << " "
57 << line << " " << column << ")";
58}
59
61
/// isInternalIdentifierChar - Return true if \p Char is allowed inside an
/// identifier: an alphanumeric character, '_', '.' or '-'.
///
/// \param Char A character value. Values outside [0, 255] (for example the
///             -1 this lexer uses for end of input, or a sign-extended
///             non-ASCII byte) are never identifier characters.
static inline bool isInternalIdentifierChar(int Char) {
  // The <cctype> classifiers are only defined for EOF and for values
  // representable as unsigned char; reject everything else up front instead
  // of passing it to isalnum().
  if (Char < 0 || Char > 0xFF)
    return false;

  return isalnum(Char) || Char == '_' || Char == '.' || Char == '-';
}
65
66Lexer::Lexer(const llvm::MemoryBuffer *MB)
67 : BufferPos(MB->getBufferStart()), BufferEnd(MB->getBufferEnd()),
68 LineNumber(1), ColumnNumber(0) {
69}
70
72}
73
75 if (BufferPos == BufferEnd)
76 return -1;
77 return *BufferPos;
78}
79
81 if (BufferPos == BufferEnd)
82 return -1;
83
84 // Handle DOS/Mac newlines here, by stripping duplicates and by
85 // returning '\n' for both.
86 char Result = *BufferPos++;
87 if (Result == '\n' || Result == '\r') {
88 if (BufferPos != BufferEnd && *BufferPos == ('\n' + '\r' - Result))
89 ++BufferPos;
90 Result = '\n';
91 }
92
93 if (Result == '\n') {
94 ++LineNumber;
95 ColumnNumber = 0;
96 } else {
98 }
99
100 return Result;
101}
102
104 Result.kind = k;
105 Result.length = BufferPos - Result.start;
106 return Result;
107}
108
/// isReservedKW - Check whether the \p N characters starting at \p Str spell
/// a reserved keyword of the form i[0-9]+ or fp[0-9]+([.].*)?
///
/// \param Str Start of the candidate text; only Str[0..N-1] is read, so the
///            text does not need to be null-terminated.
/// \param N   Number of characters in the candidate text.
/// \return true if the text matches one of the two reserved patterns.
static bool isReservedKW(const char *Str, unsigned N) {
  unsigned i;

  // Check for i[0-9]+
  if (N > 1 && Str[0] == 'i') {
    for (i = 1; i < N; ++i)
      // Cast through unsigned char: isdigit() is undefined for negative
      // values, which plain char can hold for non-ASCII bytes.
      if (!isdigit(static_cast<unsigned char>(Str[i])))
        break;
    if (i == N)
      return true;
  }

  // Check for fp[0-9]+([.].*)?$
  // The shortest match is "fp" followed by a single digit, hence N > 2.
  if (N > 2 && Str[0] == 'f' && Str[1] == 'p' &&
      isdigit(static_cast<unsigned char>(Str[2]))) {
    for (i = 3; i < N; ++i)
      if (!isdigit(static_cast<unsigned char>(Str[i])))
        break;
    // Either the digits run to the end, or a '.' introduces an arbitrary
    // suffix.
    if (i == N || Str[i] == '.')
      return true;
  }

  return false;
}
/// isWidthKW - Check whether the \p N characters starting at \p Str spell a
/// width keyword of the form w[0-9]+.
///
/// \param Str Start of the candidate text; only Str[0..N-1] is read, so the
///            text does not need to be null-terminated.
/// \param N   Number of characters in the candidate text.
/// \return true if the text is 'w' followed by one or more decimal digits.
static bool isWidthKW(const char *Str, unsigned N) {
  // Need the 'w' plus at least one digit.
  if (N < 2 || Str[0] != 'w')
    return false;

  for (unsigned i = 1; i < N; ++i)
    // Cast through unsigned char: isdigit() is undefined for negative
    // values, which plain char can hold for non-ASCII bytes.
    if (!isdigit(static_cast<unsigned char>(Str[i])))
      return false;

  return true;
}
141 unsigned Length = BufferPos - Result.start;
142 switch (Length) {
143 case 3:
144 if (memcmp("def", Result.start, 3) == 0)
145 return SetTokenKind(Result, Token::KWReserved);
146 if (memcmp("var", Result.start, 3) == 0)
147 return SetTokenKind(Result, Token::KWReserved);
148 break;
149
150 case 4:
151 if (memcmp("true", Result.start, 4) == 0)
152 return SetTokenKind(Result, Token::KWTrue);
153 break;
154
155 case 5:
156 if (memcmp("array", Result.start, 5) == 0)
157 return SetTokenKind(Result, Token::KWArray);
158 if (memcmp("false", Result.start, 5) == 0)
159 return SetTokenKind(Result, Token::KWFalse);
160 if (memcmp("query", Result.start, 5) == 0)
161 return SetTokenKind(Result, Token::KWQuery);
162 break;
163
164 case 6:
165 if (memcmp("define", Result.start, 6) == 0)
166 return SetTokenKind(Result, Token::KWReserved);
167 break;
168
169 case 7:
170 if (memcmp("declare", Result.start, 7) == 0)
171 return SetTokenKind(Result, Token::KWReserved);
172 break;
173
174 case 8:
175 if (memcmp("symbolic", Result.start, 8) == 0)
176 return SetTokenKind(Result, Token::KWSymbolic);
177 break;
178 }
179
180 if (isReservedKW(Result.start, Length))
181 return SetTokenKind(Result, Token::KWReserved);
182 if (isWidthKW(Result.start, Length))
183 return SetTokenKind(Result, Token::KWWidth);
184
185 return SetTokenKind(Result, Token::Identifier);
186}
187
189 for (;;) {
190 int Char = GetNextChar();
191 if (Char == -1 || Char =='\n')
192 break;
193 }
194}
195
197 while (isalnum(PeekNextChar()) || PeekNextChar()=='_')
198 GetNextChar();
199 return SetTokenKind(Result, Token::Number);
200}
201
204 GetNextChar();
205
206 // Recognize keywords specially.
207 return SetIdentifierTokenKind(Result);
208}
209
211 Result.kind = Token::Unknown;
212 Result.length = 0;
213 Result.start = BufferPos;
214
215 // Skip whitespace.
216 while (isspace(PeekNextChar()))
217 GetNextChar();
218
219 Result.start = BufferPos;
220 Result.line = LineNumber;
221 Result.column = ColumnNumber;
222 int Char = GetNextChar();
223 switch (Char) {
224 case -1: return SetTokenKind(Result, Token::EndOfFile);
225
226 case '(': return SetTokenKind(Result, Token::LParen);
227 case ')': return SetTokenKind(Result, Token::RParen);
228 case ',': return SetTokenKind(Result, Token::Comma);
229 case ':': return SetTokenKind(Result, Token::Colon);
230 case ';': return SetTokenKind(Result, Token::Semicolon);
231 case '=': return SetTokenKind(Result, Token::Equals);
232 case '@': return SetTokenKind(Result, Token::At);
233 case '[': return SetTokenKind(Result, Token::LSquare);
234 case ']': return SetTokenKind(Result, Token::RSquare);
235 case '{': return SetTokenKind(Result, Token::LBrace);
236 case '}': return SetTokenKind(Result, Token::RBrace);
237
238 case '#':
240 return SetTokenKind(Result, Token::Comment);
241
242 case '+': {
243 if (isdigit(PeekNextChar()))
244 return LexNumber(Result);
245 else
246 return SetTokenKind(Result, Token::Unknown);
247 }
248
249 case '-': {
250 int Next = PeekNextChar();
251 if (Next == '>')
252 return GetNextChar(), SetTokenKind(Result, Token::Arrow);
253 else if (isdigit(Next))
254 return LexNumber(Result);
255 else
256 return SetTokenKind(Result, Token::Unknown);
257 break;
258 }
259
260 default:
261 if (isdigit(Char))
262 return LexNumber(Result);
263 else if (isalpha(Char) || Char == '_')
264 return LexIdentifier(Result);
265 return SetTokenKind(Result, Token::Unknown);
266 }
267}
static bool isReservedKW(const char *Str, unsigned N)
Definition: Lexer.cpp:109
static bool isWidthKW(const char *Str, unsigned N)
Definition: Lexer.cpp:132
static bool isInternalIdentifierChar(int Char)
Definition: Lexer.cpp:62
void SkipToEndOfLine()
Definition: Lexer.cpp:188
Token & SetTokenKind(Token &Result, Token::Kind k)
Definition: Lexer.cpp:103
int PeekNextChar()
Definition: Lexer.cpp:74
const char * BufferPos
Definition: Lexer.h:79
unsigned ColumnNumber
The current column.
Definition: Lexer.h:82
unsigned LineNumber
The current line.
Definition: Lexer.h:81
const char * BufferEnd
The buffer end position.
Definition: Lexer.h:80
int GetNextChar()
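Consume and return the next character, or -1 at the end of the buffer; newline sequences are returned as a single '\n'.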
Definition: Lexer.cpp:80
Token & LexNumber(Token &Result)
LexNumber - Lex a number which does not have a base specifier.
Definition: Lexer.cpp:196
Lexer(const llvm::MemoryBuffer *_buf)
Definition: Lexer.cpp:66
Token & Lex(Token &Result)
Definition: Lexer.cpp:210
Token & SetIdentifierTokenKind(Token &Result)
Definition: Lexer.cpp:140
Token & LexIdentifier(Token &Result)
LexIdentifier - Lex an identifier.
Definition: Lexer.cpp:202
Definition: main.cpp:291
const char * start
The beginning of the token string.
Definition: Lexer.h:53
unsigned column
The column number of the start of this token.
Definition: Lexer.h:56
unsigned line
The line number of the start of this token.
Definition: Lexer.h:55
const char * getKindName() const
getKindName - The name of this token's kind.
Definition: Lexer.cpp:24
unsigned length
The length of the token.
Definition: Lexer.h:54
@ Arrow
'->'
Definition: Lexer.h:24
@ KWFalse
'false'
Definition: Lexer.h:32
@ KWQuery
'query'
Definition: Lexer.h:33
@ Unknown
<other>
Definition: Lexer.h:46
@ RSquare
']'
Definition: Lexer.h:44
@ Comment
#[^\n]+
Definition: Lexer.h:27
@ KWArray
'array'
Definition: Lexer.h:31
@ Number
[+-]?[0-9][a-zA-Z0-9_]*
Definition: Lexer.h:41
@ EndOfFile
<end of file>
Definition: Lexer.h:28
@ Identifier
[a-zA-Z_][a-zA-Z0-9._]*
Definition: Lexer.h:30
@ Semicolon
';'
Definition: Lexer.h:45
@ Equals
'='
Definition: Lexer.h:29
@ KWSymbolic
'symbolic'
Definition: Lexer.h:35
@ KWWidth
w[0-9]+
Definition: Lexer.h:37
@ KWReserved
fp[0-9]+([.].*)?, i[0-9]+
Definition: Lexer.h:34
@ KWTrue
'true'
Definition: Lexer.h:36
@ LSquare
'['
Definition: Lexer.h:40