00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifndef COMMA_PARSER_LEXER_HDR_GUARD
00010 #define COMMA_PARSER_LEXER_HDR_GUARD
00011
00012 #include "comma/basic/Diagnostic.h"
00013 #include "comma/basic/TextProvider.h"
00014 #include <iosfwd>
00015 #include <string>
00016
00017 namespace comma {
00018
00019 class Lexer {
00020
00021 public:
00022 Lexer(TextProvider &txtProvider, Diagnostic &diag);
00023
00024
00025
00026
00027
00028
00029
00030 enum Code {
00031 UNUSED_ID,
00032
00033 #define RESERVED(NAME, STRING) TKN_ ## NAME,
00034 #define GLYPH(NAME, STRING) TKN_ ## NAME,
00035 #define TOKEN(NAME) TKN_ ## NAME,
00036 #include "comma/parser/Tokens.def"
00037 #undef RESERVED
00038 #undef GLYPH
00039 #undef TOKEN
00040
00041 NUMTOKEN_CODES
00042 };
00043
00044
00045
00046
00047
00048
00049 class Token {
00050
00051 public:
00052 Token() : code(Lexer::UNUSED_ID) { }
00053
00054 Lexer::Code getCode() const { return code; }
00055
00056 Location getLocation() const { return location; }
00057
00058 const char *getRep() const { return string; }
00059
00060 unsigned getLength() const { return length; }
00061
00062
00063 std::string getString() const;
00064
00065 private:
00066 Lexer::Code code : 8;
00067 unsigned length : 24;
00068 Location location;
00069 const char *string;
00070
00071
00072
00073 friend class Lexer;
00074
00075 Token(Lexer::Code code,
00076 Location location,
00077 const char *string,
00078 unsigned length)
00079 : code(code),
00080 length(length),
00081 location(location),
00082 string(string) { }
00083 };
00084
00085
00086
00087
00088 void scan(Lexer::Token &tkn);
00089
00090 void peek(Lexer::Token &tkn, unsigned n);
00091
00092
00093
00094
00095
00096
00097 void beginExcursion();
00098
00099 void endExcursion();
00100
00101 void forgetExcursion();
00102
00103
00104 bool lexSuccessful() const { return errorCount == 0; }
00105
00106
00107 unsigned getErrorCount() const { return errorCount; }
00108
00112 void abortScanning() { scanningAborted = true; }
00113
00114
00115
00116 static bool isFunctionGlyph(const Lexer::Token &tkn) {
00117 switch (tkn.getCode()) {
00118 case TKN_EQUAL:
00119 case TKN_NEQUAL:
00120 case TKN_LESS:
00121 case TKN_LEQ:
00122 case TKN_GREAT:
00123 case TKN_GEQ:
00124 case TKN_MINUS:
00125 case TKN_STAR:
00126 case TKN_PLUS:
00127 case TKN_FSLASH:
00128 case TKN_POW:
00129 case TKN_MOD:
00130 case TKN_REM:
00131 case TKN_AND:
00132 case TKN_NOT:
00133 case TKN_XOR:
00134 case TKN_OR:
00135 return true;
00136 default:
00137 return false;
00138 }
00139 }
00140
00141
00142
00143 static const char *tokenString(Code code);
00144
00145
00146 static std::string tokenString(const Token &tkn);
00147
00148 private:
00149 void scanToken();
00150
00151 bool eatWhitespace();
00152
00153 bool eatComment();
00154
00155 bool scanWord();
00156
00157 bool scanGlyph();
00158
00159 bool scanCharacter();
00160
00161 bool scanString();
00162
00163 bool scanNumeric();
00164
00165 bool scanEscape();
00166
00167 static bool isAlphabetic(unsigned c);
00168
00169 static bool isInitialIdentifierChar(unsigned c);
00170
00171 static bool isInnerIdentifierChar(unsigned c);
00172
00173 static bool isWhitespace(unsigned c);
00174
00175 static bool isDecimalDigit(unsigned c);
00176
00177 Location currentLocation() const;
00178
00179
00180
00181
00182
00183
00184 unsigned readStream();
00185 unsigned peekStream();
00186 void ungetStream();
00187 void ignoreStream();
00188
00189
00190
00191
00192
00193 Code getTokenCode(TextIterator &start, TextIterator &end) const;
00194
00195 void emitToken(Code code,
00196 const TextIterator &start, const TextIterator &end);
00197
00198
00199
00200 void emitToken(Code code, Location loc);
00201
00202 void emitStringToken(const TextIterator &start, const TextIterator &end);
00203
00204 void emitIntegerToken(const TextIterator &start, const TextIterator &end);
00205
00206 void emitIdentifierToken(const TextIterator &start,
00207 const TextIterator &end);
00208
00209 void emitCharacterToken(const TextIterator &start, const TextIterator &end);
00210
00211 DiagnosticStream &report(Location loc, diag::Kind kind) {
00212 ++errorCount;
00213 SourceLocation sloc = txtProvider.getSourceLocation(loc);
00214 return diagnostic.report(sloc, kind);
00215 }
00216
00217
00218
00219 void diagnoseConsecutiveUnderscores(unsigned c1, unsigned c2);
00220
00221 DiagnosticStream &report(SourceLocation sloc, diag::Kind kind) {
00222 ++errorCount;
00223 return diagnostic.report(sloc, kind);
00224 }
00225
00226 DiagnosticStream &report(diag::Kind kind) {
00227 ++errorCount;
00228 SourceLocation sloc = txtProvider.getSourceLocation(currentLocation());
00229 return diagnostic.report(sloc, kind);
00230 }
00231
00232
00233 TextProvider &txtProvider;
00234
00235
00236 Diagnostic &diagnostic;
00237
00238
00239 TextIterator currentIter;
00240
00241
00242 unsigned errorCount;
00243
00244
00245 bool scanningAborted;
00246
00247
00248
00249 Token *targetToken;
00250
00251
00252 std::vector<Token> tokens;
00253
00254
00255
00256 std::vector<unsigned> positionStack;
00257
00258
00259
00260 unsigned index;
00261 };
00262
00263 }
00264
00265 #endif