// Lexer
using CLanguage.Syntax;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Runtime.CompilerServices;
namespace CLanguage.Parser
{
[System.Runtime.CompilerServices.NullableContext(1)]
[System.Runtime.CompilerServices.Nullable(0)]
public class Lexer
{
// Code of the current token; -1 before the first token is read and again after Eof() runs.
private int _token = -1;
// Value attached to the current token (e.g. identifier text or a literal's value), or null.
[System.Runtime.CompilerServices.Nullable(2)]
private object _value;
// One-character lookahead: the last character read that has not yet been consumed by a token.
// -2 means nothing has been read yet (SkipWhiteSpace() then performs the first Read());
// -1 is the value Read() returns at end of input.
private int _lastR = -2;
// Scratch buffer in which the text of the token being read is accumulated.
private char[] _chbuf = new char[4096];
// Number of characters of _chbuf currently in use.
private int _chbuflen;
// Location at which the current token starts.
private Location location;
// Location at which the current token ends.
private Location endLocation;
// Line counter, starting at 1; incremented when a line break is consumed.
private int line = 1;
// Column counter, starting at 1; incremented by every successful Read() and reset to 1 on a line break.
private int column = 1;
// Maps every reserved word recognised by the lexer to the numeric token code emitted for it.
// Entry order is kept stable because KeywordTokens is built from the enumeration of the values.
private static readonly Dictionary<string, int> _kwTokens = new Dictionary<string, int> {
    ["auto"] = 292,
    ["bool"] = 307,
    ["break"] = 327,
    ["char"] = 296,
    ["class"] = 313,
    ["const"] = 304,
    ["continue"] = 326,
    ["do"] = 323,
    ["double"] = 303,
    ["else"] = 320,
    ["enum"] = 315,
    ["extern"] = 290,
    ["false"] = 311,
    ["float"] = 302,
    ["for"] = 324,
    ["goto"] = 325,
    ["if"] = 319,
    ["inline"] = 294,
    ["int"] = 298,
    ["long"] = 299,
    ["public"] = 286,
    ["private"] = 287,
    ["protected"] = 288,
    ["register"] = 293,
    ["restrict"] = 295,
    ["return"] = 328,
    ["short"] = 297,
    ["signed"] = 300,
    ["sizeof"] = 260,
    ["static"] = 291,
    ["struct"] = 312,
    ["true"] = 310,
    ["typedef"] = 289,
    ["union"] = 314,
    ["unsigned"] = 301,
    ["void"] = 306,
    ["volatile"] = 305,
    ["while"] = 322,
};
/// <summary>Token codes of all reserved words, i.e. the values stored in _kwTokens.</summary>
public static readonly HashSet<int> KeywordTokens = new HashSet<int>(_kwTokens.Values);
/// <summary>
/// Token codes of multi-character operators. The spelling noted beside each code is the
/// character sequence that Advance() maps to it.
/// </summary>
public static readonly HashSet<int> OperatorTokens = new HashSet<int> {
268, // "=="
267, // ">="
266, // "<="
269, // "!="
272, // "||"
271, // "&&"
263, // "--"
262, // "++"
261, // NOTE(review): no character sequence in Advance() produces this code - confirm its meaning against the grammar
264, // "<<"
265 // ">>"
};
// Index into Document.Content of the next character Read() will return.
private int nextPosition;
/// <summary>Receiver of the diagnostics raised while lexing.</summary>
public Report Report { get; }
/// <summary>The document whose content is being tokenized.</summary>
public Document Document { get; }
/// <summary>A new Token built from the current token code, value, start location and end location.</summary>
public Token CurrentToken => new Token(_token, _value, location, endLocation);
/// <summary>
/// Predicate consulted for every word that is not a reserved word; when it returns true the word
/// is emitted with token code 285 instead of 257. Defaults to a predicate that always returns false.
/// </summary>
public Func<string, bool> IsTypedef { get; set; } = (string _) => false;
/// <summary>
/// Creates a lexer positioned at the start of <paramref name="document"/>.
/// </summary>
/// <param name="document">Document whose content will be tokenized.</param>
/// <param name="report">Receiver for diagnostics; when null a new Report is created.</param>
public Lexer(Document document, [System.Runtime.CompilerServices.Nullable(2)] Report report = null)
{
    if (report == null) {
        report = new Report(null);
    }
    Report = report;
    Document = document;

    // Offset 0, line 1, column 1: start and end coincide until the first token is read.
    Location start = new Location(document, 0, 1, 1);
    location = start;
    endLocation = start;
}
/// <summary>
/// Creates a lexer over source text by wrapping it in a new Document and delegating to the
/// (Document, Report) constructor.
/// </summary>
/// <param name="name">Name passed to the Document constructor.</param>
/// <param name="code">Source text passed to the Document constructor.</param>
/// <param name="report">Receiver for diagnostics; may be null.</param>
public Lexer(string name, string code, [System.Runtime.CompilerServices.Nullable(2)] Report report = null)
: this(new Document(name, code), report)
{
}
// Puts the lexer into its end-of-input state (token code -1, no value).
// Always returns false so that Advance() can return the result directly.
private bool Eof()
{
    _token = -1;
    _value = null;
    return false;
}
// Consumes and returns the next character of the document, advancing the position and the
// column counter. Returns -1, without changing any state, once the content is exhausted.
private int Read()
{
    var content = Document.Content;
    if (nextPosition >= content.Length) {
        return -1;
    }
    column++;
    return content[nextPosition++];
}
// Returns the character Read() would return next without consuming it, or -1 at end of input.
private int Peek()
{
    var content = Document.Content;
    return nextPosition < content.Length ? content[nextPosition] : -1;
}
/// <summary>
/// Skips blanks and comments in front of the next token and stores the first character that
/// is neither in the one-character lookahead (-1 at end of input).
/// </summary>
/// <remarks>
/// A line feed is not treated as a blank, so the caller still sees it. A line comment,
/// however, is consumed together with the line break that ends it.
/// </remarks>
public void SkipWhiteSpace()
{
    int ch = _lastR;
    if (ch == -2) {
        // Nothing has been read yet: prime the lookahead.
        ch = Read();
    }

    bool skippedComment;
    do {
        skippedComment = false;

        // Control characters and space, except line feed.
        while (ch >= 0 && ch <= ' ' && ch != '\n') {
            ch = Read();
        }

        if (ch == '/' && Peek() == '/') {
            // Line comment: runs to the line break (or NUL / end of input).
            int inComment = Read();
            while (inComment > 0 && inComment != '\n' && inComment != '\u2028') {
                inComment = Read();
            }
            ch = Read();
            line++;
            column = 1;
            skippedComment = true;
        } else if (ch == '/' && Peek() == '*') {
            // Block comment. Consume the opening '*' before scanning for the terminator so
            // that the '*' of "/*" can never pair with a following '/': "/*/" opens a
            // comment, it does not close one.
            Read();
            int inComment = Read();
            while (inComment > 0 && (inComment != '*' || Peek() != '/')) {
                if (inComment == '\n' || inComment == '\u2028') {
                    line++;
                    column = 1;
                }
                inComment = Read();
            }
            Read(); // the '/' that closes the comment
            ch = Read();
            skippedComment = true;
        }
    } while (skippedComment);

    _lastR = ch;
}
/// <summary>
/// Reads the next token from the document and makes it the current token
/// (see <see cref="CurrentToken"/>).
/// </summary>
/// <returns>true when a token was read; false once the end of input has been reached.</returns>
/// <exception cref="NotSupportedException">
/// The input contains an unknown escape sequence in a string or character literal, or a
/// character that cannot start any token.
/// </exception>
public bool Advance()
{
    SkipWhiteSpace();
    int first = _lastR;
    if (first == -1) {
        return Eof();
    }

    location = new Location(location.Document, nextPosition - 1, line, column);
    char c = (char)first;
    if (c == '\n' || c == '\u2028') {
        // Line breaks are tokens of their own (code 329).
        SetToken(329, null, Read());
        line++;
        column = 1;
    } else if (char.IsDigit(c)) {
        LexNumber(first);
    } else {
        LexSymbolOrWord(first);
    }

    endLocation = CurrentEnd();
    return true;
}

// Stores the three pieces of state every token produces: code, value and lookahead character.
private void SetToken(int token, object value, int lookahead)
{
    _token = token;
    _value = value;
    _lastR = lookahead;
}

// Location just past the characters consumed so far (end of document when the lookahead is exhausted).
private Location CurrentEnd()
{
    int offset = (_lastR >= 0) ? (nextPosition - 1) : location.Document.Content.Length;
    return new Location(location.Document, offset, line, column);
}

// Appends one character to the token buffer, doubling the buffer when it is full so that
// long tokens are neither truncated nor able to overrun the array.
private void AppendChar(char ch)
{
    if (_chbuflen == _chbuf.Length) {
        Array.Resize(ref _chbuf, _chbuf.Length * 2);
    }
    _chbuf[_chbuflen++] = ch;
}

// Dispatches on the first character of a token that is neither a line break nor a digit.
private void LexSymbolOrWord(int first)
{
    switch (first) {
    case '=':
        LexOperator(first, '=', 268); // "=="
        break;
    case '!':
        LexOperator(first, '=', 269); // "!="
        break;
    case ':':
        LexOperator(first, ':', 270); // "::"
        break;
    case '*':
        LexOperator(first, '=', 273); // "*="
        break;
    case '/':
        LexOperator(first, '=', 274); // "/="
        break;
    case '^':
        LexOperator(first, '=', 281); // "^="
        break;
    case '+':
        LexOperator(first, '=', 276, '+', 262); // "+=", "++"
        break;
    case '-':
        LexOperator(first, '=', 277, '-', 263); // "-=", "--"
        break;
    case '<':
        LexOperator(first, '=', 266, '<', 264); // "<=", "<<"
        break;
    case '>':
        LexOperator(first, '=', 267, '>', 265); // ">=", ">>"
        break;
    case '&':
        LexDoubledOperator(first, 271, 283, 280); // "&&", "&&=", "&="
        break;
    case '|':
        LexDoubledOperator(first, 272, 284, 282); // "||", "||=", "|="
        break;
    case '.':
        LexDot(first);
        break;
    case '#':
    case '%':
    case '(':
    case ')':
    case ',':
    case ';':
    case '?':
    case '[':
    case '\\':
    case ']':
    case '{':
    case '}':
    case '~':
        // Single-character tokens use their own character code as token code.
        SetToken(first, null, Read());
        break;
    case '"':
        LexStringLiteral();
        break;
    case '\'':
        LexCharLiteral();
        break;
    default:
        LexWord(first);
        break;
    }
}

// Emits pairToken when `first` is followed by `second`, otherwise the single-character token `first`.
private void LexOperator(int first, int second, int pairToken)
{
    int next = Read();
    if (next == second) {
        SetToken(pairToken, null, Read());
    } else {
        SetToken(first, null, next);
    }
}

// Same as the three-argument overload, with two possible second characters.
private void LexOperator(int first, int secondA, int tokenA, int secondB, int tokenB)
{
    int next = Read();
    if (next == secondA) {
        SetToken(tokenA, null, Read());
    } else if (next == secondB) {
        SetToken(tokenB, null, Read());
    } else {
        SetToken(first, null, next);
    }
}

// Handles '&' and '|': the doubled form, the doubled form followed by '=', the form followed
// by '=', or the bare character.
private void LexDoubledOperator(int first, int doubledToken, int doubledAssignToken, int assignToken)
{
    int next = Read();
    if (next == first) {
        next = Read();
        if (next == '=') {
            SetToken(doubledAssignToken, null, Read());
        } else {
            SetToken(doubledToken, null, next);
        }
    } else if (next == '=') {
        SetToken(assignToken, null, Read());
    } else {
        SetToken(first, null, next);
    }
}

// Handles '.': three consecutive dots form token 316, anything else is a single '.'.
private void LexDot(int first)
{
    int next = Read();
    if (next == '.' && Peek() == '.') {
        Read(); // the third '.', whose presence Peek() has just established
        SetToken(316, null, Read());
    } else {
        SetToken(first, null, next);
    }
}

// Reads a double-quoted string literal (token 259); the value is the unescaped text.
private void LexStringLiteral()
{
    _chbuflen = 0;
    int ch = Read();
    char c = (char)ch;
    bool done = ch < 0 || c == '"';
    while (!done) {
        switch (c) {
        case '\\':
            ch = Read();
            c = (char)ch;
            if (ch >= 0) {
                switch (c) {
                case '\\':
                    AppendChar('\\');
                    break;
                case 'r':
                    AppendChar('\r');
                    break;
                case 'n':
                    AppendChar('\n');
                    break;
                case 't':
                    AppendChar('\t');
                    break;
                case '\'':
                    AppendChar('\'');
                    break;
                case '"':
                    AppendChar('"');
                    break;
                default:
                    if (!char.IsWhiteSpace(c)) {
                        throw new NotSupportedException("Unrecognized string escape sequence");
                    }
                    // Backslash followed by white space: drop everything up to and
                    // including the line break, contributing nothing to the value.
                    while (ch > 0 && ch != '\n' && ch != '\u2028') {
                        ch = Read();
                    }
                    break;
                }
                ch = Read();
                c = (char)ch;
            }
            break;
        case '\n':
        case '\u2028':
            endLocation = CurrentEnd();
            Report.Error(1010, location, endLocation, "Newline in constant");
            done = true;
            break;
        default:
            AppendChar(c);
            ch = Read();
            c = (char)ch;
            break;
        }
        done = done || ch < 0 || c == '"';
    }
    _lastR = Read();
    _token = 259;
    _value = new string(_chbuf, 0, _chbuflen);
}

// Reads a single-quoted character literal (token 258); the value is its first character
// after unescaping.
private void LexCharLiteral()
{
    _chbuflen = 0;
    int ch = Read();
    char c = (char)ch;
    while (ch >= 0 && c != '\'') {
        bool isEscape = c == '\\';
        AppendChar(c);
        ch = Read();
        c = (char)ch;
        if (isEscape && ch >= 0) {
            // The character after a backslash belongs to the escape, even when it is a
            // quote; otherwise '\'' would end at its second quote.
            AppendChar(c);
            ch = Read();
            c = (char)ch;
        }
    }

    if (_chbuflen > 1 && _chbuf[0] == '\\') {
        switch (_chbuf[1]) {
        case '\\':
            _chbuf[0] = '\\';
            break;
        case 'r':
            _chbuf[0] = '\r';
            break;
        case 'n':
            _chbuf[0] = '\n';
            break;
        case 't':
            _chbuf[0] = '\t';
            break;
        case '\'':
            _chbuf[0] = '\'';
            break;
        case '"':
            _chbuf[0] = '"';
            break;
        default:
            throw new NotSupportedException("Unrecognized char escape sequence");
        }
        _chbuflen = 1;
    }

    _lastR = Read();
    _token = 258;
    // An empty literal yields NUL instead of whatever an earlier token left in the buffer.
    _value = (_chbuflen > 0) ? _chbuf[0] : '\0';
}

// Reads an identifier-like word and classifies it as reserved word, typedef name (285) or
// identifier (257); the value is the word's text.
private void LexWord(int first)
{
    int ch = first;
    char c = (char)ch;
    _chbuflen = 0;
    while (c == '_' || char.IsLetterOrDigit(c) || ch > 127) {
        AppendChar(c);
        ch = Read();
        c = (char)ch;
    }
    if (_chbuflen == 0) {
        throw new NotSupportedException($"Unexpected character '{c}'");
    }
    _lastR = ch;

    string text = new string(_chbuf, 0, _chbuflen);
    _value = text;
    if (_kwTokens.TryGetValue(text, out int keywordToken)) {
        _token = keywordToken;
    } else if (IsTypedef != null && IsTypedef(text)) {
        _token = 285;
    } else {
        _token = 257;
    }
}

// Reads a numeric literal (token 258). Suffix letters l/L, u/U and (outside hex) f/F select
// the value type and are not part of the parsed text; a leading "0x" switches to hexadecimal.
private void LexNumber(int first)
{
    bool allDigits = true;
    bool isLong = false;
    bool isUnsigned = false;
    bool isFloat = false;
    bool isHex = false;

    int ch = first;
    char c = (char)ch;
    _chbuflen = 0;
    while (c == '.' || char.IsDigit(c) || c == 'E' || c == 'e' || c == 'f' || c == 'F'
        || c == 'u' || c == 'U' || c == 'l' || c == 'L'
        || (!isHex && c == 'x') || (isHex && IsHex(c))) {
        switch (c) {
        case 'L':
        case 'l':
            isLong = true;
            break;
        case 'U':
        case 'u':
            isUnsigned = true;
            break;
        default:
            if (!isHex && (c == 'f' || c == 'F')) {
                isFloat = true;
            } else if (c == 'x' && _chbuflen == 1 && _chbuf[0] == '0') {
                // The "0" stays in the buffer; a leading zero does not change the hex value.
                isHex = true;
            } else {
                allDigits = allDigits && char.IsDigit(c);
                AppendChar(c);
            }
            break;
        }
        ch = Read();
        c = (char)ch;
    }
    _lastR = ch;

    string text = new string(_chbuf, 0, _chbuflen);
    CultureInfo culture = CultureInfo.InvariantCulture;
    endLocation = CurrentEnd();
    if (allDigits || isHex) {
        NumberStyles style = isHex ? NumberStyles.HexNumber : NumberStyles.None;
        _value = ParseInteger(text, style, isLong, isUnsigned);
    } else if (isFloat) {
        _value = float.Parse(text, culture);
    } else {
        _value = double.Parse(text, culture);
    }
    _token = 258;
}

// Parses integer text as ulong, long, uint or int according to the suffix flags. A literal
// that does not fit (or does not parse) is reported as error 1021 and yields 0, for every
// width rather than only for unsigned long.
private object ParseInteger(string text, NumberStyles style, bool isLong, bool isUnsigned)
{
    CultureInfo culture = CultureInfo.InvariantCulture;
    if (isLong && isUnsigned) {
        if (ulong.TryParse(text, style, culture, out ulong unsignedLong)) {
            return unsignedLong;
        }
    } else if (isLong) {
        if (long.TryParse(text, style, culture, out long signedLong)) {
            return signedLong;
        }
    } else if (isUnsigned) {
        if (uint.TryParse(text, style, culture, out uint unsignedInt)) {
            return unsignedInt;
        }
    } else {
        if (int.TryParse(text, style, culture, out int signedInt)) {
            return signedInt;
        }
    }
    Report.Error(1021, location, endLocation, "Integral constant is too large");
    return 0;
}
// True for the hexadecimal letters A-F and a-f. Decimal digits are deliberately not covered
// here; the caller tests those separately with char.IsDigit.
private static bool IsHex(char c)
{
    bool upper = c >= 'A' && c <= 'F';
    bool lower = c >= 'a' && c <= 'f';
    return upper || lower;
}
}
}