Hedron Hedron - 4 months ago 8
C Question

Keep lexer from reading first character in file

I'm making a lexer in C, and I noticed that when it reads the test code, the file buffer contains a strange character that prints as a space. For some reason the lexer reads this character from the buffer and treats it like a space.

test file:

mo on


output

Current character: " ", Length: 6, Pointer: 0
Current character: "m", Length: 6, Pointer: 1
Type:2 {
Line: 1
Pos: 0
Number: 21646720
Real: 21646720
String: 'mo'
}

Current character: " ", Length: 6, Pointer: 3
Current character: "o", Length: 6, Pointer: 4
Type:2 {
Line: 1
Pos: 0
Number: 21683576
Real: 21683576
String: 'o'
}


code

/*
 * Peek at the character `ahead` positions past the lexer's current read
 * position (lexer->ptr) without changing the lexer state.
 *
 * Returns lexer->src[lexer->ptr + ahead]. If that index is greater than
 * lexer->len, an error is recorded through error_new() and '\0' is returned
 * so the caller always receives a defined value.
 *
 * NOTE(review): the check accepts an index equal to lexer->len. That is only
 * safe if src holds a readable terminator at src[len] -- confirm against the
 * code that fills the buffer, or tighten the comparison.
 */
static char lexer_look(lexer_t* lexer, size_t ahead) {
    const size_t index = lexer->ptr + ahead;

    if (lexer->len < index) {
        /* The values are size_t, so they are printed with %zu rather than %d.
         * Assumes error_new() forwards its format to a printf-family
         * function -- TODO confirm. */
        error_new(lexer->errors, 0, 0, "The lexer tried to index %zu out of bounds %zu", index, (size_t)lexer->len);
        return '\0';
    }

    return lexer->src[index];
}

/*
 * Scan forward from the lexer's current position until a token is produced
 * or can_adv() reports that the lexer cannot advance any further.
 *
 * Dispatch on the current character:
 *   '\n'           -> new_line() is called and the character is skipped
 *   '"'            -> lexer_str() produces the token
 *   '#'            -> lexer_comment() handles it (no token is assigned here)
 *   letter or '_'  -> lexer_ident() produces the token
 *   whitespace     -> skipped
 *   anything else  -> skipped (see note in the default branch)
 *
 * Returns the token, or NULL if the loop ended without producing one.
 */
static token_t* next_token(lexer_t* lexer) {
    token_t* token = NULL;

    while (token == NULL && can_adv(lexer, 1)) {
        const char c = lexer_look(lexer, 0);
        /* <ctype.h> functions require a value representable as unsigned char
         * (or EOF). Plain char may be signed, so a byte >= 0x80 would be
         * passed as a negative int, which is undefined behavior. */
        const unsigned char uc = (unsigned char)c;

        if (DEBUG) {
            /* Casts make the arguments match %zu whatever integer type the
             * len/ptr fields have. */
            printf("Current character: \"%c\", Length: %zu, Pointer: %zu\n", c, (size_t)lexer->len, (size_t)lexer->ptr);
        }

        switch (c) {
            case '\n':
                new_line(lexer);
                lexer_adv(lexer, 1);
                break;
            case '\"':
                token = lexer_str(lexer);
                break;
            case '#':
                lexer_comment(lexer);
                break;
            default:
                if (isalpha(uc) || c == '_') {
                    token = lexer_ident(lexer);
                } else if (isspace(uc)) {
                    lexer_adv(lexer, 1);
                } else {
                    /* Unrecognized character: it must be consumed, otherwise
                     * neither the token nor the read position changes and the
                     * loop never terminates.
                     * NOTE(review): consider reporting this through the
                     * lexer's error list instead of skipping silently. */
                    lexer_adv(lexer, 1);
                }
                break;
        }
    }

    return token;
}

Answer

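c should be defined as an int whose value is confined to the range EOF..UCHAR_MAX for isalpha() and isspace() to behave reliably. Plain char may be signed, so a byte above 0x7F becomes a negative value, and passing a negative value other than EOF to these functions is undefined behavior. Either read characters into an int, or cast at the call site: isalpha((unsigned char)c).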