Preserve whitespace in tokenizer.

This is needed so that object-like defines whose replacement starts with a parenthesis, such as
#define A (1)
are not mistaken for function-like macros.
This commit is contained in:
Jeremiah Orians 2021-11-23 00:55:35 +00:00 committed by Andrius Štikonas
parent d577e56fb9
commit eba382a014
3 changed files with 22 additions and 24 deletions

7
cc.c
View File

@ -27,8 +27,8 @@ void initialize_types();
struct token_list* read_all_tokens(FILE* a, struct token_list* current, char* filename);
struct token_list* reverse_list(struct token_list* head);
struct token_list* remove_line_comments(struct token_list* head);
struct token_list* remove_line_comment_tokens(struct token_list* head);
struct token_list* remove_comments(struct token_list* head);
void remove_whitespace();
struct token_list* remove_preprocessor_directives(struct token_list* head);
void eat_newline_tokens();
@ -161,7 +161,7 @@ int main(int argc, char** argv, char** envp)
}
global_token = reverse_list(global_token);
global_token = remove_line_comments(global_token);
global_token = remove_comments(global_token);
/* Get the environmental bits */
populate_env(envp);
@ -170,6 +170,7 @@ int main(int argc, char** argv, char** envp)
if(NULL == M2LIBC_PATH) M2LIBC_PATH = "./M2libc";
preprocess();
remove_whitespace();
if(PREPROCESSOR_MODE)
{

View File

@ -64,12 +64,14 @@ void eat_current_token()
update_global_token = TRUE;
macro_token = eat_token(macro_token);
while (macro_token->s[0] == ' ')
macro_token = eat_token(macro_token);
if(update_global_token)
global_token = macro_token;
}
void eat_newline_tokens()
void remove_whitespace()
{
macro_token = global_token;
@ -77,7 +79,7 @@ void eat_newline_tokens()
{
if(NULL == macro_token) return;
if(match("\n", macro_token->s))
if(match(" ", macro_token->s) || match("\t", macro_token->s))
{
eat_current_token();
}

View File

@ -28,12 +28,7 @@ int grab_byte()
{
int c = fgetc(input);
if(10 == c) line = line + 1;
return c;
}
int clearWhiteSpace(int c)
{
if((32 == c) || (9 == c)) return clearWhiteSpace(grab_byte());
if(9 == c) c = ' ';
return c;
}
@ -162,7 +157,7 @@ struct token_list* remove_line_comments(struct token_list* head)
return first;
}
struct token_list* remove_line_comment_tokens(struct token_list* head)
struct token_list* remove_comments(struct token_list* head)
{
struct token_list* first = NULL;
@ -172,6 +167,10 @@ struct token_list* remove_line_comment_tokens(struct token_list* head)
{
head = eat_token(head);
}
else if('/' == head->s[0] && '*' == head->s[1])
{
head = eat_token(head);
}
else
{
if(NULL == first)
@ -208,22 +207,23 @@ struct token_list* remove_preprocessor_directives(struct token_list* head)
return first;
}
int get_token(int c)
{
struct token_list* current = calloc(1, sizeof(struct token_list));
require(NULL != current, "Exhausted memory while getting token\n");
reset:
reset_hold_string();
string_index = 0;
c = clearWhiteSpace(c);
if(c == EOF)
{
free(current);
return c;
}
else if((32 == c) || (9 == c) || (c == '\n'))
{
c = consume_byte(c);
}
else if('#' == c)
{
c = consume_byte(c);
@ -251,19 +251,18 @@ reset:
c = consume_byte(c);
if(c == '*')
{
c = grab_byte();
c = consume_byte(c);
while(c != '/')
{
while(c != '*')
{
c = grab_byte();
c = consume_byte(c);
require(EOF != c, "Hit EOF inside of block comment\n");
}
c = grab_byte();
c = consume_byte(c);
require(EOF != c, "Hit EOF inside of block comment\n");
}
c = grab_byte();
goto reset;
c = consume_byte(c);
}
else if(c == '/')
{
@ -274,10 +273,6 @@ reset:
c = consume_byte(c);
}
}
else if (c == '\n')
{
c = consume_byte(c);
}
else if(c == '*')
{
c = consume_byte(c);