Preserve whitespace in tokenizer.

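This is needed so that object-like defines such as #define A (1) are not mistaken for function-like macros; only the whitespace between the macro name and the opening parenthesis distinguishes the two.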
2021-11-23 00:55:35 +00:00 · 2021-11-23 00:55:35 +00:00 · eba382a014
parent d577e56fb9
commit eba382a014
3 changed files with 22 additions and 24 deletions
--- a/cc.c
+++ b/cc.c
@ -27,8 +27,8 @@ void initialize_types();
 struct token_list* read_all_tokens(FILE* a, struct token_list* current, char* filename);
 struct token_list* reverse_list(struct token_list* head);

-struct token_list* remove_line_comments(struct token_list* head);
-struct token_list* remove_line_comment_tokens(struct token_list* head);
+struct token_list* remove_comments(struct token_list* head);
+void remove_whitespace();
 struct token_list* remove_preprocessor_directives(struct token_list* head);

 void eat_newline_tokens();
@ -161,7 +161,7 @@ int main(int argc, char** argv, char** envp)
 	}
 	global_token = reverse_list(global_token);

-	global_token = remove_line_comments(global_token);
+	global_token = remove_comments(global_token);

 	/* Get the environmental bits */
 	populate_env(envp);
@ -170,6 +170,7 @@ int main(int argc, char** argv, char** envp)
 	if(NULL == M2LIBC_PATH) M2LIBC_PATH = "./M2libc";

 	preprocess();
+	remove_whitespace();

 	if(PREPROCESSOR_MODE)
 	{
--- a/cc_macro.c
+++ b/cc_macro.c
@ -64,12 +64,14 @@ void eat_current_token()
 		update_global_token = TRUE;

 	macro_token = eat_token(macro_token);
+	while (macro_token->s[0] == ' ')
+		macro_token = eat_token(macro_token);

 	if(update_global_token)
 		global_token = macro_token;
 }

-void eat_newline_tokens()
+void remove_whitespace()
 {
 	macro_token = global_token;

@ -77,7 +79,7 @@ void eat_newline_tokens()
 	{
 		if(NULL == macro_token) return;

-		if(match("\n", macro_token->s))
+		if(match(" ", macro_token->s) || match("\t", macro_token->s))
 		{
 			eat_current_token();
 		}
--- a/cc_reader.c
+++ b/cc_reader.c
@ -28,12 +28,7 @@ int grab_byte()
 {
 	int c = fgetc(input);
 	if(10 == c) line = line + 1;
-	return c;
-}
-
-int clearWhiteSpace(int c)
-{
-	if((32 == c) || (9 == c)) return clearWhiteSpace(grab_byte());
+	if(9 == c) c = ' ';
 	return c;
 }

@ -162,7 +157,7 @@ struct token_list* remove_line_comments(struct token_list* head)
 	return first;
 }

-struct token_list* remove_line_comment_tokens(struct token_list* head)
+struct token_list* remove_comments(struct token_list* head)
 {
 	struct token_list* first = NULL;

@ -172,6 +167,10 @@ struct token_list* remove_line_comment_tokens(struct token_list* head)
 		{
 			head = eat_token(head);
 		}
+		else if('/' == head->s[0] && '*' == head->s[1])
+		{
+			head = eat_token(head);
+		}
 		else
 		{
 			if(NULL == first)
@ -208,22 +207,23 @@ struct token_list* remove_preprocessor_directives(struct token_list* head)
 	return first;
 }

-
 int get_token(int c)
 {
 	struct token_list* current = calloc(1, sizeof(struct token_list));
 	require(NULL != current, "Exhausted memory while getting token\n");

-reset:
 	reset_hold_string();
 	string_index = 0;

-	c = clearWhiteSpace(c);
 	if(c == EOF)
 	{
 		free(current);
 		return c;
 	}
+	else if((32 == c) || (9 == c) || (c == '\n'))
+	{
+		c = consume_byte(c);
+	}
 	else if('#' == c)
 	{
 		c = consume_byte(c);
@ -251,19 +251,18 @@ reset:
 		c = consume_byte(c);
 		if(c == '*')
 		{
-			c = grab_byte();
+			c = consume_byte(c);
 			while(c != '/')
 			{
 				while(c != '*')
 				{
-					c = grab_byte();
+					c = consume_byte(c);
 					require(EOF != c, "Hit EOF inside of block comment\n");
 				}
-				c = grab_byte();
+				c = consume_byte(c);
 				require(EOF != c, "Hit EOF inside of block comment\n");
 			}
-			c = grab_byte();
-			goto reset;
+			c = consume_byte(c);
 		}
 		else if(c == '/')
 		{
@ -274,10 +273,6 @@ reset:
 			c = consume_byte(c);
 		}
 	}
-	else if (c == '\n')
-	{
-		c = consume_byte(c);
-	}
 	else if(c == '*')
 	{
 		c = consume_byte(c);