Make output more closely match input to enable easier audits

2021-12-26 07:42:54 -05:00 · 2021-12-26 07:42:54 -05:00 · 5a81f6ae68
parent b34a2528c8
commit 5a81f6ae68
6 changed files with 61 additions and 162 deletions
--- a/cc.c
+++ b/cc.c
@ -27,11 +27,6 @@ void initialize_types();
 struct token_list* read_all_tokens(FILE* a, struct token_list* current, char* filename);
 struct token_list* reverse_list(struct token_list* head);

-struct token_list* remove_comments(struct token_list* head);
-void remove_whitespace();
-struct token_list* remove_preprocessor_directives(struct token_list* head);
-
-void eat_newline_tokens();
 void init_macro_env(char* sym, char* value, char* source, int num);
 void preprocess();
 void output_tokens(struct token_list *i, FILE* out);
@ -52,6 +47,7 @@ int main(int argc, char** argv, char** envp)
 	init_macro_env("__M2__", "42", "__INTERNAL_M2__", 0); /* Setup __M2__ */
 	char* name;
 	char* hold;
+	int DUMP_MODE = FALSE;

 	int i = 1;
 	while(i <= argc)
@ -65,6 +61,11 @@ int main(int argc, char** argv, char** envp)
 			PREPROCESSOR_MODE = TRUE;
 			i += 1;
 		}
+		else if(match(argv[i], "--dump-mode"))
+		{
+			DUMP_MODE = TRUE;
+			i+= 1;
+		}
 		else if(match(argv[i], "-f") || match(argv[i], "--file"))
 		{
 			if(NULL == hold_string)
@ -161,20 +162,23 @@ int main(int argc, char** argv, char** envp)
 	}
 	global_token = reverse_list(global_token);

-	global_token = remove_comments(global_token);
-
 	/* Get the environmental bits */
 	populate_env(envp);
 	setup_env(envp);
 	M2LIBC_PATH = env_lookup("M2LIBC_PATH");
 	if(NULL == M2LIBC_PATH) M2LIBC_PATH = "./M2libc";

+	if(DUMP_MODE)
+	{
+		output_tokens(global_token, destination_file);
+		exit(EXIT_SUCCESS);
+	}
+
 	preprocess();
-	remove_whitespace();

 	if(PREPROCESSOR_MODE)
 	{
-		fputs("\n/* Preprocessed source */\n", destination_file);
+		fputs("/* M2-Mesoplanet Preprocessed source */\n", destination_file);
 		output_tokens(global_token, destination_file);
 		fclose(destination_file);
 	}
--- a/cc_core.c
+++ b/cc_core.c
@ -61,7 +61,6 @@ void output_tokens(struct token_list *i, FILE* out)
 	while(NULL != i)
 	{
 		fputs(i->s, out);
-		fputs(" ", out);
 		i = i->next;
 	}
 }
--- a/cc_macro.c
+++ b/cc_macro.c
@ -84,25 +84,6 @@ void eat_current_token_without_space()
 	_eat_current_token(FALSE);
 }

-void remove_whitespace()
-{
-	macro_token = global_token;
-
-	while(TRUE)
-	{
-		if(NULL == macro_token) return;
-
-		if(match(" ", macro_token->s) || match("\t", macro_token->s))
-		{
-			eat_current_token();
-		}
-		else
-		{
-			macro_token = macro_token->next;
-		}
-	}
-}
-
 struct token_list* lookup_token(struct token_list* token, struct token_list* arguments)
 {
 	char *s;
@ -786,6 +767,23 @@ void macro_directive()
 			eat_current_token();
 		}
 	}
+	else if(match("#FILENAME", macro_token->s))
+	{
+		while(TRUE)
+		{
+			if(NULL == macro_token)
+			{
+				return;
+			}
+
+			if('\n' == macro_token->s[0])
+			{
+				return;
+			}
+
+			eat_current_token();
+		}
+	}
 	else
 	{
 		/* Put a big fat warning but see if we can just ignore */
--- a/cc_reader.c
+++ b/cc_reader.c
@ -56,7 +56,6 @@ int grab_byte()
 {
 	int c = fgetc(input);
 	if(10 == c) line = line + 1;
-	if(9 == c) c = ' ';
 	return c;
 }

@ -79,7 +78,8 @@ int preserve_string(int c)
 		c = consume_byte(c);
 		require(EOF != c, "Unterminated string\n");
 	} while(escape || (c != frequent));
-	return grab_byte();
+	c = consume_byte(frequent);
+	return c;
 }


@ -95,20 +95,6 @@ void copy_string(char* target, char* source, int max)
 }


-void fixup_label()
-{
-	int hold = ':';
-	int prev;
-	int i = 0;
-	do
-	{
-		prev = hold;
-		hold = hold_string[i];
-		hold_string[i] = prev;
-		i = i + 1;
-	} while(0 != hold);
-}
-
 int preserve_keyword(int c, char* S)
 {
 	while(in_set(c, S))
@ -120,12 +106,13 @@ int preserve_keyword(int c, char* S)

 void reset_hold_string()
 {
-	int i = string_index + 2;
+	int i = MAX_STRING; /* zero every index from MAX_STRING down to 0 instead of only the used prefix; NOTE(review): this writes hold_string[MAX_STRING] - confirm the buffer holds at least MAX_STRING + 1 bytes */
 	while(0 <= i)
 	{
 		hold_string[i] = 0;
 		i = i - 1;
 	}
+	string_index = 0; /* the index reset now happens here; the separate resets in get_token and read_include are removed by this same change */
 }

 /* note if this is the first token in the list, head needs fixing up */
@ -145,95 +132,6 @@ struct token_list* eat_token(struct token_list* token)
 	return token->next;
 }

-struct token_list* eat_until_newline(struct token_list* head)
-{
-	while (NULL != head)
-	{
-		if('\n' == head->s[0])
-		{
-			return head;
-		}
-		else
-		{
-			head = eat_token(head);
-		}
-	}
-
-	return NULL;
-}
-
-struct token_list* remove_line_comments(struct token_list* head)
-{
-	struct token_list* first = NULL;
-
-	while (NULL != head)
-	{
-		if(match("//", head->s))
-		{
-			head = eat_until_newline(head);
-		}
-		else
-		{
-			if(NULL == first)
-			{
-				first = head;
-			}
-			head = head->next;
-		}
-	}
-
-	return first;
-}
-
-struct token_list* remove_comments(struct token_list* head)
-{
-	struct token_list* first = NULL;
-
-	while (NULL != head)
-	{
-		if(match("//", head->s))
-		{
-			head = eat_token(head);
-		}
-		else if('/' == head->s[0] && '*' == head->s[1])
-		{
-			head = eat_token(head);
-		}
-		else
-		{
-			if(NULL == first)
-			{
-				first = head;
-			}
-			head = head->next;
-		}
-	}
-
-	return first;
-}
-
-struct token_list* remove_preprocessor_directives(struct token_list* head)
-{
-	struct token_list* first = NULL;
-
-	while (NULL != head)
-	{
-		if('#' == head->s[0])
-		{
-			head = eat_until_newline(head);
-		}
-		else
-		{
-			if(NULL == first)
-			{
-				first = head;
-			}
-			head = head->next;
-		}
-	}
-
-	return first;
-}

 void new_token(char* s, int size)
 {
@ -255,7 +153,6 @@ void new_token(char* s, int size)
 int get_token(int c)
 {
 	reset_hold_string();
-	string_index = 0;

 	if(c == EOF)
 	{
@ -272,12 +169,7 @@ int get_token(int c)
 	}
 	else if(in_set(c, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"))
 	{
-		c = preserve_keyword(c, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_");
-		if(':' == c)
-		{
-			fixup_label();
-			c = ' ';
-		}
+		c = preserve_keyword(c, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_:");
 	}
 	else if(in_set(c, "<=>|&!^%"))
 	{
@ -307,6 +199,11 @@ int get_token(int c)
 		}
 		else if(c == '/')
 		{
+			while(c != '\n')
+			{
+				c = consume_byte(c);
+				require(EOF != c, "Hit EOF inside of line comment\n");
+			}
 			c = consume_byte(c);
 		}
 		else if(c == '=')
@ -376,7 +273,6 @@ struct token_list* reverse_list(struct token_list* head)
 int read_include(int c)
 {
 	reset_hold_string();
-	string_index = 0;
 	int done = FALSE;
 	int ch;

@ -410,13 +306,24 @@ int read_include(int c)
 	return c;
 }

+void insert_file_header(char* name, int line) /* Build the text "// #FILENAME <name> <line>\n" in hold_string and pass it to new_token as a single token */
+{
+	char* hold_line = int2str(line, 10, FALSE); /* text form of the line number; presumably base 10, unsigned - confirm against int2str */
+	reset_hold_string();
+	strcat(hold_string, "// #FILENAME ");
+	strcat(hold_string, name);
+	strcat(hold_string, " ");
+	strcat(hold_string, hold_line);
+	strcat(hold_string, "\n"); /* NOTE(review): none of the strcat calls above is length-checked; confirm name plus the fixed text always fits in hold_string */
+	new_token(hold_string, strlen(hold_string)+2); /* size argument is the string length plus two; NOTE(review): presumably room for the terminator - confirm against new_token */
+}
+
 struct token_list* read_all_tokens(FILE* a, struct token_list* current, char* filename);
 int include_file(int ch)
 {
 	/* The old state to restore to */
 	char* hold_filename = file;
 	FILE* hold_input = input;
-	char* hold_line;
 	int hold_number;

 	/* The new file to load */
@ -429,7 +336,7 @@ int include_file(int ch)

 	/* Get new filename */
 	read_include(ch);
-	ch = ' ';
+	ch = '\n';
 	new_filename = token->s;
 	/* Remove name from stream */
 	token = token->next;
@ -472,24 +379,14 @@ int include_file(int ch)
 		exit(EXIT_FAILURE);
 	}

-
-	/* Replace token */
-	new_token("//", 4);
-	new_token(" // #FILENAME", 11);
-	new_token(new_filename, strlen(new_filename) + 2);
-	new_token("1", 3);
-	new_token("\n", 3);
-	/* make sure to store return line number right after include */
-	hold_line = int2str(line + 1, 10, FALSE);
+	/* protect our current line number */
 	hold_number = line + 1;
+
+	/* Read the new file */
 	read_all_tokens(new_file, token, new_filename);

 	/* put back old file info */
-	new_token("//", 4);
-	new_token(" // #FILENAME", 11);
-	new_token(hold_filename, strlen(hold_filename)+2);
-	new_token(hold_line, strlen(hold_line)+2);
-	new_token("\n", 3);
+	insert_file_header(hold_filename, hold_number);

 	/* resume reading old file */
 	input = hold_input;
@ -500,10 +397,11 @@ int include_file(int ch)

 struct token_list* read_all_tokens(FILE* a, struct token_list* current, char* filename)
 {
+	token = current;
+	insert_file_header(filename, 1);
 	input  = a;
 	line = 1;
 	file = filename;
-	token = current;
 	int ch = grab_byte();
 	while(EOF != ch)
 	{
--- a/test/test0000/proof.answer
+++ b/test/test0000/proof.answer
@ -1 +1 @@
-e98ce4032c30d918c1fc4f7132567af790692e1a87123c5843f0001fc24447ad  test/test0000/tmp/return.c
+d5796491afca11ed64a81fe75048a409e0cf642eb20d8d2558fceda7cca86e20  test/test0000/tmp/return.c
--- a/test/test0001/proof.answer
+++ b/test/test0001/proof.answer
@ -1 +1 @@
-ba1d093813d679dacfe729dab1bdb8d11df35ff85b7d476ed23738972842157d  test/test0001/tmp/return.c
+b31ee92d2492e7e1de1b8524b3c321780c5abf84ac872859e4361e3d4128bd0d  test/test0001/tmp/return.c