From df451c172872877829914447514b632ccb31fd4f Mon Sep 17 00:00:00 2001
From: Jeremiah Orians <jeremiah@pdp10.guru>
Date: Fri, 10 Aug 2018 17:30:48 -0400
Subject: [PATCH] Code cleanup done during Knight platform bootstrap work

---
 CHANGELOG.org             |  1 +
 cc.h                      | 12 +++-----
 cc_core.c                 | 21 +++++++-------
 cc_reader.c               | 18 +++++++++---
 cc_strings.c              | 58 +++++++++++++++++++--------------------
 test/test.answers         |  2 +-
 test/test100/proof.answer |  2 +-
 7 files changed, 60 insertions(+), 54 deletions(-)

diff --git a/CHANGELOG.org b/CHANGELOG.org
index b73e4ac..63a84b5 100644
--- a/CHANGELOG.org
+++ b/CHANGELOG.org
@@ -35,6 +35,7 @@ Reorged logic tree to reduce complexity
 Simplified expression
 Changed EOF detection logic to deal with unsigned bits
 Reduced Memory usage down from 50MB to 2MB
+Simplified Tokenizer logic for easier implementation and understanding
 
 ** Fixed
 Correct bug in how \" is treated
diff --git a/cc.h b/cc.h
index c3c4e30..0ca1134 100644
--- a/cc.h
+++ b/cc.h
@@ -25,8 +25,6 @@
 // CONSTANT FALSE 0
 #define TRUE 1
 // CONSTANT TRUE 1
-#define LF 10
-// CONSTANT LF 10
 
 void file_print(char* s, FILE* f);
 int match(char* a, char* b);
@@ -47,25 +45,23 @@ struct type
 struct token_list
 {
 	struct token_list* next;
-	struct token_list* prev;
 	union
 	{
-		struct token_list* entry;
-		char* s;
+		struct token_list* locals;
+		struct token_list* prev;
 	};
+	char* s;
 	union
 	{
-		struct token_list* frame;
 		struct type* type;
 		char* filename;
 	};
 	union
 	{
 		struct token_list* arguments;
+		int depth;
 		int linenumber;
 	};
-	struct token_list* locals;
-	int depth;
 };
 
 /* What types we have */
diff --git a/cc_core.c b/cc_core.c
index 0d4cb2c..65932df 100644
--- a/cc_core.c
+++ b/cc_core.c
@@ -29,6 +29,7 @@ struct type* current_target;
 char* break_target_head;
 char* break_target_func;
 char* break_target_num;
+struct token_list* break_frame;
 int current_count;
 struct type* last_type;
 
@@ -687,8 +688,8 @@ struct token_list* process_for(struct token_list* out, struct token_list* functi
 	char* nested_break_head = break_target_head;
 	char* nested_break_func = break_target_func;
 	char* nested_break_num = break_target_num;
-	struct token_list* nested_locals = function->frame;
-	function->frame = function->locals;
+	struct token_list* nested_locals = break_frame;
+	break_frame = function->locals;
 	break_target_head = "FOR_END_";
 	break_target_func = function->s;
 	break_target_num = number_string;
@@ -736,7 +737,7 @@ struct token_list* process_for(struct token_list* out, struct token_list* functi
 	break_target_head = nested_break_head;
 	break_target_func = nested_break_func;
 	break_target_num = nested_break_num;
-	function->frame = nested_locals;
+	break_frame = nested_locals;
 	return out;
 }
 
@@ -765,8 +766,8 @@ struct token_list* process_do(struct token_list* out, struct token_list* functio
 	char* nested_break_head = break_target_head;
 	char* nested_break_func = break_target_func;
 	char* nested_break_num = break_target_num;
-	struct token_list* nested_locals = function->frame;
-	function->frame = function->locals;
+	struct token_list* nested_locals = break_frame;
+	break_frame = function->locals;
 	break_target_head = "DO_END_";
 	break_target_func = function->s;
 	break_target_num = number_string;
@@ -788,7 +789,7 @@ struct token_list* process_do(struct token_list* out, struct token_list* functio
 	out = emit(":DO_END_", out);
 	out = uniqueID(function->s, out, number_string);
 
-	function->frame = nested_locals;
+	break_frame = nested_locals;
 	break_target_head = nested_break_head;
 	break_target_func = nested_break_func;
 	break_target_num = nested_break_num;
@@ -805,8 +806,8 @@ struct token_list* process_while(struct token_list* out, struct token_list* func
 	char* nested_break_head = break_target_head;
 	char* nested_break_func = break_target_func;
 	char* nested_break_num = break_target_num;
-	struct token_list* nested_locals = function->frame;
-	function->frame = function->locals;
+	struct token_list* nested_locals = break_frame;
+	break_frame = function->locals;
 
 	break_target_head = "END_WHILE_";
 	break_target_func = function->s;
@@ -832,7 +833,7 @@ struct token_list* process_while(struct token_list* out, struct token_list* func
 	out = emit(":END_WHILE_", out);
 	out = uniqueID(function->s, out, number_string);
 
-	function->frame = nested_locals;
+	break_frame = nested_locals;
 	break_target_head = nested_break_head;
 	break_target_func = nested_break_func;
 	break_target_num = nested_break_num;
@@ -961,7 +962,7 @@ struct token_list* statement(struct token_list* out, struct token_list* function
 			exit(EXIT_FAILURE);
 		}
 		struct token_list* i = function->locals;
-		while(i != function->frame)
+		while(i != break_frame)
 		{
 			if(NULL == i) break;
 			out = emit("POP_ebx\t# break_cleanup_locals\n", out);
diff --git a/cc_reader.c b/cc_reader.c
index 557daf5..f573857 100644
--- a/cc_reader.c
+++ b/cc_reader.c
@@ -66,9 +66,19 @@ void fixup_label()
 	} while(0 != hold);
 }
 
+int in_set(int c, char* s)
+{
+	while(0 != s[0])
+	{
+		if(c == s[0]) return TRUE;
+		s = s + 1;
+	}
+	return FALSE;
+}
+
 int preserve_keyword(int c)
 {
-	while((('a' <= c) & (c <= 'z')) | (('A' <= c) & (c <= 'Z')) | (('0' <= c) & (c <= '9')) | (c == '_'))
+	while(in_set(c, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"))
 	{
 		c = consume_byte(c);
 	}
@@ -82,7 +92,7 @@ int preserve_keyword(int c)
 
 int preserve_symbol(int c)
 {
-	while((c == '<') | (c == '=') | (c == '>') | (c == '|') | (c == '&') | (c == '!') | (c == '-'))
+	while(in_set(c, "<=>|&!-"))
 	{
 		c = consume_byte(c);
 	}
@@ -119,11 +129,11 @@ reset:
 		c = purge_macro(c);
 		goto reset;
 	}
-	else if((('a' <= c) & (c <= 'z')) | (('A' <= c) & (c <= 'Z')) | (('0' <= c) & (c <= '9')) | (c == '_'))
+	else if(in_set(c, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"))
 	{
 		c = preserve_keyword(c);
 	}
-	else if((c == '<') | (c == '=') | (c == '>') | (c == '|') | (c == '&') | (c == '!') | ( c == '-'))
+	else if(in_set(c, "<=>|&!-"))
 	{
 		c = preserve_symbol(c);
 	}
diff --git a/cc_strings.c b/cc_strings.c
index 0c6c94a..280e20b 100644
--- a/cc_strings.c
+++ b/cc_strings.c
@@ -126,27 +126,26 @@ int escape_lookup(char* c)
 /* Deal with human strings */
 char* collect_regular_string(char* string)
 {
-	int j = 0;
+	int i = 0;
 	string_index = 0;
 
-	/* 34 == " */
-	hold_string[0] = 34;
-	while(string[j] != 0)
+	hold_string[0] = '"';
+	while(string[i] != 0)
 	{
-		if((string[j] == '\\') & (string[j + 1] == 'x'))
+		if((string[i] == '\\') & (string[i + 1] == 'x'))
 		{
-			hold_string[string_index] = escape_lookup(string + j);
-			j = j + 4;
+			hold_string[string_index] = escape_lookup(string + i);
+			i = i + 4;
 		}
-		else if(string[j] == '\\')
+		else if(string[i] == '\\')
 		{
-			hold_string[string_index] = escape_lookup(string + j);
-			j = j + 2;
+			hold_string[string_index] = escape_lookup(string + i);
+			i = i + 2;
 		}
 		else
 		{
-			hold_string[string_index] = string[j];
-			j = j + 1;
+			hold_string[string_index] = string[i];
+			i = i + 1;
 		}
 
 		string_index = string_index + 1;
@@ -155,43 +154,42 @@ char* collect_regular_string(char* string)
 	char* message = calloc(string_index + 3, sizeof(char));
 	copy_string(message, hold_string);
 	reset_hold_string();
-	message[string_index] = 34;
-	message[string_index + 1] = LF;
+	message[string_index] = '"';
+	message[string_index + 1] = '\n';
 	return message;
 }
 
 /* Deal with non-human strings */
 char* collect_weird_string(char* string)
 {
-	int j = 1;
+	int i = 1;
 	string_index = 1;
 	int temp;
 	char* table = "0123456789ABCDEF";
 
-	/* 39 == ' */
-	hold_string[0] = 39;
-	while(string[j] != 0)
+	hold_string[0] = '\'';
+	while(string[i] != 0)
 	{
 		hold_string[string_index] = ' ';
 
-		if((string[j] == '\\') & (string[j + 1] == 'x'))
+		if((string[i] == '\\') & (string[i + 1] == 'x'))
 		{
-			hold_string[string_index + 1] = upcase(string[j + 2]);
-			hold_string[string_index + 2] = upcase(string[j + 3]);
-			j = j + 4;
+			hold_string[string_index + 1] = upcase(string[i + 2]);
+			hold_string[string_index + 2] = upcase(string[i + 3]);
+			i = i + 4;
 		}
-		else if(string[j] == '\\')
+		else if(string[i] == '\\')
 		{
-			temp = escape_lookup(string + j);
+			temp = escape_lookup(string + i);
 			hold_string[string_index + 1] = table[(temp >> 4)];
 			hold_string[string_index + 2] = table[(temp & 15)];
-			j = j + 2;
+			i = i + 2;
 		}
 		else
 		{
-			hold_string[string_index + 1] = table[(string[j] >> 4)];
-			hold_string[string_index + 2] = table[(string[j] & 15)];
-			j = j + 1;
+			hold_string[string_index + 1] = table[(string[i] >> 4)];
+			hold_string[string_index + 2] = table[(string[i] & 15)];
+			i = i + 1;
 		}
 
 		string_index = string_index + 3;
@@ -203,8 +201,8 @@ char* collect_weird_string(char* string)
 	hold[string_index] = ' ';
 	hold[string_index + 1] = '0';
 	hold[string_index + 2] = '0';
-	hold[string_index + 3] = 39;
-	hold[string_index + 4] = LF;
+	hold[string_index + 3] = '\'';
+	hold[string_index + 4] = '\n';
 	return hold;
 }
 
diff --git a/test/test.answers b/test/test.answers
index 9b2c38b..803f313 100644
--- a/test/test.answers
+++ b/test/test.answers
@@ -9,7 +9,7 @@ b45fae655b7f848b28ebdb8eb2e30ae789fbcf7920bc315395d53986bb1adae4  test/results/t
 d511db73158a9544a5b5f828a79751e3de8a04b81c143fd0c146fc22c938aa9f  test/results/test08-binary
 6831ba0c4e01cea5fb524d811e75542875512fb417baa03d2515278d5b0ee6a5  test/results/test09-binary
 ef179cd359ba1d61d45089e314cd4ac2069c8dc4dd7494d7c766344ea3c8cf88  test/results/test10-binary
-c526b47458dd06a47fee8d83533d71f48c51867b1c358e43f69da970f7d06338  test/results/test100-binary
+9d1ccb77471d95d25dc55782d386f64a2e1a777b1f46e9bb09dc4c7e23f31f55  test/results/test100-binary
 5aaf399fe706d4a8c85c121c75ada29a65c293b57c98e8999961a2ef0bab0d62  test/results/test11-binary
 4f8111e73e07255ae203963438c82ea8bcff7474e1594b52b426c58a03cb30eb  test/results/test12-binary
 dd74dabfdce8657ff440c1eef531cbf67a64854f2020d4d6bcb65c9cc2d199cb  test/results/test13-binary
diff --git a/test/test100/proof.answer b/test/test100/proof.answer
index b32d9fb..a46f8d9 100644
--- a/test/test100/proof.answer
+++ b/test/test100/proof.answer
@@ -1 +1 @@
-5f65ea0157c4526f8d6f5887a045c6fcbb809787feb45ee3317699de3e480012  test/test100/proof
+b20ff7318f559391ba3c6c67bb992d9c2dd42148e4b857c4a7ee835b2f83fa4b  test/test100/proof