diff --git a/CHANGELOG.org b/CHANGELOG.org index 63a84b5..c18eea7 100644 --- a/CHANGELOG.org +++ b/CHANGELOG.org @@ -23,6 +23,7 @@ Added test24 - get_machine Added General_Recursion to reduce complexity Added uniqueID to purge all code segments that recreate it's functionality Added struct Char arrays for structs +Added in_set to make complex conditional logic into a simple char search ** Changed Made String naming 100% deterministic @@ -36,6 +37,10 @@ Simplified expression Changed EOF detection logic to deal with unsigned bits Reduced Memory usage down from 50MB to 2MB Simplified Tokenizer logic for easier implementation and understanding +Simplified program logic +Significantly reduced weird string detection logic +Trimmed escape_lookup down to a key core +Eliminated Several Minor unneeded steps ** Fixed Correct bug in how \" is treated diff --git a/cc.h b/cc.h index 0ca1134..c419a52 100644 --- a/cc.h +++ b/cc.h @@ -30,6 +30,7 @@ void file_print(char* s, FILE* f); int match(char* a, char* b); char* copy_string(char* target, char* source); void reset_hold_string(); +int in_set(int c, char* s); struct type { diff --git a/cc_core.c b/cc_core.c index 65932df..7aaef88 100644 --- a/cc_core.c +++ b/cc_core.c @@ -1089,68 +1089,52 @@ struct token_list* declare_function(struct token_list* out) struct token_list* program(struct token_list* out) { struct type* type_size; - while(NULL != global_token) - { + new_type: - if(match("CONSTANT", global_token->s)) + if (NULL == global_token)return out; + if(match("CONSTANT", global_token->s)) + { + global_constant_list = sym_declare(global_token->next->s, NULL, global_constant_list); + global_constant_list->arguments = global_token->next->next; + global_token = global_token->next->next->next; + } + else + { + type_size = type_name(); + if(NULL == type_size) { - global_constant_list = sym_declare(global_token->next->s, NULL, global_constant_list); - global_constant_list->arguments = global_token->next->next; - global_token = global_token->next->next->next; + goto new_type; } - else + /* Add to global symbol table */ + global_symbol_list = sym_declare(global_token->s, type_size, global_symbol_list); + global_token = global_token->next; + if(match(";", global_token->s)) { - type_size = type_name(); - if(NULL == type_size) - { - goto new_type; - } + /* Ensure 4 bytes are allocated for the global */ + globals_list = emit(":GLOBAL_", globals_list); + globals_list = emit(global_token->prev->s, globals_list); + globals_list = emit("\nNOP\n", globals_list); + global_token = global_token->next; - if(global_token->s[0] == ';') - { - /* Add to global symbol table */ - global_symbol_list = sym_declare(global_token->prev->s, type_size, global_symbol_list); - - /* Ensure 4 bytes are allocated for the global */ - globals_list = emit(":GLOBAL_", globals_list); - globals_list = emit(global_token->prev->s, globals_list); - globals_list = emit("\nNOP\n", globals_list); - - global_token = global_token->next; - } - else if(match("=",global_token->s)) - { - /* Add to global symbol table */ - global_symbol_list = sym_declare(global_token->prev->s, type_size, global_symbol_list); - - /* Store the global's value*/ - globals_list = emit(":GLOBAL_", globals_list); - globals_list = emit(global_token->prev->s, globals_list); + } + else if(match("(", global_token->s)) out = declare_function(out); + else if(match("=",global_token->s)) + { + /* Store the global's value*/ + globals_list = emit(":GLOBAL_", globals_list); + globals_list = emit(global_token->prev->s, globals_list); + globals_list = emit("\n", globals_list); + global_token = global_token->next; + if(in_set(global_token->s[0], "0123456789")) + { /* Assume Int */ + globals_list = emit("%", globals_list); + globals_list = emit(global_token->s, globals_list); globals_list = emit("\n", globals_list); - global_token = global_token->next; - if(('0' <= global_token->s[0]) & (global_token->s[0] <= '9')) - { /* Assume Int */ - globals_list = emit("%", globals_list); - globals_list = emit(global_token->s, globals_list); - globals_list = emit("\n", globals_list); - } - else if(('"' == global_token->s[0])) - { /* Assume a string*/ - globals_list = emit(parse_string(global_token->s), globals_list); - } - else - { - file_print("Recieved ", stderr); - file_print(global_token->s, stderr); - file_print(" in program\n", stderr); - line_error(); - exit(EXIT_FAILURE); - } - - global_token = global_token->next; - require_match("ERROR in Program\nMissing ;\n", ";"); } - else if(global_token->s[0] == '(') out = declare_function(out); + else if(('"' == global_token->s[0])) + { /* Assume a string*/ + globals_list = emit(parse_string(global_token->s), globals_list); + } else { file_print("Recieved ", stderr); @@ -1159,9 +1143,20 @@ new_type: line_error(); exit(EXIT_FAILURE); } + + global_token = global_token->next; + require_match("ERROR in Program\nMissing ;\n", ";"); + } + else + { + file_print("Recieved ", stderr); + file_print(global_token->s, stderr); + file_print(" in program\n", stderr); + line_error(); + exit(EXIT_FAILURE); } } - return out; + goto new_type; } void recursive_output(struct token_list* i, FILE* out) diff --git a/cc_strings.c b/cc_strings.c index 280e20b..bee4eff 100644 --- a/cc_strings.c +++ b/cc_strings.c @@ -23,7 +23,7 @@ int char2hex(int c); char upcase(char a) { - if((97 <= a) && (122 >= a)) + if(in_set(a, "abcdefghijklmnopqrstuvwxyz")) { a = a - 32; } @@ -48,74 +48,44 @@ int hexify(int c, int high) return i; } +int escape_lookup(char* c); int weird(char* string) { - if(0 == string[0]) return FALSE; - if('\\' == string[0]) + int c; + string = string + 1; +weird_reset: + c = string[0]; + if(0 == c) return FALSE; + if('\\' == c) { - if('x' == string[1]) - { - if('0' == string[2]) return TRUE; - else if('1' == string[2]) return TRUE; - else if('2' == string[2]) - { - if('2' == string[3]) return TRUE; - else return weird(string+3); - } - else if('3' == string[2]) - { - if('A' == string[3]) return TRUE; - else return weird(string+3); - } - else if('8' == string[2]) return TRUE; - else if('9' == string[2]) return TRUE; - else if('a' == string[2]) return TRUE; - else if('A' == string[2]) return TRUE; - else if('b' == string[2]) return TRUE; - else if('B' == string[2]) return TRUE; - else if('c' == string[2]) return TRUE; - else if('C' == string[2]) return TRUE; - else if('d' == string[2]) return TRUE; - else if('D' == string[2]) return TRUE; - else if('e' == string[2]) return TRUE; - else if('E' == string[2]) return TRUE; - else if('f' == string[2]) return TRUE; - else if('F' == string[2]) return TRUE; - else return weird(string+3); - } - else if('n' == string[1]) - { - if(':' == string[2]) return TRUE; - return weird(string+2); - } - else if('t' == string[1]) - { - return weird(string+2); - } - else if('"' == string[1]) return TRUE; - else - { - return weird(string+3); - } + c = escape_lookup(string); + if('x' == string[1]) string = string + 2; + string = string + 1; } - return weird(string+1); + + if(!in_set(c, "\t\n !#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~")) return TRUE; + if(in_set(c, " \t\n\r") && (':' == string[1])) return TRUE; + string = string + 1; + goto weird_reset; } /* Lookup escape values */ int escape_lookup(char* c) { - if((c[0] == '\\') & (c[1] == 'x')) + if('\\' != c[0]) return c[0]; + + if(c[1] == 'x') { int t1 = hexify(c[2], TRUE); int t2 = hexify(c[3], FALSE); return t1 + t2; } - else if((c[0] == '\\') & (c[1] == 'n')) return 10; - else if((c[0] == '\\') & (c[1] == 't')) return 9; - else if((c[0] == '\\') & (c[1] == '\\')) return 92; - else if((c[0] == '\\') & (c[1] == '\'')) return 39; - else if((c[0] == '\\') & (c[1] == '"')) return 34; - else if((c[0] == '\\') & (c[1] == 'r')) return 13; + else if(c[1] == 'n') return 10; + else if(c[1] == 't') return 9; + else if(c[1] == '\\') return 92; + else if(c[1] == '\'') return 39; + else if(c[1] == '"') return 34; + else if(c[1] == 'r') return 13; file_print("Unknown escape recieved: ", stderr); file_print(c, stderr); @@ -126,36 +96,29 @@ int escape_lookup(char* c) /* Deal with human strings */ char* collect_regular_string(char* string) { - int i = 0; string_index = 0; - hold_string[0] = '"'; - while(string[i] != 0) +collect_regular_string_reset: + if(string[0] == '\\') { - if((string[i] == '\\') & (string[i + 1] == 'x')) - { - hold_string[string_index] = escape_lookup(string + i); - i = i + 4; - } - else if(string[i] == '\\') - { - hold_string[string_index] = escape_lookup(string + i); - i = i + 2; - } - else - { - hold_string[string_index] = string[i]; - i = i + 1; - } - - string_index = string_index + 1; + hold_string[string_index] = escape_lookup(string); + if (string[1] == 'x') string = string + 2; + string = string + 2; + } + else + { + hold_string[string_index] = string[0]; + string = string + 1; } + string_index = string_index + 1; + if(string[0] != 0) goto collect_regular_string_reset; + + hold_string[string_index] = '"'; + hold_string[string_index + 1] = '\n'; char* message = calloc(string_index + 3, sizeof(char)); copy_string(message, hold_string); reset_hold_string(); - message[string_index] = '"'; - message[string_index + 1] = '\n'; return message; } @@ -168,41 +131,31 @@ char* collect_weird_string(char* string) char* table = "0123456789ABCDEF"; hold_string[0] = '\''; - while(string[i] != 0) +collect_weird_string: + hold_string[string_index] = ' '; + temp = escape_lookup(string + i); + hold_string[string_index + 1] = table[(temp >> 4)]; + hold_string[string_index + 2] = table[(temp & 15)]; + + if(string[i] == '\\') { - hold_string[string_index] = ' '; - - if((string[i] == '\\') & (string[i + 1] == 'x')) - { - hold_string[string_index + 1] = upcase(string[i + 2]); - hold_string[string_index + 2] = upcase(string[i + 3]); - i = i + 4; - } - else if(string[i] == '\\') - { - temp = escape_lookup(string + i); - hold_string[string_index + 1] = table[(temp >> 4)]; - hold_string[string_index + 2] = table[(temp & 15)]; - i = i + 2; - } - else - { - hold_string[string_index + 1] = table[(string[i] >> 4)]; - hold_string[string_index + 2] = table[(string[i] & 15)]; - i = i + 1; - } - - string_index = string_index + 3; + if(string[i + 1] == 'x') i = i + 2; + i = i + 1; } + i = i + 1; + + string_index = string_index + 3; + if(string[i] != 0) goto collect_weird_string; + + hold_string[string_index] = ' '; + hold_string[string_index + 1] = '0'; + hold_string[string_index + 2] = '0'; + hold_string[string_index + 3] = '\''; + hold_string[string_index + 4] = '\n'; char* hold = calloc(string_index + 6, sizeof(char)); copy_string(hold, hold_string); reset_hold_string(); - hold[string_index] = ' '; - hold[string_index + 1] = '0'; - hold[string_index + 2] = '0'; - hold[string_index + 3] = '\''; - hold[string_index + 4] = '\n'; return hold; } @@ -210,6 +163,6 @@ char* collect_weird_string(char* string) char* parse_string(char* string) { /* the string */ - if((weird(string)) || ':' == string[1]) return collect_weird_string(string); + if(weird(string)) return collect_weird_string(string); else return collect_regular_string(string); } diff --git a/cc_types.c b/cc_types.c index c83fb82..13cd172 100644 --- a/cc_types.c +++ b/cc_types.c @@ -78,7 +78,7 @@ void initialize_types() /* FUNCTION* has the same properties as FUNCTION */ f->indirect = f; - /* Define FUNCTION */ + /* Define UNSIGNED */ struct type* g = calloc(1, sizeof(struct type)); g->name = "unsigned"; g->size = 4; @@ -90,7 +90,7 @@ void initialize_types() f->next = g; e->next = f; d->next = e; - c->next = d; + c->next = e; a->next = c; global_types->next = a; } @@ -160,6 +160,7 @@ struct type* build_union(struct type* last, int offset) void create_struct() { int offset = 0; + member_size = 0; struct type* head = calloc(1, sizeof(struct type)); struct type* i = calloc(1, sizeof(struct type)); head->name = global_token->s; @@ -191,7 +192,7 @@ void create_struct() head->size = offset; head->members = last; - head->indirect->members = last; + i->members = last; } @@ -205,11 +206,10 @@ void create_struct() */ struct type* type_name() { - int structure = FALSE; + int structure = match("struct", global_token->s); - if(match("struct", global_token->s)) + if(structure) { - structure = TRUE; global_token = global_token->next; } diff --git a/test/test.answers b/test/test.answers index 803f313..407c9dd 100644 --- a/test/test.answers +++ b/test/test.answers @@ -7,9 +7,9 @@ c921f545c7baebe05e1ce60c777d6d7782ba626fdf7520f576d3ee1b849a8bbf test/results/test06-binary b45fae655b7f848b28ebdb8eb2e30ae789fbcf7920bc315395d53986bb1adae4 test/results/test07-binary d511db73158a9544a5b5f828a79751e3de8a04b81c143fd0c146fc22c938aa9f test/results/test08-binary -6831ba0c4e01cea5fb524d811e75542875512fb417baa03d2515278d5b0ee6a5 test/results/test09-binary +907e1808f2e2b15ac72ebf13898b15c678e68ebd43d673dcd0f408d907e7962f test/results/test09-binary ef179cd359ba1d61d45089e314cd4ac2069c8dc4dd7494d7c766344ea3c8cf88 test/results/test10-binary -9d1ccb77471d95d25dc55782d386f64a2e1a777b1f46e9bb09dc4c7e23f31f55 test/results/test100-binary +f6b6383edcec936cb5e94f5311995750d4f55b8c0f37e57c97cb45e8a21a6a59 test/results/test100-binary 5aaf399fe706d4a8c85c121c75ada29a65c293b57c98e8999961a2ef0bab0d62 test/results/test11-binary 4f8111e73e07255ae203963438c82ea8bcff7474e1594b52b426c58a03cb30eb test/results/test12-binary dd74dabfdce8657ff440c1eef531cbf67a64854f2020d4d6bcb65c9cc2d199cb test/results/test13-binary @@ -18,9 +18,9 @@ e216869c3fb06de7a41578517c797169e219b20a5697a822ba11eeef0d04f181 test/results/t 315ae5cc5c9d5bdcae0eddd55371128e53e3e9267a2a7c53832ed0af51693bea test/results/test16-binary fdce9856f885418a7b2f69fc24a6cc0c85922313b49694d8030c544e4b2ad16f test/results/test17-binary 9a426972b6df90a158aebe3b8f3eb9ef8a63ce317d764afb92be4fce16542743 test/results/test18-binary -33b528e79793f7dc89490c386d96c9be82501a01ed795bb991b6404a472df4c8 test/results/test19-binary +8459b95fb7232f6c0d9c2af94be061d7a14f39659767486af51d5b76e818824e test/results/test19-binary 48d845d20fff86183047342641cc8a6174e71c0ca004be882f0195a141bd64ea test/results/test20-binary -fd7bd7f28151e503d475ec59608ab3c7d0853e237255cf0a1e418694ff061d25 test/results/test21-binary +65354ea5f760e42ea054785033a6519e0eee30d21b1b69ac7715ef958c5e0e2f test/results/test21-binary dffc0dbe1d99fd156ab406d0b71ce48e6d91072f75b8103a208510293e37735a test/results/test22-binary c745adaa7c5ba2230877fc8d2137a87d25597212069660813460cb6d764de2a0 test/results/test23-binary 50215e4b4e2ce22a959ea7bcfc77c4d6ac45464455f5103afcaea0e84f9bf1d1 test/results/test24-binary diff --git a/test/test100/proof.answer b/test/test100/proof.answer index a46f8d9..43d45e0 100644 --- a/test/test100/proof.answer +++ b/test/test100/proof.answer @@ -1 +1 @@ -b20ff7318f559391ba3c6c67bb992d9c2dd42148e4b857c4a7ee835b2f83fa4b test/test100/proof +3b81051a91a25db55946dfc322a1531d9b862f8c7d72a1b2fb6d122d4b8ba92e test/test100/proof