diff --git a/cc-minimal.c b/cc-minimal.c new file mode 100644 index 0000000..62ec871 --- /dev/null +++ b/cc-minimal.c @@ -0,0 +1,48 @@ +/* Copyright (C) 2016 Jeremiah Orians + * This file is part of stage0. + * + * stage0 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * stage0 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with stage0. If not, see . + */ + +#include "cc.h" +/* The core functions */ +void initialize_types(); +struct token_list* read_all_tokens(FILE* a, struct token_list* current); +struct token_list* reverse_list(struct token_list* head); +struct token_list* program(struct token_list* out); +void recursive_output(FILE* out, struct token_list* i); + +/* Our essential organizer */ +int main(int argc, char **argv) +{ + if (argc < 3) + { + fprintf(stderr, "We require more arguments\n"); + exit(EXIT_FAILURE); + } + + initialize_types(); + FILE* input = fopen(argv[1], "r"); + global_token = reverse_list(read_all_tokens(input, NULL)); + struct token_list* output_list = program(NULL); + FILE* output = fopen(argv[2], "w"); + fprintf(output, "\n# Core program\n\n"); + recursive_output(output, output_list); + fprintf(output, "\n# Program global variables\n\n"); + recursive_output(output, globals_list); + fprintf(output, "\n# Program strings\n\n"); + recursive_output(output, strings_list); + fclose(output); + return 0; +} diff --git a/cc.c b/cc.c index ef932c2..f583962 100644 --- a/cc.c +++ b/cc.c @@ -16,1054 +16,107 @@ */ #include "cc.h" -#include +#include + +/* The core functions */ +void 
initialize_types(); +struct token_list* read_all_tokens(FILE* a, struct token_list* current); +struct token_list* reverse_list(struct token_list* head); +struct token_list* program(struct token_list* out); +void recursive_output(FILE* out, struct token_list* i); + +#if !__MESC__ +static +#endif +struct option long_options[] = { + {"file", required_argument, 0, 'f'}, + {"output", required_argument, 0, 'o'}, + {"help", no_argument, 0, 'h'}, + {"version", no_argument, 0, 'V'}, + {0, 0, 0, 0} +}; -/* Global lists */ -struct type* global_types; -struct token_list* global_symbol_list; -struct token_list* global_function_list; -struct token_list* global_constant_list; - -/* What we are currently working on */ -struct token_list* global_token; -struct token_list* current_target; - -/* Output reorder collections*/ -struct token_list* strings_list; -struct token_list* globals_list; - -/* Imported functions */ -struct token_list* read_all_tokens(char* source_file); -char* parse_string(char* string); - -struct token_list* emit(char *s, struct token_list* head) -{ - struct token_list* t = calloc(1, sizeof(struct token_list)); - t->next = head; - t->s = s; - return t; -} - -struct token_list* double_emit(char* a, char* b, struct token_list* out, int flag) -{ - out = emit(a, out); - out = emit(b, out); - if(flag) out = emit("\n", out); - return out; -} - -char* numerate_number(int a) -{ - char* result = calloc(16, sizeof(char)); - int i = 0; - - /* Deal with Zero case */ - if(0 == a) - { - result[0] = '0'; - result[1] = '\n'; - return result; - } - - /* Deal with negatives */ - if(0 > a) - { - result[0] = '-'; - i = 1; - a = a * -1; - } - - /* Using the largest 10^n number possible in 32bits */ - int divisor = 0x3B9ACA00; - /* Skip leading Zeros */ - while(0 == (a / divisor)) divisor = divisor / 10; - - /* Now simply collect numbers until divisor is gone */ - while(0 < divisor) - { - result[i] = ((a / divisor) + 48); - a = a % divisor; - divisor = divisor / 10; - i = i + 1; - } - 
- result[i] = '\n'; - return result; -} - -struct token_list* sym_declare(char *s, struct type* t, struct token_list* list) -{ - struct token_list* a = calloc(1, sizeof(struct token_list)); - a->next = list; - a->s = s; - a->type = t; - return a; -} - -struct token_list* sym_lookup(char *s, struct token_list* symbol_list) -{ - for(struct token_list* i = symbol_list; NULL != i; i = i->next) - { - if(0 == strcmp(s,i->s)) return i; - } - return NULL; -} - -int stack_index(struct token_list* a, struct token_list* function) -{ - int depth = 4 * function->temps; - for(struct token_list* i = function->locals; NULL != i; i = i->next) - { - if(i == a) return depth; - else depth = depth + 4; - } - - /* Deal with offset caused by return pointer */ - depth = depth+ 4; - - for(struct token_list* i = function->arguments; NULL != i; i = i->next) - { - if(i == a) return depth; - else depth = depth + 4; - } - - fprintf(stderr, "%s does not exist in function %s\n", a->s, function->s); - exit(EXIT_FAILURE); -} - -struct token_list* sym_get_value(char *s, struct token_list* out, struct token_list* function) -{ - global_token = global_token->next; - struct token_list* a = sym_lookup(s, global_constant_list); - if(NULL != a) - { - out = double_emit("LOAD_IMMEDIATE_eax %", a->arguments->s, out, true); return out; - } - - a= sym_lookup(s, global_function_list); - if(NULL != a) - { - return out; - } - - a= sym_lookup(s, function->locals); - if(NULL != a) - { - current_target = a; - out = double_emit("LOAD_EFFECTIVE_ADDRESS %", numerate_number(stack_index(a, function)), out, false); - if(strcmp(global_token->s, "=")) out = emit("LOAD_INTEGER\n", out); - return out; - } - a = sym_lookup(s, function->arguments); - - if(NULL != a) - { - current_target = a; - out = double_emit("LOAD_EFFECTIVE_ADDRESS %", numerate_number(stack_index(a, function)), out, false); - if(strcmp(global_token->s, "=")) out = emit("LOAD_INTEGER\n", out); - return out; - } - - a = sym_lookup(s, global_symbol_list); - 
if(NULL != a) - { - current_target = a; - out = double_emit("LOAD_IMMEDIATE_eax &GLOBAL_", s, out, true); - if(strcmp(global_token->s, "=")) out = emit("LOAD_INTEGER\n", out); - return out; - } - - fprintf(stderr, "%s is not a defined symbol\n", s); - exit(EXIT_FAILURE); -} - -void require_char(char* message, char required) -{ - if(global_token->s[0] != required) - { - fprintf(stderr, "%s", message); - exit(EXIT_FAILURE); - } - global_token = global_token->next; -} - -struct token_list* expression(struct token_list* out, struct token_list* function); - -/* - * primary-expr: - * identifier - * constant - * ( expression ) - */ -struct token_list* primary_expr(struct token_list* out, struct token_list* function) -{ - if(('0' <= global_token->s[0]) & (global_token->s[0] <= '9')) - { - out = double_emit("LOAD_IMMEDIATE_eax %", global_token->s, out, true); - global_token = global_token->next; - } - else if((('a' <= global_token->s[0]) & (global_token->s[0] <= 'z')) | (('A' <= global_token->s[0]) & (global_token->s[0] <= 'Z'))) - { - out = sym_get_value(global_token->s, out, function); - } - else if(global_token->s[0] == '(') - { - global_token = global_token->next; - out = expression(out, function); - require_char("Error in Primary expression\nDidn't get )\n", ')'); - } - else if(global_token->s[0] == '\'') - { - out = emit("LOAD_IMMEDIATE_eax %", out); - out = emit(numerate_number(global_token->s[1]), out); - global_token = global_token->next; - } - else if(global_token->s[0] == '"') - { - static int string_num; - char* number_string = numerate_number(string_num); - out = emit("LOAD_IMMEDIATE_eax &STRING_", out); - out = emit(number_string, out); - - /* The target */ - strings_list = emit(":STRING_", strings_list); - strings_list = emit(number_string, strings_list); - - /* Parse the string */ - strings_list = emit(parse_string(global_token->s), strings_list); - global_token = global_token->next; - - string_num = string_num + 1; - } - else - { - fprintf(stderr, "Recieved 
%s in primary_expr\n", global_token->s); - exit(EXIT_FAILURE); - } - - return out; -} - -/* Deal with Expression lists */ -struct token_list* process_expression_list(struct token_list* out, struct token_list* function) -{ - char* func = global_token->prev->s; - global_token = global_token->next; - int temp = function->temps; - - if(global_token->s[0] != ')') - { - out = expression(out, function); - out = emit("PUSH_eax\t#_process_expression1\n", out); - function->temps = function->temps + 1; - - while(global_token->s[0] == ',') - { - global_token = global_token->next; - out = expression(out, function); - out = emit("PUSH_eax\t#_process_expression2\n", out); - function->temps = function->temps + 1; - } - require_char("ERROR in process_expression_list\nNo ) was found\n", ')'); - } - else global_token = global_token->next; - - out = double_emit("CALL_IMMEDIATE %FUNCTION_", func, out, true); - - for(int i = function->temps - temp; 0 != i; i = i - 1) - { - out = emit("POP_ebx\t# _process_expression_locals\n", out); - } - - function->temps = temp; - return out; -} - -struct token_list* common_recursion(struct token_list* (*function) (struct token_list*, struct token_list*), struct token_list* out, struct token_list* func) -{ - global_token = global_token->next; - out = emit("PUSH_eax\t#_common_recursion\n", out); - func->temps = func->temps + 1; - out = function(out, func); - func->temps = func->temps - 1; - out = emit("POP_ebx\t# _common_recursion\n", out); - return out; -} - -int ceil_log2(int a) -{ - int result = 0; - if((a & (a - 1)) == 0) - { - result = -1; - } - - while(a > 0) - { - result = result + 1; - a = a >> 1; - } - - return result; -} - -/* - * postfix-expr: - * primary-expr - * postfix-expr [ expression ] - * postfix-expr ( expression-list-opt ) - * postfix-expr -> member - */ -struct token_list* postfix_expr(struct token_list* out, struct token_list* function) -{ - out = primary_expr(out, function); - - while(1) - { - if(global_token->s[0] == '[') - { - 
struct token_list* target = current_target; - struct type* a = current_target->type; - out = common_recursion(expression, out, function); - - /* Add support for Ints */ - if( 1 != a->indirect->size) - { - out = double_emit("SAL_eax_Immediate8 !", numerate_number(ceil_log2(a->indirect->size)), out, false); - } - - out = emit("ADD_ebx_to_eax\n", out); - current_target = target; - - if(strcmp(global_token->next->s, "=")) - { - if( 4 == a->indirect->size) - { - out = emit("LOAD_INTEGER\n", out); - } - else - { - out = emit("LOAD_BYTE\n", out); - } - } - require_char("ERROR in postfix_expr\nMissing ]\n", ']'); - } - else if(global_token->s[0] == '(') - { - out = process_expression_list(out, function); - } - else if(!strcmp("->", global_token->s)) - { - out = emit("# looking up offset\n", out); - global_token = global_token->next; - struct type* i; - for(i = current_target->type->members; NULL != i; i = i->members) - { - if(!strcmp(i->name, global_token->s)) break; - } - if(NULL == i) - { - fprintf(stderr, "ERROR in postfix_expr %s->%s does not exist\n", current_target->type->name, global_token->s); - exit(EXIT_FAILURE); - } - if(0 != i->offset) - { - out = emit("# -> offset calculation\n", out); - out = double_emit("LOAD_IMMEDIATE_ebx %", numerate_number(i->offset), out, false); - out = emit("ADD_ebx_to_eax\n", out); - } - if(strcmp(global_token->next->s, "=")) - { - out = emit("LOAD_INTEGER\n", out); - } - global_token = global_token->next; - } - else return out; - } -} - -/* - * additive-expr: - * postfix-expr - * additive-expr + postfix-expr - * additive-expr - postfix-expr - */ -struct token_list* additive_expr(struct token_list* out, struct token_list* function) -{ - out = postfix_expr(out, function); - - while(1) - { - if(global_token->s[0] == '+') - { - out = common_recursion(postfix_expr, out, function); - out = emit("ADD_ebx_to_eax\n", out); - } - else if(global_token->s[0] == '-') - { - out = common_recursion(postfix_expr, out, function); - out = 
emit("SUBTRACT_eax_from_ebx_into_ebx\nMOVE_ebx_to_eax\n", out); - } - else return out; - } -} - -/* - * shift-expr: - * additive-expr - * shift-expr << additive-expr - * shift-expr >> additive-expr - */ -struct token_list* shift_expr(struct token_list* out, struct token_list* function) -{ - out = additive_expr(out, function); - - while(1) - { - if(!strcmp(global_token->s, "<<")) - { - out = common_recursion(additive_expr, out, function); - // Ugly hack to Work around flaw in x86 - struct token_list* old = out->next; - free(out); - out = emit("COPY_eax_to_ecx\nPOP_eax\nSAL_eax_cl\n", old); - } - else if(!strcmp(global_token->s, ">>")) - { - out = common_recursion(additive_expr, out, function); - // Ugly hack to Work around flaw in x86 - struct token_list* old = out->next; - free(out); - out = emit("COPY_eax_to_ecx\nPOP_eax\nSAR_eax_cl\n", old); - } - else - { - return out; - } - } -} - -/* - * relational-expr: - * shift-expr - * relational-expr < shift-expr - * relational-expr <= shift-expr - * relational-expr >= shift-expr - * relational-expr > shift-expr - */ -struct token_list* relational_expr(struct token_list* out, struct token_list* function) -{ - out = shift_expr(out, function); - - while(1) - { - if(!strcmp(global_token->s, "<")) - { - out = common_recursion(shift_expr, out, function); - out = emit("CMP\nSETL\nMOVEZBL\n", out); - } - else if(!strcmp(global_token->s, "<=")) - { - out = common_recursion(shift_expr, out, function); - out = emit("CMP\nSETLE\nMOVEZBL\n", out); - } - else if(!strcmp(global_token->s, ">=")) - { - out = common_recursion(shift_expr, out, function); - out = emit("CMP\nSETGE\nMOVEZBL\n", out); - } - else if(!strcmp(global_token->s, ">")) - { - out = common_recursion(shift_expr, out, function); - out = emit("CMP\nSETG\nMOVEZBL\n", out); - } - else return out; - } -} - -/* - * equality-expr: - * relational-expr - * equality-expr == relational-expr - * equality-expr != relational-expr - */ -struct token_list* equality_expr(struct 
token_list* out, struct token_list* function) -{ - out = relational_expr(out, function); - - while(1) - { - if(!strcmp(global_token->s, "==")) - { - out = common_recursion(relational_expr, out, function); - out = emit("CMP\nSETE\nMOVEZBL\n", out); - } - else if(!strcmp(global_token->s, "!=")) - { - out = common_recursion(relational_expr, out, function); - out = emit("CMP\nSETNE\nMOVEZBL\n", out); - } - else return out; - } -} - -/* - * bitwise-and-expr: - * equality-expr - * bitwise-and-expr & equality-expr - */ -struct token_list* bitwise_and_expr(struct token_list* out, struct token_list* function) -{ - out = equality_expr(out, function); - - while(global_token->s[0] == '&') - { - out = common_recursion(equality_expr, out, function); - out = emit("AND_eax_ebx\n", out); - } - return out; -} - -/* - * bitwise-or-expr: - * bitwise-and-expr - * bitwise-and-expr | bitwise-or-expr - */ -struct token_list* bitwise_or_expr(struct token_list* out, struct token_list* function) -{ - out = bitwise_and_expr(out, function); - - while(global_token->s[0] == '|') - { - out = common_recursion(bitwise_and_expr, out, function); - out = emit("OR_eax_ebx\n", out); - } - return out; -} - -/* - * expression: - * bitwise-or-expr - * bitwise-or-expr = expression - */ -struct token_list* expression(struct token_list* out, struct token_list* function) -{ - out = bitwise_or_expr(out, function); - - if(global_token->s[0] == '=') - { - struct token_list* target = current_target; - bool member = !strcmp(global_token->prev->s, "]"); - out = common_recursion(expression, out, function); - - if(member) - { - if(1 == target->type->indirect->size) out = emit("STORE_CHAR\n", out); - else if(4 == target->type->indirect->size) - { - out = emit("STORE_INTEGER\n", out); - } - } - else - { - out = emit("STORE_INTEGER\n", out); - } - } - return out; -} - -struct type* lookup_type(char* s) -{ - for(struct type* i = global_types; NULL != i; i = i->next) - { - if(!strcmp(i->name, s)) - { - return i; - } - } - 
return NULL; -} - -struct type* type_name(); -void create_struct() -{ - int offset = 0; - struct type* head = calloc(1, sizeof(struct type)); - struct type* i = calloc(1, sizeof(struct type)); - head->name = global_token->s; - i->name = global_token->s; - head->indirect = i; - i->indirect = head; - head->next = global_types; - global_types = head; - global_token = global_token->next; - i->size = 4; - require_char("ERROR in create_struct\nMissing {\n", '{'); - struct type* last = NULL; - while('}' != global_token->s[0]) - { - struct type* member_type = type_name(); - i = calloc(1, sizeof(struct type)); - i->name = global_token->s; - i->members = last; - i->size = member_type->size; - i->offset = offset; - offset = offset + member_type->size; - global_token = global_token->next; - require_char("ERROR in create_struct\nMissing ;\n", ';'); - last = i; - } - - global_token = global_token->next; - require_char("ERROR in create_struct\nMissing ;\n", ';'); - - head->size = offset; - head->members = last; - head->indirect->members = last; -} - - -/* - * type-name: - * char * - * int - */ -struct type* type_name() -{ - int structure = false; - - if(!strcmp(global_token->s, "struct")) - { - structure = true; - global_token = global_token->next; - } - - struct type* ret = lookup_type(global_token->s); - - if(NULL == ret && !structure) - { - fprintf(stderr, "Unknown type %s\n", global_token->s); - exit(EXIT_FAILURE); - } - else if(NULL == ret) - { - create_struct(); - return NULL; - } - - global_token = global_token->next; - - while(global_token->s[0] == '*') - { - ret = ret->indirect; - global_token = global_token->next; - } - - return ret; -} - -/* Process local variable */ -struct token_list* collect_local(struct token_list* out, struct token_list* function) -{ - struct type* type_size = type_name(); - out = double_emit("# Defining local ", global_token->s, out, true); - - struct token_list* a = sym_declare(global_token->s, type_size, function->locals); - function->locals = 
a; - global_token = global_token->next; - function->temps = function->temps - 1; - - if(global_token->s[0] == '=') - { - global_token = global_token->next; - out = expression(out, function); - } - function->temps = function->temps + 1; - - require_char("ERROR in collect_local\nMissing ;\n", ';'); - - out = double_emit("PUSH_eax\t#", a->s, out, true); - return out; -} - -struct token_list* statement(struct token_list* out, struct token_list* function); - -/* Evaluate if statements */ -int if_count; -struct token_list* process_if(struct token_list* out, struct token_list* function) -{ - char* number_string = numerate_number(if_count); - if_count = if_count + 1; - - out = double_emit("# IF_",number_string, out, false); - - global_token = global_token->next; - require_char("ERROR in process_if\nMISSING (\n", '('); - out = expression(out, function); - - out = double_emit("TEST\nJUMP_EQ %ELSE_", number_string, out, false); - - require_char("ERROR in process_if\nMISSING )\n", ')'); - out = statement(out, function); - - out = double_emit("JUMP %_END_IF_", number_string, out, false); - out = double_emit(":ELSE_", number_string, out, false); - - if(!strcmp(global_token->s, "else")) - { - global_token = global_token->next; - out = statement(out, function); - } - out = double_emit(":_END_IF_", number_string, out, false); - return out; -} - -int for_count; -struct token_list* process_for(struct token_list* out, struct token_list* function) -{ - char* number_string = numerate_number(for_count); - for_count = for_count + 1; - - out = double_emit("# FOR_initialization_", number_string, out, false); - - global_token = global_token->next; - - require_char("ERROR in process_for\nMISSING (\n", '('); - out = expression(out, function); - - out = double_emit(":FOR_", number_string, out , false); - - require_char("ERROR in process_for\nMISSING ;1\n", ';'); - out = expression(out, function); - - out = double_emit("TEST\nJUMP_EQ %FOR_END_", number_string, out, false); - out = 
double_emit("JUMP %FOR_THEN_", number_string, out, false); - out = double_emit(":FOR_ITER_", number_string, out, false); - - require_char("ERROR in process_for\nMISSING ;2\n", ';'); - out = expression(out, function); - - out = double_emit("JUMP %FOR_", number_string, out, false); - out = double_emit(":FOR_THEN_", number_string, out, false); - - require_char("ERROR in process_for\nMISSING )\n", ')'); - out = statement(out, function); - - out = double_emit("JUMP %FOR_ITER_", number_string, out, false); - out = double_emit(":FOR_END_", number_string, out, false); - return out; -} - -/* Process Assembly statements */ -struct token_list* process_asm(struct token_list* out) -{ - global_token = global_token->next; - require_char("ERROR in process_asm\nMISSING (\n", '('); - while('"' == global_token->s[0]) - { - out = emit((global_token->s + 1), out); - out = emit("\n", out); - global_token = global_token->next; - } - require_char("ERROR in process_asm\nMISSING )\n", ')'); - require_char("ERROR in process_asm\nMISSING ;\n", ';'); - return out; -} - -/* Process while loops */ -int while_count; -struct token_list* process_while(struct token_list* out, struct token_list* function) -{ - char* number_string = numerate_number(while_count); - while_count = while_count + 1; - - out = double_emit(":WHILE_", number_string, out, false); - - global_token = global_token->next; - require_char("ERROR in process_while\nMISSING (\n", '('); - out = expression(out, function); - - out = double_emit("TEST\nJUMP_EQ %END_WHILE_", number_string, out, false); - out = double_emit("# THEN_while_", number_string, out, false); - - require_char("ERROR in process_while\nMISSING )\n", ')'); - out = statement(out, function); - - out = double_emit("JUMP %WHILE_", number_string, out, false); - out = double_emit(":END_WHILE_", number_string, out, false); - return out; -} - -/* Ensure that functions return */ -struct token_list* return_result(struct token_list* out, struct token_list* function) -{ - 
global_token = global_token->next; - if(global_token->s[0] != ';') out = expression(out, function); - - require_char("ERROR in return_result\nMISSING ;\n", ';'); - - for(struct token_list* i = function->locals; NULL != i; i = i->next) - { - out = emit("POP_ebx\t# _return_result_locals\n", out); - function->locals = function->locals->next; - } - out = emit("RETURN\n", out); - return out; -} - -struct token_list* recursive_statement(struct token_list* out, struct token_list* function) -{ - global_token = global_token->next; - struct token_list* frame = function->locals; - - while(strcmp(global_token->s, "}")) - { - out = statement(out, function); - } - global_token = global_token->next; - - /* Clean up any locals added */ - if(NULL != function->locals) - { - for(struct token_list* i = function->locals; frame != i; i = i->next) - { - out = emit( "POP_ebx\t# _recursive_statement_locals\n", out); - function->locals = function->locals->next; - } - } - return out; -} - -/* - * statement: - * { statement-list-opt } - * type-name identifier ; - * type-name identifier = expression; - * if ( expression ) statement - * if ( expression ) statement else statement - * while ( expression ) statement - * for ( expression ; expression ; expression ) statement - * asm ( "assembly" ... 
"assembly" ) ; - * return ; - * expr ; - */ -struct token_list* statement(struct token_list* out, struct token_list* function) -{ - if(global_token->s[0] == '{') - { - out = recursive_statement(out, function); - } - else if((NULL != lookup_type(global_token->s)) || !strcmp("struct", global_token->s)) - { - out = collect_local(out, function); - } - else if(!strcmp(global_token->s, "if")) - { - out = process_if(out, function); - } - else if(!strcmp(global_token->s, "while")) - { - out = process_while(out, function); - } - else if(!strcmp(global_token->s, "for")) - { - out = process_for(out, function); - } - else if(!strcmp(global_token->s, "asm")) - { - out = process_asm(out); - } - else if(!strcmp(global_token->s, "return")) - { - out = return_result(out, function); - } - else - { - out = expression(out, function); - require_char("ERROR in statement\nMISSING ;\n", ';'); - } - return out; -} - -/* Collect function arguments */ -void collect_arguments(struct token_list* function) -{ - global_token = global_token->next; - - while(strcmp(global_token->s, ")")) - { - struct type* type_size = type_name(); - if(global_token->s[0] == ')') - { - /* deal with foo(int|char|void) */ - global_token = global_token->prev; - } - else if(global_token->s[0] != ',') - { - /* deal with foo(int a, char b) */ - struct token_list* a = sym_declare(global_token->s, type_size, function->arguments); - function->arguments = a; - } - - /* foo(int,char,void) doesn't need anything done */ - global_token = global_token->next; - - /* ignore trailing comma (needed for foo(bar(), 1); expressions*/ - if(global_token->s[0] == ',') global_token = global_token->next; - } - global_token = global_token->next; -} - -struct token_list* declare_function(struct token_list* out, struct type* type) -{ - char* essential = global_token->prev->s; - struct token_list* func = sym_declare(global_token->prev->s, calloc(1, sizeof(struct type)), global_function_list); - func->type = type; - collect_arguments(func); - - 
/* allow previously defined functions to be looked up */ - global_function_list = func; - - /* If just a prototype don't waste time */ - if(global_token->s[0] == ';') global_token = global_token->next; - else - { - out = double_emit("# Defining function ", essential, out, true); - out = double_emit(":FUNCTION_", essential, out, true); - out = statement(out, func); - - /* Prevent duplicate RETURNS */ - if(strcmp(out->s, "RETURN\n")) - { - out = emit("RETURN\n", out); - } - } - return out; -} - -/* - * program: - * declaration - * declaration program - * - * declaration: - * CONSTANT identifer value - * type-name identifier ; - * type-name identifier ( parameter-list ) ; - * type-name identifier ( parameter-list ) statement - * - * parameter-list: - * parameter-declaration - * parameter-list, parameter-declaration - * - * parameter-declaration: - * type-name identifier-opt - */ -struct token_list* program(struct token_list* out) -{ - while(NULL != global_token->next) - { -new_type: - if(!strcmp(global_token->s, "CONSTANT")) - { - global_constant_list = sym_declare(global_token->next->s, NULL, global_constant_list); - global_constant_list->arguments = global_token->next->next; - global_token = global_token->next->next->next; - } - else - { - struct type* type_size = type_name(); - if(NULL == type_size) - { - goto new_type; - } - global_token = global_token->next; - if(global_token->s[0] == ';') - { - /* Add to global symbol table */ - global_symbol_list = sym_declare(global_token->prev->s, type_size, global_symbol_list); - - /* Ensure 4 bytes are allocated for the global */ - globals_list = double_emit(":GLOBAL_", global_token->prev->s, globals_list, true); - globals_list = emit("NOP\n", globals_list); - - global_token = global_token->next; - } - else if(global_token->s[0] == '(') out = declare_function(out, type_size); - else - { - fprintf(stderr, "Recieved %s in program\n", global_token->s); - exit(EXIT_FAILURE); - } - } - } - return out; -} - -void 
recursive_output(FILE* out, struct token_list* i) -{ - if(NULL == i) return; - recursive_output(out, i->next); - fprintf(out, "%s", i->s); -} - -/* Initialize default types */ -void initialize_types() -{ - /* Define void */ - global_types = calloc(1, sizeof(struct type)); - global_types->name = "void"; - global_types->size = 4; - /* void* has the same properties as void */ - global_types->indirect = global_types; - - /* Define int */ - struct type* a = calloc(1, sizeof(struct type)); - a->name = "int"; - a->size = 4; - /* int* has the same properties as int */ - a->indirect = a; - - /* Define char* */ - struct type* b = calloc(1, sizeof(struct type)); - b->name = "char*"; - b->size = 4; - - /* Define char */ - struct type* c = calloc(1, sizeof(struct type)); - c->name = "char"; - c->size = 1; - - /* char** is char */ - c->indirect = b; - b->indirect = c; - - /* Finalize type list */ - a->next = c; - global_types->next = a; -} - /* Our essential organizer */ int main(int argc, char **argv) { - if (argc < 3) + global_token = NULL; + + int c; + FILE* source_file; + FILE* destination_file; + int option_index = 0; + while ((c = getopt_long(argc, argv, "f:h:o:V", long_options, &option_index)) != -1) { - fprintf(stderr, "We require more arguments\n"); - exit(EXIT_FAILURE); + switch(c) + { + case 0: break; + case 'h': + { + fprintf(stderr, "Usage: %s -f FILENAME1 {-f FILENAME2} -o OUTPUT\n", argv[0]); + exit(EXIT_SUCCESS); + } + case 'f': + { + #if __MESC__ + source_file = open(optarg, O_RDONLY); + #else + source_file = fopen(optarg, "r"); + #endif + + if(NULL == source_file) + { + fprintf(stderr, "The file: %s can not be opened!\n", optarg); + exit(EXIT_FAILURE); + } + + global_token = read_all_tokens(source_file, global_token); + break; + } + case 'o': + { + #if __MESC__ + destination_file = open(optarg, O_CREAT|O_TRUNC|O_WRONLY, S_IRUSR|S_IWUSR); + #else + destination_file = fopen(optarg, "w"); + #endif + + if(NULL == destination_file) + { + fprintf(stderr, "The file: 
%s can not be opened!\n", optarg); + exit(EXIT_FAILURE); + } + break; + } + case 'V': + { + fprintf(stdout, "M2-Planet 0.1\n"); + exit(EXIT_SUCCESS); + } + default: + { + fprintf(stderr, "Unknown option\n"); + exit(EXIT_FAILURE); + } + } } - initialize_types(); + if(NULL == global_token) + { + fprintf(stderr, "Either no input files were given or they were empty\n"); + exit(EXIT_FAILURE); + } + global_token = reverse_list(global_token); - global_token = read_all_tokens(argv[1]); + initialize_types(); struct token_list* output_list = program(NULL); - FILE* output = fopen(argv[2], "w"); - fprintf(output, "\n# Core program\n\n"); - recursive_output(output, output_list); - fprintf(output, "\n# Program global variables\n\n"); - recursive_output(output, globals_list); - fprintf(output, "\n# Program strings\n\n"); - recursive_output(output, strings_list); - fclose(output); - return 0; + + /* Output the program we have compiled */ + fprintf(destination_file, "\n# Core program\n\n"); + recursive_output(destination_file, output_list); + fprintf(destination_file, "\n# Program global variables\n\n"); + recursive_output(destination_file, globals_list); + fprintf(destination_file, "\n# Program strings\n\n"); + recursive_output(destination_file, strings_list); + return EXIT_SUCCESS; } diff --git a/cc.h b/cc.h index 970e94d..84edaf9 100644 --- a/cc.h +++ b/cc.h @@ -46,3 +46,10 @@ struct token_list struct token_list* locals; int temps; }; + +/* What we are currently working on */ +struct token_list* global_token; + +/* Output reorder collections*/ +struct token_list* strings_list; +struct token_list* globals_list; diff --git a/cc_core.c b/cc_core.c new file mode 100644 index 0000000..81869a5 --- /dev/null +++ b/cc_core.c @@ -0,0 +1,1038 @@ +/* Copyright (C) 2016 Jeremiah Orians + * This file is part of stage0. 
+ * + * stage0 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * stage0 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with stage0. If not, see . + */ + +#include "cc.h" +#include + +/* Global lists */ +struct type* global_types; +struct token_list* global_symbol_list; +struct token_list* global_function_list; +struct token_list* global_constant_list; + +/* What we are currently working on */ +struct token_list* current_target; + +/* Imported functions */ +char* parse_string(char* string); + +struct token_list* emit(char *s, struct token_list* head) +{ + struct token_list* t = calloc(1, sizeof(struct token_list)); + t->next = head; + t->s = s; + return t; +} + +struct token_list* double_emit(char* a, char* b, struct token_list* out, int flag) +{ + out = emit(a, out); + out = emit(b, out); + if(flag) out = emit("\n", out); + return out; +} + +char* numerate_number(int a) +{ + char* result = calloc(16, sizeof(char)); + int i = 0; + + /* Deal with Zero case */ + if(0 == a) + { + result[0] = '0'; + result[1] = '\n'; + return result; + } + + /* Deal with negatives */ + if(0 > a) + { + result[0] = '-'; + i = 1; + a = a * -1; + } + + /* Using the largest 10^n number possible in 32bits */ + int divisor = 0x3B9ACA00; + /* Skip leading Zeros */ + while(0 == (a / divisor)) divisor = divisor / 10; + + /* Now simply collect numbers until divisor is gone */ + while(0 < divisor) + { + result[i] = ((a / divisor) + 48); + a = a % divisor; + divisor = divisor / 10; + i = i + 1; + } + + result[i] = '\n'; + return result; +} + 
+struct token_list* sym_declare(char *s, struct type* t, struct token_list* list) +{ + struct token_list* a = calloc(1, sizeof(struct token_list)); + a->next = list; + a->s = s; + a->type = t; + return a; +} + +struct token_list* sym_lookup(char *s, struct token_list* symbol_list) +{ + for(struct token_list* i = symbol_list; NULL != i; i = i->next) + { + if(0 == strcmp(s,i->s)) return i; + } + return NULL; +} + +int stack_index(struct token_list* a, struct token_list* function) +{ + int depth = 4 * function->temps; + for(struct token_list* i = function->locals; NULL != i; i = i->next) + { + if(i == a) return depth; + else depth = depth + 4; + } + + /* Deal with offset caused by return pointer */ + depth = depth+ 4; + + for(struct token_list* i = function->arguments; NULL != i; i = i->next) + { + if(i == a) return depth; + else depth = depth + 4; + } + + fprintf(stderr, "%s does not exist in function %s\n", a->s, function->s); + exit(EXIT_FAILURE); +} + +struct token_list* sym_get_value(char *s, struct token_list* out, struct token_list* function) +{ + global_token = global_token->next; + struct token_list* a = sym_lookup(s, global_constant_list); + if(NULL != a) + { + out = double_emit("LOAD_IMMEDIATE_eax %", a->arguments->s, out, true); return out; + } + + a= sym_lookup(s, global_function_list); + if(NULL != a) + { + return out; + } + + a= sym_lookup(s, function->locals); + if(NULL != a) + { + current_target = a; + out = double_emit("LOAD_EFFECTIVE_ADDRESS %", numerate_number(stack_index(a, function)), out, false); + if(strcmp(global_token->s, "=")) out = emit("LOAD_INTEGER\n", out); + return out; + } + a = sym_lookup(s, function->arguments); + + if(NULL != a) + { + current_target = a; + out = double_emit("LOAD_EFFECTIVE_ADDRESS %", numerate_number(stack_index(a, function)), out, false); + if(strcmp(global_token->s, "=")) out = emit("LOAD_INTEGER\n", out); + return out; + } + + a = sym_lookup(s, global_symbol_list); + if(NULL != a) + { + current_target = a; + out 
= double_emit("LOAD_IMMEDIATE_eax &GLOBAL_", s, out, true); + if(strcmp(global_token->s, "=")) out = emit("LOAD_INTEGER\n", out); + return out; + } + + fprintf(stderr, "%s is not a defined symbol\n", s); + exit(EXIT_FAILURE); +} + +void require_char(char* message, char required) +{ + if(global_token->s[0] != required) + { + fprintf(stderr, "%s", message); + exit(EXIT_FAILURE); + } + global_token = global_token->next; +} + +struct token_list* expression(struct token_list* out, struct token_list* function); + +/* + * primary-expr: + * identifier + * constant + * ( expression ) + */ +struct token_list* primary_expr(struct token_list* out, struct token_list* function) +{ + if(('0' <= global_token->s[0]) & (global_token->s[0] <= '9')) + { + out = double_emit("LOAD_IMMEDIATE_eax %", global_token->s, out, true); + global_token = global_token->next; + } + else if((('a' <= global_token->s[0]) & (global_token->s[0] <= 'z')) | (('A' <= global_token->s[0]) & (global_token->s[0] <= 'Z'))) + { + out = sym_get_value(global_token->s, out, function); + } + else if(global_token->s[0] == '(') + { + global_token = global_token->next; + out = expression(out, function); + require_char("Error in Primary expression\nDidn't get )\n", ')'); + } + else if(global_token->s[0] == '\'') + { + out = emit("LOAD_IMMEDIATE_eax %", out); + out = emit(numerate_number(global_token->s[1]), out); + global_token = global_token->next; + } + else if(global_token->s[0] == '"') + { + static int string_num; + char* number_string = numerate_number(string_num); + out = emit("LOAD_IMMEDIATE_eax &STRING_", out); + out = emit(number_string, out); + + /* The target */ + strings_list = emit(":STRING_", strings_list); + strings_list = emit(number_string, strings_list); + + /* Parse the string */ + strings_list = emit(parse_string(global_token->s), strings_list); + global_token = global_token->next; + + string_num = string_num + 1; + } + else + { + fprintf(stderr, "Recieved %s in primary_expr\n", global_token->s); + 
exit(EXIT_FAILURE); + } + + return out; +} + +/* Deal with Expression lists */ +struct token_list* process_expression_list(struct token_list* out, struct token_list* function) +{ + char* func = global_token->prev->s; + global_token = global_token->next; + int temp = function->temps; + + if(global_token->s[0] != ')') + { + out = expression(out, function); + out = emit("PUSH_eax\t#_process_expression1\n", out); + function->temps = function->temps + 1; + + while(global_token->s[0] == ',') + { + global_token = global_token->next; + out = expression(out, function); + out = emit("PUSH_eax\t#_process_expression2\n", out); + function->temps = function->temps + 1; + } + require_char("ERROR in process_expression_list\nNo ) was found\n", ')'); + } + else global_token = global_token->next; + + out = double_emit("CALL_IMMEDIATE %FUNCTION_", func, out, true); + + for(int i = function->temps - temp; 0 != i; i = i - 1) + { + out = emit("POP_ebx\t# _process_expression_locals\n", out); + } + + function->temps = temp; + return out; +} + +struct token_list* common_recursion(struct token_list* (*function) (struct token_list*, struct token_list*), struct token_list* out, struct token_list* func) +{ + global_token = global_token->next; + out = emit("PUSH_eax\t#_common_recursion\n", out); + func->temps = func->temps + 1; + out = function(out, func); + func->temps = func->temps - 1; + out = emit("POP_ebx\t# _common_recursion\n", out); + return out; +} + +int ceil_log2(int a) +{ + int result = 0; + if((a & (a - 1)) == 0) + { + result = -1; + } + + while(a > 0) + { + result = result + 1; + a = a >> 1; + } + + return result; +} + +/* + * postfix-expr: + * primary-expr + * postfix-expr [ expression ] + * postfix-expr ( expression-list-opt ) + * postfix-expr -> member + */ +struct token_list* postfix_expr(struct token_list* out, struct token_list* function) +{ + out = primary_expr(out, function); + + while(1) + { + if(global_token->s[0] == '[') + { + struct token_list* target = current_target; 
+ struct type* a = current_target->type; + out = common_recursion(expression, out, function); + + /* Add support for Ints */ + if( 1 != a->indirect->size) + { + out = double_emit("SAL_eax_Immediate8 !", numerate_number(ceil_log2(a->indirect->size)), out, false); + } + + out = emit("ADD_ebx_to_eax\n", out); + current_target = target; + + if(strcmp(global_token->next->s, "=")) + { + if( 4 == a->indirect->size) + { + out = emit("LOAD_INTEGER\n", out); + } + else + { + out = emit("LOAD_BYTE\n", out); + } + } + require_char("ERROR in postfix_expr\nMissing ]\n", ']'); + } + else if(global_token->s[0] == '(') + { + out = process_expression_list(out, function); + } + else if(!strcmp("->", global_token->s)) + { + out = emit("# looking up offset\n", out); + global_token = global_token->next; + struct type* i; + for(i = current_target->type->members; NULL != i; i = i->members) + { + if(!strcmp(i->name, global_token->s)) break; + } + if(NULL == i) + { + fprintf(stderr, "ERROR in postfix_expr %s->%s does not exist\n", current_target->type->name, global_token->s); + exit(EXIT_FAILURE); + } + if(0 != i->offset) + { + out = emit("# -> offset calculation\n", out); + out = double_emit("LOAD_IMMEDIATE_ebx %", numerate_number(i->offset), out, false); + out = emit("ADD_ebx_to_eax\n", out); + } + if(strcmp(global_token->next->s, "=")) + { + out = emit("LOAD_INTEGER\n", out); + } + global_token = global_token->next; + } + else return out; + } +} + +/* + * additive-expr: + * postfix-expr + * additive-expr + postfix-expr + * additive-expr - postfix-expr + */ +struct token_list* additive_expr(struct token_list* out, struct token_list* function) +{ + out = postfix_expr(out, function); + + while(1) + { + if(global_token->s[0] == '+') + { + out = common_recursion(postfix_expr, out, function); + out = emit("ADD_ebx_to_eax\n", out); + } + else if(global_token->s[0] == '-') + { + out = common_recursion(postfix_expr, out, function); + out = emit("SUBTRACT_eax_from_ebx_into_ebx\nMOVE_ebx_to_eax\n", 
out); + } + else return out; + } +} + +/* + * shift-expr: + * additive-expr + * shift-expr << additive-expr + * shift-expr >> additive-expr + */ +struct token_list* shift_expr(struct token_list* out, struct token_list* function) +{ + out = additive_expr(out, function); + + while(1) + { + if(!strcmp(global_token->s, "<<")) + { + out = common_recursion(additive_expr, out, function); + // Ugly hack to Work around flaw in x86 + struct token_list* old = out->next; + free(out); + out = emit("COPY_eax_to_ecx\nPOP_eax\nSAL_eax_cl\n", old); + } + else if(!strcmp(global_token->s, ">>")) + { + out = common_recursion(additive_expr, out, function); + // Ugly hack to Work around flaw in x86 + struct token_list* old = out->next; + free(out); + out = emit("COPY_eax_to_ecx\nPOP_eax\nSAR_eax_cl\n", old); + } + else + { + return out; + } + } +} + +/* + * relational-expr: + * shift-expr + * relational-expr < shift-expr + * relational-expr <= shift-expr + * relational-expr >= shift-expr + * relational-expr > shift-expr + */ +struct token_list* relational_expr(struct token_list* out, struct token_list* function) +{ + out = shift_expr(out, function); + + while(1) + { + if(!strcmp(global_token->s, "<")) + { + out = common_recursion(shift_expr, out, function); + out = emit("CMP\nSETL\nMOVEZBL\n", out); + } + else if(!strcmp(global_token->s, "<=")) + { + out = common_recursion(shift_expr, out, function); + out = emit("CMP\nSETLE\nMOVEZBL\n", out); + } + else if(!strcmp(global_token->s, ">=")) + { + out = common_recursion(shift_expr, out, function); + out = emit("CMP\nSETGE\nMOVEZBL\n", out); + } + else if(!strcmp(global_token->s, ">")) + { + out = common_recursion(shift_expr, out, function); + out = emit("CMP\nSETG\nMOVEZBL\n", out); + } + else return out; + } +} + +/* + * equality-expr: + * relational-expr + * equality-expr == relational-expr + * equality-expr != relational-expr + */ +struct token_list* equality_expr(struct token_list* out, struct token_list* function) +{ + out = 
relational_expr(out, function); + + while(1) + { + if(!strcmp(global_token->s, "==")) + { + out = common_recursion(relational_expr, out, function); + out = emit("CMP\nSETE\nMOVEZBL\n", out); + } + else if(!strcmp(global_token->s, "!=")) + { + out = common_recursion(relational_expr, out, function); + out = emit("CMP\nSETNE\nMOVEZBL\n", out); + } + else return out; + } +} + +/* + * bitwise-and-expr: + * equality-expr + * bitwise-and-expr & equality-expr + */ +struct token_list* bitwise_and_expr(struct token_list* out, struct token_list* function) +{ + out = equality_expr(out, function); + + while(global_token->s[0] == '&') + { + out = common_recursion(equality_expr, out, function); + out = emit("AND_eax_ebx\n", out); + } + return out; +} + +/* + * bitwise-or-expr: + * bitwise-and-expr + * bitwise-and-expr | bitwise-or-expr + */ +struct token_list* bitwise_or_expr(struct token_list* out, struct token_list* function) +{ + out = bitwise_and_expr(out, function); + + while(global_token->s[0] == '|') + { + out = common_recursion(bitwise_and_expr, out, function); + out = emit("OR_eax_ebx\n", out); + } + return out; +} + +/* + * expression: + * bitwise-or-expr + * bitwise-or-expr = expression + */ +struct token_list* expression(struct token_list* out, struct token_list* function) +{ + out = bitwise_or_expr(out, function); + + if(global_token->s[0] == '=') + { + struct token_list* target = current_target; + bool member = !strcmp(global_token->prev->s, "]"); + out = common_recursion(expression, out, function); + + if(member) + { + if(1 == target->type->indirect->size) out = emit("STORE_CHAR\n", out); + else if(4 == target->type->indirect->size) + { + out = emit("STORE_INTEGER\n", out); + } + } + else + { + out = emit("STORE_INTEGER\n", out); + } + } + return out; +} + +struct type* lookup_type(char* s) +{ + for(struct type* i = global_types; NULL != i; i = i->next) + { + if(!strcmp(i->name, s)) + { + return i; + } + } + return NULL; +} + +struct type* type_name(); +void 
create_struct() +{ + int offset = 0; + struct type* head = calloc(1, sizeof(struct type)); + struct type* i = calloc(1, sizeof(struct type)); + head->name = global_token->s; + i->name = global_token->s; + head->indirect = i; + i->indirect = head; + head->next = global_types; + global_types = head; + global_token = global_token->next; + i->size = 4; + require_char("ERROR in create_struct\nMissing {\n", '{'); + struct type* last = NULL; + while('}' != global_token->s[0]) + { + struct type* member_type = type_name(); + i = calloc(1, sizeof(struct type)); + i->name = global_token->s; + i->members = last; + i->size = member_type->size; + i->offset = offset; + offset = offset + member_type->size; + global_token = global_token->next; + require_char("ERROR in create_struct\nMissing ;\n", ';'); + last = i; + } + + global_token = global_token->next; + require_char("ERROR in create_struct\nMissing ;\n", ';'); + + head->size = offset; + head->members = last; + head->indirect->members = last; +} + + +/* + * type-name: + * char * + * int + */ +struct type* type_name() +{ + int structure = false; + + if(!strcmp(global_token->s, "struct")) + { + structure = true; + global_token = global_token->next; + } + + struct type* ret = lookup_type(global_token->s); + + if(NULL == ret && !structure) + { + fprintf(stderr, "Unknown type %s\n", global_token->s); + exit(EXIT_FAILURE); + } + else if(NULL == ret) + { + create_struct(); + return NULL; + } + + global_token = global_token->next; + + while(global_token->s[0] == '*') + { + ret = ret->indirect; + global_token = global_token->next; + } + + return ret; +} + +/* Process local variable */ +struct token_list* collect_local(struct token_list* out, struct token_list* function) +{ + struct type* type_size = type_name(); + out = double_emit("# Defining local ", global_token->s, out, true); + + struct token_list* a = sym_declare(global_token->s, type_size, function->locals); + function->locals = a; + global_token = global_token->next; + 
function->temps = function->temps - 1; + + if(global_token->s[0] == '=') + { + global_token = global_token->next; + out = expression(out, function); + } + function->temps = function->temps + 1; + + require_char("ERROR in collect_local\nMissing ;\n", ';'); + + out = double_emit("PUSH_eax\t#", a->s, out, true); + return out; +} + +struct token_list* statement(struct token_list* out, struct token_list* function); + +/* Evaluate if statements */ +int if_count; +struct token_list* process_if(struct token_list* out, struct token_list* function) +{ + char* number_string = numerate_number(if_count); + if_count = if_count + 1; + + out = double_emit("# IF_",number_string, out, false); + + global_token = global_token->next; + require_char("ERROR in process_if\nMISSING (\n", '('); + out = expression(out, function); + + out = double_emit("TEST\nJUMP_EQ %ELSE_", number_string, out, false); + + require_char("ERROR in process_if\nMISSING )\n", ')'); + out = statement(out, function); + + out = double_emit("JUMP %_END_IF_", number_string, out, false); + out = double_emit(":ELSE_", number_string, out, false); + + if(!strcmp(global_token->s, "else")) + { + global_token = global_token->next; + out = statement(out, function); + } + out = double_emit(":_END_IF_", number_string, out, false); + return out; +} + +int for_count; +struct token_list* process_for(struct token_list* out, struct token_list* function) +{ + char* number_string = numerate_number(for_count); + for_count = for_count + 1; + + out = double_emit("# FOR_initialization_", number_string, out, false); + + global_token = global_token->next; + + require_char("ERROR in process_for\nMISSING (\n", '('); + out = expression(out, function); + + out = double_emit(":FOR_", number_string, out , false); + + require_char("ERROR in process_for\nMISSING ;1\n", ';'); + out = expression(out, function); + + out = double_emit("TEST\nJUMP_EQ %FOR_END_", number_string, out, false); + out = double_emit("JUMP %FOR_THEN_", number_string, out, 
false); + out = double_emit(":FOR_ITER_", number_string, out, false); + + require_char("ERROR in process_for\nMISSING ;2\n", ';'); + out = expression(out, function); + + out = double_emit("JUMP %FOR_", number_string, out, false); + out = double_emit(":FOR_THEN_", number_string, out, false); + + require_char("ERROR in process_for\nMISSING )\n", ')'); + out = statement(out, function); + + out = double_emit("JUMP %FOR_ITER_", number_string, out, false); + out = double_emit(":FOR_END_", number_string, out, false); + return out; +} + +/* Process Assembly statements */ +struct token_list* process_asm(struct token_list* out) +{ + global_token = global_token->next; + require_char("ERROR in process_asm\nMISSING (\n", '('); + while('"' == global_token->s[0]) + { + out = emit((global_token->s + 1), out); + out = emit("\n", out); + global_token = global_token->next; + } + require_char("ERROR in process_asm\nMISSING )\n", ')'); + require_char("ERROR in process_asm\nMISSING ;\n", ';'); + return out; +} + +/* Process while loops */ +int while_count; +struct token_list* process_while(struct token_list* out, struct token_list* function) +{ + char* number_string = numerate_number(while_count); + while_count = while_count + 1; + + out = double_emit(":WHILE_", number_string, out, false); + + global_token = global_token->next; + require_char("ERROR in process_while\nMISSING (\n", '('); + out = expression(out, function); + + out = double_emit("TEST\nJUMP_EQ %END_WHILE_", number_string, out, false); + out = double_emit("# THEN_while_", number_string, out, false); + + require_char("ERROR in process_while\nMISSING )\n", ')'); + out = statement(out, function); + + out = double_emit("JUMP %WHILE_", number_string, out, false); + out = double_emit(":END_WHILE_", number_string, out, false); + return out; +} + +/* Ensure that functions return */ +struct token_list* return_result(struct token_list* out, struct token_list* function) +{ + global_token = global_token->next; + if(global_token->s[0] 
!= ';') out = expression(out, function); + + require_char("ERROR in return_result\nMISSING ;\n", ';'); + + for(struct token_list* i = function->locals; NULL != i; i = i->next) + { + out = emit("POP_ebx\t# _return_result_locals\n", out); + function->locals = function->locals->next; + } + out = emit("RETURN\n", out); + return out; +} + +struct token_list* recursive_statement(struct token_list* out, struct token_list* function) +{ + global_token = global_token->next; + struct token_list* frame = function->locals; + + while(strcmp(global_token->s, "}")) + { + out = statement(out, function); + } + global_token = global_token->next; + + /* Clean up any locals added */ + if(NULL != function->locals) + { + for(struct token_list* i = function->locals; frame != i; i = i->next) + { + out = emit( "POP_ebx\t# _recursive_statement_locals\n", out); + function->locals = function->locals->next; + } + } + return out; +} + +/* + * statement: + * { statement-list-opt } + * type-name identifier ; + * type-name identifier = expression; + * if ( expression ) statement + * if ( expression ) statement else statement + * while ( expression ) statement + * for ( expression ; expression ; expression ) statement + * asm ( "assembly" ... 
"assembly" ) ; + * return ; + * expr ; + */ +struct token_list* statement(struct token_list* out, struct token_list* function) +{ + if(global_token->s[0] == '{') + { + out = recursive_statement(out, function); + } + else if((NULL != lookup_type(global_token->s)) || !strcmp("struct", global_token->s)) + { + out = collect_local(out, function); + } + else if(!strcmp(global_token->s, "if")) + { + out = process_if(out, function); + } + else if(!strcmp(global_token->s, "while")) + { + out = process_while(out, function); + } + else if(!strcmp(global_token->s, "for")) + { + out = process_for(out, function); + } + else if(!strcmp(global_token->s, "asm")) + { + out = process_asm(out); + } + else if(!strcmp(global_token->s, "return")) + { + out = return_result(out, function); + } + else + { + out = expression(out, function); + require_char("ERROR in statement\nMISSING ;\n", ';'); + } + return out; +} + +/* Collect function arguments */ +void collect_arguments(struct token_list* function) +{ + global_token = global_token->next; + + while(strcmp(global_token->s, ")")) + { + struct type* type_size = type_name(); + if(global_token->s[0] == ')') + { + /* deal with foo(int|char|void) */ + global_token = global_token->prev; + } + else if(global_token->s[0] != ',') + { + /* deal with foo(int a, char b) */ + struct token_list* a = sym_declare(global_token->s, type_size, function->arguments); + function->arguments = a; + } + + /* foo(int,char,void) doesn't need anything done */ + global_token = global_token->next; + + /* ignore trailing comma (needed for foo(bar(), 1); expressions*/ + if(global_token->s[0] == ',') global_token = global_token->next; + } + global_token = global_token->next; +} + +struct token_list* declare_function(struct token_list* out, struct type* type) +{ + char* essential = global_token->prev->s; + struct token_list* func = sym_declare(global_token->prev->s, calloc(1, sizeof(struct type)), global_function_list); + func->type = type; + collect_arguments(func); + + 
/* allow previously defined functions to be looked up */ + global_function_list = func; + + /* If just a prototype don't waste time */ + if(global_token->s[0] == ';') global_token = global_token->next; + else + { + out = double_emit("# Defining function ", essential, out, true); + out = double_emit(":FUNCTION_", essential, out, true); + out = statement(out, func); + + /* Prevent duplicate RETURNS */ + if(strcmp(out->s, "RETURN\n")) + { + out = emit("RETURN\n", out); + } + } + return out; +} + +/* + * program: + * declaration + * declaration program + * + * declaration: + * CONSTANT identifer value + * type-name identifier ; + * type-name identifier ( parameter-list ) ; + * type-name identifier ( parameter-list ) statement + * + * parameter-list: + * parameter-declaration + * parameter-list, parameter-declaration + * + * parameter-declaration: + * type-name identifier-opt + */ +struct token_list* program(struct token_list* out) +{ + while(NULL != global_token) + { +new_type: + if(!strcmp(global_token->s, "CONSTANT")) + { + global_constant_list = sym_declare(global_token->next->s, NULL, global_constant_list); + global_constant_list->arguments = global_token->next->next; + global_token = global_token->next->next->next; + } + else + { + struct type* type_size = type_name(); + if(NULL == type_size) + { + goto new_type; + } + global_token = global_token->next; + if(global_token->s[0] == ';') + { + /* Add to global symbol table */ + global_symbol_list = sym_declare(global_token->prev->s, type_size, global_symbol_list); + + /* Ensure 4 bytes are allocated for the global */ + globals_list = double_emit(":GLOBAL_", global_token->prev->s, globals_list, true); + globals_list = emit("NOP\n", globals_list); + + global_token = global_token->next; + } + else if(global_token->s[0] == '(') out = declare_function(out, type_size); + else + { + fprintf(stderr, "Recieved %s in program\n", global_token->s); + exit(EXIT_FAILURE); + } + } + } + return out; +} + +void recursive_output(FILE* 
out, struct token_list* i) +{ + if(NULL == i) return; + recursive_output(out, i->next); + fprintf(out, "%s", i->s); +} + +/* Initialize default types */ +void initialize_types() +{ + /* Define void */ + global_types = calloc(1, sizeof(struct type)); + global_types->name = "void"; + global_types->size = 4; + /* void* has the same properties as void */ + global_types->indirect = global_types; + + /* Define int */ + struct type* a = calloc(1, sizeof(struct type)); + a->name = "int"; + a->size = 4; + /* int* has the same properties as int */ + a->indirect = a; + + /* Define char* */ + struct type* b = calloc(1, sizeof(struct type)); + b->name = "char*"; + b->size = 4; + + /* Define char */ + struct type* c = calloc(1, sizeof(struct type)); + c->name = "char"; + c->size = 1; + + /* char** is char */ + c->indirect = b; + b->indirect = c; + + /* Finalize type list */ + a->next = c; + global_types->next = a; +} diff --git a/cc_reader.c b/cc_reader.c index 0152bea..00d2fdd 100644 --- a/cc_reader.c +++ b/cc_reader.c @@ -114,7 +114,15 @@ reset: goto reset; } } - else if(c != EOF) c = consume_byte(current, c); + else if(c == EOF) + { + free(current); + return c; + } + else + { + c = consume_byte(current, c); + } current->prev = token; current->next = token; @@ -135,11 +143,12 @@ struct token_list* reverse_list(struct token_list* head) return root; } -struct token_list* read_all_tokens(char* source_file) +struct token_list* read_all_tokens(FILE* a, struct token_list* current) { - input = fopen(source_file, "r"); + input = a; + token = current; int ch =fgetc(input); while(EOF != ch) ch = get_token(ch); - return reverse_list(token); + return token; } diff --git a/makefile b/makefile index 70c8f02..df74613 100644 --- a/makefile +++ b/makefile @@ -6,8 +6,11 @@ all: M2-Planet CC=gcc CFLAGS=-D_GNU_SOURCE -O0 -std=c99 -ggdb -M2-Planet: cc_reader.c cc_strings.c cc.c cc.h | bin - $(CC) $(CFLAGS) cc_reader.c cc_strings.c cc.c cc.h -o bin/M2-Planet +M2-Planet: cc_reader.c cc_strings.c 
cc_core.c cc.c cc.h | bin + $(CC) $(CFLAGS) cc_reader.c cc_strings.c cc_core.c cc.c cc.h -o bin/M2-Planet + +M2-Planet-minimal: cc_reader.c cc_strings.c cc_core.c cc-minimal.c cc.h | bin + $(CC) $(CFLAGS) cc_reader.c cc_strings.c cc_core.c cc-minimal.c cc.h -o bin/M2-Planet-minimal # Clean up after ourselves .PHONY: clean @@ -15,6 +18,7 @@ clean: rm -rf bin/ test/results/ ./test/test0/cleanup.sh ./test/test1/cleanup.sh + ./test/test2/cleanup.sh # Directories bin: @@ -24,7 +28,7 @@ results: mkdir -p test/results # tests -test: test0-binary test1-binary | results +test: test0-binary test1-binary test2-binary | results sha256sum -c test/test.answers test0-binary: M2-Planet | results @@ -33,6 +37,9 @@ test0-binary: M2-Planet | results test1-binary: M2-Planet | results test/test1/hello.sh +test2-binary: M2-Planet | results + test/test2/hello.sh + # Generate test answers .PHONY: Generate-test-answers Generate-test-answers: diff --git a/test/test.answers b/test/test.answers index 984b952..3e6b945 100644 --- a/test/test.answers +++ b/test/test.answers @@ -1,2 +1,3 @@ be3e57116e09c63e7819a391c550d2bdde2eb9f3409d9a54fcd09d2062b75dcf test/results/test0-binary 9d26baf5537e489ba73c24feb229d863e356d4796a876f6dc773cc191bc0ce99 test/results/test1-binary +62e9942a705bb4b01756786b9f0df43035b93c4c8b878aa90b99c7f0c65dfb99 test/results/test2-binary diff --git a/test/test0/hello.sh b/test/test0/hello.sh index 59cb666..3e3742c 100755 --- a/test/test0/hello.sh +++ b/test/test0/hello.sh @@ -1,7 +1,7 @@ #! 
/bin/sh set -ex # Build the test -bin/M2-Planet test/test0/cc500.c test/test0/cc0.M1 || exit 1 +bin/M2-Planet -f test/test0/cc500.c -o test/test0/cc0.M1 || exit 1 # Macro assemble with libc written in M1-Macro M1 -f test/common_x86/x86_defs.M1 -f test/common_x86/libc.M1 -f test/test0/cc0.M1 --LittleEndian --Architecture 1 -o test/test0/cc0.hex2 || exit 2 # Resolve all linkages diff --git a/test/test1/hello.sh b/test/test1/hello.sh index fa2ecc5..0cd53b3 100755 --- a/test/test1/hello.sh +++ b/test/test1/hello.sh @@ -1,7 +1,7 @@ #! /bin/sh set -ex # Build the test -bin/M2-Planet test/test1/for.c test/test1/for.M1 || exit 1 +bin/M2-Planet -f test/test1/for.c -o test/test1/for.M1 || exit 1 # Macro assemble with libc written in M1-Macro M1 -f test/common_x86/x86_defs.M1 -f test/common_x86/libc.M1 -f test/test1/for.M1 --LittleEndian --Architecture 1 -o test/test1/for.hex2 || exit 2 # Resolve all linkages diff --git a/test/test2/.gitignore b/test/test2/.gitignore new file mode 100644 index 0000000..cc8b713 --- /dev/null +++ b/test/test2/.gitignore @@ -0,0 +1,8 @@ +# Ignore the files created by script +cc0.M1 +cc0.hex2 +cc1 +cc2 + +# A place to put a good run for comparison +actual.M1 diff --git a/test/test2/cc.c b/test/test2/cc.c new file mode 100644 index 0000000..361a3f2 --- /dev/null +++ b/test/test2/cc.c @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2006 Edmund GRIMLEY EVANS + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "cc.h" + +/* Imported functions */ +void emit(int n, char *s); +void sym_define_global(int current_symbol); +int sym_declare_global(char *s); +void save_int(char *p, int n); +void get_token(); +void program(); + +void be_start() +{ + emit(16, "\x7f\x45\x4c\x46\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00"); + emit(16, "\x02\x00\x03\x00\x01\x00\x00\x00\x54\x80\x04\x08\x34\x00\x00\x00"); + emit(16, "\x00\x00\x00\x00\x00\x00\x00\x00\x34\x00\x20\x00\x01\x00\x00\x00"); + emit(16, "\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x80\x04\x08"); + emit(16, "\x00\x80\x04\x08\x10\x4b\x00\x00\x10\x4b\x00\x00\x07\x00\x00\x00"); + emit(16, "\x00\x10\x00\x00\xe8\x00\x00\x00\x00\x89\xc3\x31\xc0\x40\xcd\x80"); + + sym_define_global(sym_declare_global("exit")); + /* pop %ebx ; pop %ebx ; xor %eax,%eax ; inc %eax ; int $0x80 */ + emit(7, "\x5b\x5b\x31\xc0\x40\xcd\x80"); + + sym_define_global(sym_declare_global("getchar")); + /* mov $3,%eax ; xor %ebx,%ebx ; push %ebx ; mov %esp,%ecx */ + emit(10, "\xb8\x03\x00\x00\x00\x31\xdb\x53\x89\xe1"); + /* xor %edx,%edx ; inc %edx ; int $0x80 */ + /* test %eax,%eax ; pop %eax ; jne . + 7 */ + emit(10, "\x31\xd2\x42\xcd\x80\x85\xc0\x58\x75\x05"); + /* mov $-1,%eax ; ret */ + emit(6, "\xb8\xff\xff\xff\xff\xc3"); + + sym_define_global(sym_declare_global("malloc")); + /* mov 4(%esp),%eax */ + emit(4, "\x8b\x44\x24\x04"); + /* push %eax ; xor %ebx,%ebx ; mov $45,%eax ; int $0x80 */ + emit(10, "\x50\x31\xdb\xb8\x2d\x00\x00\x00\xcd\x80"); + /* pop %ebx ; add %eax,%ebx ; push %eax ; push %ebx ; mov $45,%eax */ + emit(10, "\x5b\x01\xc3\x50\x53\xb8\x2d\x00\x00\x00"); + /* int $0x80 ; pop %ebx ; cmp %eax,%ebx ; pop %eax ; je . 
+ 7 */ + emit(8, "\xcd\x80\x5b\x39\xc3\x58\x74\x05"); + /* mov $-1,%eax ; ret */ + emit(6, "\xb8\xff\xff\xff\xff\xc3"); + + sym_define_global(sym_declare_global("putchar")); + /* mov $4,%eax ; xor %ebx,%ebx ; inc %ebx */ + emit(8, "\xb8\x04\x00\x00\x00\x31\xdb\x43"); + /* lea 4(%esp),%ecx ; mov %ebx,%edx ; int $0x80 ; ret */ + emit(9, "\x8d\x4c\x24\x04\x89\xda\xcd\x80\xc3"); + + save_int(code + 85, codepos - 89); /* entry set to first thing in file */ +} + +void be_finish() +{ + save_int(code + 68, codepos); + save_int(code + 72, codepos); + i = 0; + while (i <= codepos - 1) { + putchar(code[i]); + i = i + 1; + } +} + + +int main() +{ + code_offset = 134512640; /* 0x08048000 */ + be_start(); + nextc = getchar(); + get_token(); + program(); + be_finish(); + return 0; +} diff --git a/test/test2/cc.h b/test/test2/cc.h new file mode 100644 index 0000000..1a4d1c4 --- /dev/null +++ b/test/test2/cc.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2006 Edmund GRIMLEY EVANS + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * A self-compiling compiler for a small subset of C. + */ + +/* Our library functions. 
*/
+void exit(int);
+int getchar(void);
+void *malloc(int);
+int putchar(int);
+
+/* Our globals */
+int code_offset; /* address that corresponds to code[0]; added to codepos to form symbol addresses */
+char *code; /* output buffer holding the image being generated */
+int codepos; /* number of bytes emitted into code so far */
+int i; /* shared scratch index; emit, takechar, get_token and others overwrite it */
+int nextc; /* lookahead character most recently returned by getchar */
diff --git a/test/test2/cc1.c b/test/test2/cc1.c
new file mode 100644
index 0000000..5532188
--- /dev/null
+++ b/test/test2/cc1.c
@@ -0,0 +1,681 @@
+/*
+ * Copyright (C) 2006 Edmund GRIMLEY EVANS
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "cc.h"
+/*
+ * my_realloc: allocate newlen bytes with malloc and copy the first oldlen
+ * bytes of old into the new buffer, which is returned.  The old buffer is
+ * not released and the malloc result is not checked.  When oldlen is 0 the
+ * copy loop does not run, so old is never read in that case.
+ */
+char *my_realloc(char *old, int oldlen, int newlen)
+{
+    char *new = malloc(newlen);
+    int i = 0; /* local index, hides the global i */
+    while (i <= oldlen - 1) {
+        new[i] = old[i];
+        i = i + 1;
+    }
+    return new;
+}
+
+char *token; /* text of the current token, terminated with a zero byte by get_token */
+int token_size; /* allocated size of the token buffer in bytes */
+/* error: abort the compilation by calling exit(1); no message is printed. */
+void error()
+{
+    exit(1);
+}
+/*
+ * takechar: append the lookahead character to token at the global index i,
+ * then advance i and read the next lookahead character.  The buffer is
+ * grown to (i + 10) * 2 bytes first whenever fewer than two bytes would
+ * remain, which keeps room for the terminator get_token stores later.
+ */
+void takechar()
+{
+    if (token_size <= i + 1) {
+        int x = (i + 10) << 1;
+        token = my_realloc(token, token_size, x);
+        token_size = x;
+    }
+    token[i] = nextc;
+    i = i + 1;
+    nextc = getchar();
+}
+/*
+ * get_token: read the next token from standard input into token and
+ * terminate it with a zero byte.  The global i holds the token length
+ * while scanning.
+ *
+ * Spaces, tabs (9) and newlines (10) are skipped.  A token is one of:
+ *   - a run of lower-case letters, digits and underscores
+ *   - a run of the operator characters < = > | & !
+ *   - a character literal delimited by single quotes (code 39)
+ *   - a string literal delimited by double quotes
+ *   - any other single character
+ * A slash followed by a star starts a block comment, which is skipped
+ * before scanning restarts; any other slash is a one-character token.
+ * No escape processing is done inside literals, so a literal ends at the
+ * first matching quote character.  At end of input (lookahead equal to
+ * 0-1) the token is left empty.
+ *
+ * NOTE(review): the loops that scan a literal or a comment do not test for
+ * end of input, so an unterminated literal or comment appears never to
+ * leave them.
+ */
+void get_token()
+{
+    int w = 1; /* set again only after a comment was skipped, to restart the scan */
+    while (w) {
+        w = 0;
+        while ((nextc == ' ') | (nextc == 9) | (nextc == 10))
+            nextc = getchar();
+        i = 0;
+        while ((('a' <= nextc) & (nextc <= 'z')) |
+               (('0' <= nextc) & (nextc <= '9')) | (nextc == '_'))
+            takechar();
+        if (i == 0)
+            while ((nextc == '<') | (nextc == '=') | (nextc == '>') |
+                   (nextc == '|') | (nextc == '&') | (nextc == '!'))
+                takechar();
+        if (i == 0) {
+            if (nextc == 39) { /* character literal: opening quote, contents, closing quote */
+                takechar();
+                while (nextc != 39)
+                    takechar();
+                takechar();
+            }
+            else if (nextc == '"') {
+                takechar();
+                while (nextc != '"')
+                    takechar();
+                takechar();
+            }
+            else if (nextc == '/') { /* either a block comment or a lone slash token */
+                takechar();
+                if (nextc == '*') {
+                    nextc = getchar();
+                    while (nextc != '/') { /* ends when a slash directly follows a star */
+                        while (nextc != '*')
+                            nextc = getchar();
+                        nextc = getchar();
+                    }
+                    nextc = getchar();
+                    w = 1;
+                }
+            }
+            else if (nextc != 0-1) /* anything else except end of input is a one-character token */
+                takechar();
+        }
+        token[i] = 0;
+    }
+}
+/*
+ * peek: return 1 when the current token is exactly the string s, else 0.
+ * The token is not consumed.  Uses a local i that hides the global one.
+ */
+int peek(char *s)
+{
+    int i = 0;
+    while ((s[i] == token[i]) & (s[i] != 0))
+        i = i + 1;
+    return s[i] == token[i];
+}
+/*
+ * accept: when the current token is s, advance to the next token and
+ * return 1; otherwise leave the token in place and return 0.
+ */
+int accept(char *s)
+{
+    if (peek(s)) {
+        get_token();
+        return 1;
+    }
+    else
+        return 0;
+}
+/* expect: like accept, but calls error() when the current token is not s. */
+void expect(char *s)
+{
+    if (accept(s) == 0)
+        error();
+}
+
+int code_size; /* allocated size of the code buffer in bytes */
+/* save_int: store n as four bytes at p, least significant byte first. */
+void save_int(char *p, int n)
+{
+    p[0] = n;
+    p[1] = n >> 8;
+    p[2] = n >> 16;
+    p[3] = n >> 24;
+}
+/*
+ * load_int: read the four bytes at p as a value stored least significant
+ * byte first, the inverse of save_int.  The masks with 255 keep each byte
+ * in the range 0 to 255 before it is shifted into place.
+ */
+int load_int(char *p)
+{
+    return ((p[0] & 255) + ((p[1] & 255) << 8) +
+            ((p[2] & 255) << 16) + ((p[3] & 255) << 24));
+}
+/*
+ * emit: append the first n bytes of s to the code buffer and advance
+ * codepos by n.  The buffer is grown to (codepos + n) * 2 bytes when it is
+ * too small.  Exactly n bytes are copied, so zero bytes inside s are
+ * copied like any other.  Overwrites the global i.
+ */
+void emit(int n, char *s)
+{
+    i = 0;
+    if (code_size <= codepos + n) {
+        int x = (codepos + n) << 1;
+        code = my_realloc(code, code_size, x);
+        code_size = x;
+    }
+    while (i <= n - 1) {
+        code[codepos] = s[i];
+        codepos = codepos + 1;
+        i = i + 1;
+    }
+}
+/*
+ * be_push: emit a one-byte push of %eax.  stack_pos is not touched here;
+ * every caller in this file increments it afterwards.
+ */
+void be_push()
+{
+    emit(1, "\x50"); /* push %eax */
+}
+/*
+ * be_pop: emit an instruction that drops n four-byte stack slots.  The
+ * four dots in the template are a placeholder that is overwritten with
+ * n << 2.  stack_pos is not touched here.
+ */
+void be_pop(int n)
+{
+    emit(6, "\x81\xc4...."); /* add $(n * 4),%esp */
+    save_int(code + codepos - 4, n << 2);
+}
+
+char *table; /* symbol table: each entry is the name bytes, a zero byte, one type byte and a four-byte value */
+int table_size; /* allocated size of table in bytes */
+int table_pos; /* offset of the first unused byte in table */
+int stack_pos; /* compile-time count of four-byte stack slots currently pushed by the generated code */
+/*
+ * sym_lookup: return the table offset of the zero byte that ends the name
+ * of the entry matching s, or 0 when no entry matches.  The type byte of
+ * that entry is at offset + 1 and its value at offset + 2.  The whole
+ * table is always scanned and the last match wins, so a later declaration
+ * hides an earlier one with the same name.  Overwrites the global i.
+ */
+int sym_lookup(char *s)
+{
+    int t = 0;
+    int current_symbol = 0;
+    while (t <= table_pos - 1) {
+        i = 0;
+        while ((s[i] == table[t]) & (s[i] != 0)) {
+            i = i + 1;
+            t = t + 1;
+        }
+        if (s[i] == table[t]) /* only true when both strings ended here */
+            current_symbol = t;
+        while (table[t] != 0) /* skip the rest of a longer, non-matching name */
+            t = t + 1;
+        t = t + 6; /* skip the zero byte, the type byte and the value */
+    }
+    return current_symbol;
+}
+/*
+ * sym_declare: append an entry named s with the given type byte and value
+ * at table_pos, growing the table while the name is copied.  The type
+ * letters used in this file are U (undefined global), D (defined global),
+ * L (local variable) and A (argument).  Overwrites the global i.
+ */
+void sym_declare(char *s, int type, int value)
+{
+    int t = table_pos;
+    i = 0;
+    while (s[i] != 0) {
+        if (table_size <= t + 10) {
+            int x = (t + 10) << 1;
+            table = my_realloc(table, table_size, x);
+            table_size = x;
+        }
+        table[t] = s[i];
+        i = i + 1;
+        t = t + 1;
+    }
+    table[t] = 0;
+    table[t + 1] = type;
+    save_int(table + t + 2, value);
+    table_pos = t + 6;
+}
+/*
+ * sym_declare_global: return the entry offset for s, creating an
+ * undefined global entry when none exists yet.  A new entry gets the value
+ * code_offset, which sym_define_global treats as the end of its chain of
+ * pending references.
+ */
+int sym_declare_global(char *s)
+{
+    int current_symbol = sym_lookup(s);
+    if (current_symbol == 0) {
+        sym_declare(s, 'U', code_offset);
+        current_symbol = table_pos - 6;
+    }
+    return current_symbol;
+}
+/*
+ * sym_define_global: give the global at entry current_symbol the current
+ * output address, codepos + code_offset.  Calls error() unless the entry
+ * is still undefined.  Each earlier reference was emitted holding the
+ * address of the reference before it, so the references form a linked
+ * list through the code buffer; the loop walks that list and overwrites
+ * every link with the final address.  The entry is then marked defined
+ * and its value set to the address.
+ */
+void sym_define_global(int current_symbol)
+{
+    int i;
+    int j;
+    int t = current_symbol;
+    int v = codepos + code_offset;
+    if (table[t + 1] != 'U')
+        error(); /* symbol redefined */
+    i = load_int(table + t + 2) - code_offset;
+    while (i) {
+        j = load_int(code + i) - code_offset;
+        save_int(code + i, v);
+        i = j;
+    }
+    table[t + 1] = 'D';
+    save_int(table + t + 2, v);
+}
+
+int number_of_args; /* number of parameters of the function currently being compiled */
+/*
+ * sym_get_value: emit code that leaves the address of symbol s in %eax.
+ * Calls error() when s is unknown or its type byte is not recognised.
+ *
+ * A mov with the stored value as immediate is always emitted first.  For
+ * a defined global that is the final address.  For an undefined global
+ * the immediate holds the previous head of the pending-reference chain and
+ * the entry is updated to point at this immediate.  For a local or an
+ * argument a following lea relative to %esp replaces the value in %eax.
+ *
+ * NOTE(review): for locals and arguments the slot number is read back with
+ * table[t + 2], a single byte, not with load_int, so only the lowest byte
+ * of the stored value is used.
+ */
+void sym_get_value(char *s)
+{
+    int t;
+    if ((t = sym_lookup(s)) == 0)
+        error();
+    emit(5, "\xb8...."); /* mov $n,%eax */
+    save_int(code + codepos - 4, load_int(table + t + 2));
+    if (table[t + 1] == 'D') { /* defined global */
+    }
+    else if (table[t + 1] == 'U') /* undefined global */
+        save_int(table + t + 2, codepos + code_offset - 4);
+    else if (table[t + 1] == 'L') { /* local variable */
+        int k = (stack_pos - table[t + 2] - 1) << 2;
+        emit(7, "\x8d\x84\x24...."); /* lea (n * 4)(%esp),%eax */
+        save_int(code + codepos - 4, k);
+    }
+    else if (table[t + 1] == 'A') { /* argument */
+        int k = (stack_pos + number_of_args - table[t + 2] + 1) << 2;
+        emit(7, "\x8d\x84\x24...."); /* lea (n * 4)(%esp),%eax */
+        save_int(code + codepos - 4, k);
+    }
+    else
+        error();
+}
+/*
+ * promote: turn an address in %eax into the value stored there, depending
+ * on the type code.  Type 1 loads a sign-extended byte, type 2 loads a
+ * 32-bit word, and any other type emits nothing.
+ */
+void promote(int type)
+{
+    /* 1 = char lval, 2 = int lval, 3 = other */
+    if (type == 1)
+        emit(3, "\x0f\xbe\x00"); /* movsbl (%eax),%eax */
+    else if (type == 2)
+        emit(2, "\x8b\x00"); /* mov (%eax),%eax */
+}
+
+int expression();
+
+/*
+ * primary-expr:
+ *     identifier
+ *     constant
+ *     ( expression )
+ *
+ * Emits code that leaves the result in %eax, consumes the tokens of the
+ * expression and returns its type code (see promote).  Calls error() when
+ * the token starts none of the accepted forms.
+ *
+ * A token starting with a digit is read as a decimal number; n * 10 is
+ * computed as (n << 1) + (n << 3).  A token starting with a lower-case
+ * letter is looked up as a symbol and yields type 2.  A character literal
+ * must hold exactly one character between its quotes.  String literals
+ * are also accepted here: the text is decoded in place inside token, then
+ * emitted inline as a call over the bytes, the bytes plus a zero byte, and
+ * a pop that fetches the address the call pushed.  The only escape that
+ * is decoded is a backslash (92) followed by x and two hex digits written
+ * with 0-9 and a-f.  Numbers and both literal forms yield type 3.
+ */
+int primary_expr()
+{
+    int type;
+    if (('0' <= token[0]) & (token[0] <= '9')) {
+        int n = 0;
+        i = 0;
+        while (token[i]) {
+            n = (n << 1) + (n << 3) + token[i] - '0';
+            i = i + 1;
+        }
+        emit(5, "\xb8...."); /* mov $x,%eax */
+        save_int(code + codepos - 4, n);
+        type = 3;
+    }
+    else if (('a' <= token[0]) & (token[0] <= 'z')) {
+        sym_get_value(token);
+        type = 2;
+    }
+    else if (accept("(")) {
+        type = expression();
+        if (peek(")") == 0) /* the closing parenthesis is consumed by the get_token call below */
+            error();
+    }
+    else if ((token[0] == 39) & (token[1] != 0) &
+             (token[2] == 39) & (token[3] == 0)) {
+        emit(5, "\xb8...."); /* mov $x,%eax */
+        save_int(code + codepos - 4, token[1]);
+        type = 3;
+    }
+    else if (token[0] == '"') {
+        int i = 0;
+        int j = 1;
+        int k;
+        while (token[j] != '"') {
+            if ((token[j] == 92) & (token[j + 1] == 'x')) { /* hex escape: two digits follow */
+                if (token[j + 2] <= '9')
+                    k = token[j + 2] - '0';
+                else
+                    k = token[j + 2] - 'a' + 10;
+                k = k << 4;
+                if (token[j + 3] <= '9')
+                    k = k + token[j + 3] - '0';
+                else
+                    k = k + token[j + 3] - 'a' + 10;
+                token[i] = k;
+                j = j + 4;
+            }
+            else {
+                token[i] = token[j];
+                j = j + 1;
+            }
+            i = i + 1;
+        }
+        token[i] = 0;
+        /* call ... ; the string ; pop %eax */
+        emit(5, "\xe8....");
+        save_int(code + codepos - 4, i + 1); /* displacement skips the i text bytes and the zero byte */
+        emit(i + 1, token);
+        emit(1, "\x58");
+        type = 3;
+    }
+    else
+        error();
+    get_token();
+    return type;
+}
+/*
+ * binary1: first half of a binary operator.  Promotes the left operand in
+ * %eax to a value, pushes it and counts the new stack slot.
+ */
+void binary1(int type)
+{
+    promote(type);
+    be_push();
+    stack_pos = stack_pos + 1;
+}
+/*
+ * binary2: second half of a binary operator.  Promotes the right operand,
+ * whose type code is passed in, emits the n-byte instruction sequence s
+ * and removes one slot from stack_pos; every sequence passed in this file
+ * pops the left operand.  Always returns type 3.
+ */
+int binary2(int type, int n, char *s)
+{
+    promote(type);
+    emit(n, s);
+    stack_pos = stack_pos - 1;
+    return 3;
+}
+
+/*
+ * postfix-expr:
+ *     primary-expr
+ *     postfix-expr [ expression ]
+ *     postfix-expr ( expression-list-opt )
+ *
+ * Indexing adds the promoted index to the promoted base and yields type 1,
+ * so the element is later read or written as a single byte.
+ *
+ * A call pushes the unpromoted value of the primary expression (the callee
+ * address), then each promoted argument from left to right.  It reloads
+ * the callee address from the slot below the arguments, calls through
+ * %eax and drops all pushed slots again.  The result has type 3.
+ *
+ * NOTE(review): the code tests for a postfix operator with if, not in a
+ * loop, so only one index or call is parsed after a primary expression.
+ */
+int postfix_expr()
+{
+    int type = primary_expr();
+    if (accept("[")) {
+        binary1(type); /* pop %ebx ; add %ebx,%eax */
+        binary2(expression(), 3, "\x5b\x01\xd8");
+        expect("]");
+        type = 1;
+    }
+    else if (accept("(")) {
+        int s = stack_pos; /* slot count before the call, restored afterwards */
+        be_push();
+        stack_pos = stack_pos + 1;
+        if (accept(")") == 0) {
+            promote(expression());
+            be_push();
+            stack_pos = stack_pos + 1;
+            while (accept(",")) {
+                promote(expression());
+                be_push();
+                stack_pos = stack_pos + 1;
+            }
+            expect(")");
+        }
+        emit(7, "\x8b\x84\x24...."); /* mov (n * 4)(%esp),%eax */
+        save_int(code + codepos - 4, (stack_pos - s - 1) << 2);
+        emit(2, "\xff\xd0"); /* call *%eax */
+        be_pop(stack_pos - s);
+        stack_pos = s;
+        type = 3;
+    }
+    return type;
+}
+
+/*
+ * additive-expr:
+ *     postfix-expr
+ *     additive-expr + postfix-expr
+ *     additive-expr - postfix-expr
+ *
+ * Operators are applied from left to right.  Returns the type code of the
+ * result, which is 3 as soon as one operator was applied.
+ */
+int additive_expr()
+{
+    int type = postfix_expr();
+    while (1) {
+        if (accept("+")) {
+            binary1(type); /* pop %ebx ; add %ebx,%eax */
+            type = binary2(postfix_expr(), 3, "\x5b\x01\xd8");
+        }
+        else if (accept("-")) {
+            binary1(type); /* pop %ebx ; sub %eax,%ebx ; mov %ebx,%eax */
+            type = binary2(postfix_expr(), 5, "\x5b\x29\xc3\x89\xd8");
+        }
+        else
+            return type;
+    }
+}
+
+/*
+ * shift-expr:
+ *     additive-expr
+ *     shift-expr << additive-expr
+ *     shift-expr >> additive-expr
+ *
+ * Operators are applied from left to right.  The right shift is emitted
+ * as sar, as the instruction comments below show.
+ */
+int shift_expr()
+{
+    int type = additive_expr();
+    while (1) {
+        if (accept("<<")) {
+            binary1(type); /* mov %eax,%ecx ; pop %eax ; shl %cl,%eax */
+            type = binary2(additive_expr(), 5, "\x89\xc1\x58\xd3\xe0");
+        }
+        else if (accept(">>")) {
+            binary1(type); /* mov %eax,%ecx ; pop %eax ; sar %cl,%eax */
+            type = binary2(additive_expr(), 5, "\x89\xc1\x58\xd3\xf8");
+        }
+        else
+            return type;
+    }
+}
+
+/*
+ * relational-expr:
+ *     shift-expr
+ *     relational-expr <= shift-expr
+ *
+ * The only relational operator handled here is <=.  The result is
+ * produced with setle and movzbl, so %eax ends up holding 0 or 1.
+ */
+int relational_expr()
+{
+    int type = shift_expr();
+    while (accept("<=")) {
+        binary1(type);
+        /* pop %ebx ; cmp %eax,%ebx ; setle %al ; movzbl %al,%eax */
+        type = binary2(shift_expr(),
+                       9, "\x5b\x39\xc3\x0f\x9e\xc0\x0f\xb6\xc0");
+    }
+    return type;
+}
+
+/*
+ * equality-expr:
+ *     relational-expr
+ *     equality-expr == relational-expr
+ *     equality-expr != relational-expr
+ *
+ * Operators are applied from left to right; %eax ends up holding 0 or 1.
+ */
+int equality_expr()
+{
+    int type = relational_expr();
+    while (1) {
+        if (accept("==")) {
+            binary1(type);
+            /* pop %ebx ; cmp %eax,%ebx ; sete %al ; movzbl %al,%eax */
+            type = binary2(relational_expr(),
+                           9, "\x5b\x39\xc3\x0f\x94\xc0\x0f\xb6\xc0");
+        }
+        else if (accept("!=")) {
+            binary1(type);
+            /* pop %ebx ; cmp %eax,%ebx ; setne %al ; movzbl %al,%eax */
+            type = binary2(relational_expr(),
+                           9, "\x5b\x39\xc3\x0f\x95\xc0\x0f\xb6\xc0");
+        }
+        else
+            return type;
+    }
+}
+
+/*
+ * bitwise-and-expr:
+ *     equality-expr
+ *     bitwise-and-expr & equality-expr
+ *
+ * Both operands are always evaluated; operators apply left to right.
+ */
+int bitwise_and_expr()
+{
+    int type = equality_expr();
+    while (accept("&")) {
+        binary1(type); /* pop %ebx ; and %ebx,%eax */
+        type = binary2(equality_expr(), 3, "\x5b\x21\xd8");
+    }
+    return type;
+}
+
+/*
+ * bitwise-or-expr:
+ *     bitwise-and-expr
+ *     bitwise-and-expr | bitwise-or-expr
+ *
+ * Both operands are always evaluated.  The loop below applies the
+ * operators from left to right.
+ */
+int bitwise_or_expr()
+{
+    int type = bitwise_and_expr();
+    while (accept("|")) {
+        binary1(type); /* pop %ebx ; or %ebx,%eax */
+        type = binary2(bitwise_and_expr(), 3, "\x5b\x09\xd8");
+    }
+    return type;
+}
+
+/*
+ * expression:
+ *     bitwise-or-expr
+ *     bitwise-or-expr = expression
+ *
+ * For an assignment the unpromoted left side, an address, is pushed, the
+ * right side is evaluated by a recursive call and promoted, and the value
+ * is stored through the popped address: as a 32-bit word when the left
+ * side has type 2, as a single byte for any other type.  The result of an
+ * assignment has type 3.
+ */
+int expression()
+{
+    int type = bitwise_or_expr();
+    if (accept("=")) {
+        be_push();
+        stack_pos = stack_pos + 1;
+        promote(expression());
+        if (type == 2)
+            emit(3, "\x5b\x89\x03"); /* pop %ebx ; mov %eax,(%ebx) */
+        else
+            emit(3, "\x5b\x88\x03"); /* pop %ebx ; mov %al,(%ebx) */
+        stack_pos = stack_pos - 1;
+        type = 3;
+    }
+    return type;
+}
+
+/*
+ * type-name:
+ *     char *
+ *     int
+ *
+ * Skips the current token without checking it, then skips any number of
+ * following star tokens.  Nothing about the type is recorded.
+ */
+void type_name()
+{
+    get_token();
+    while (accept("*")) {
+    }
+}
+
+/*
+ * statement:
+ *     { statement-list-opt }
+ *     type-name identifier ;
+ *     type-name identifier = expression;
+ *     if ( expression ) statement
+ *     if ( expression ) statement else statement
+ *     while ( expression ) statement
+ *     return ;
+ *     expr ;
+ *
+ * Block: at the closing brace the symbol table is cut back to its length
+ * at the opening brace and the slots pushed inside the block are dropped.
+ *
+ * Declaration: the name is declared as a local whose value is the current
+ * stack_pos, the optional initialiser is evaluated into %eax, and %eax is
+ * pushed to create the slot.  Without an initialiser the slot receives
+ * whatever %eax holds at that point.
+ *
+ * if: the conditional jump placeholder is patched to land just after the
+ * unconditional jump that ends the first branch; that jump is emitted even
+ * when there is no else part and is patched to the end of the statement.
+ *
+ * while: p1 is the start of the condition, the target of the backward
+ * jump; p2 is the end of the conditional jump, patched to leave the loop.
+ *
+ * return: a value is evaluated when the next token is not a semicolon,
+ * then all stack_pos slots are dropped and a ret is emitted.  stack_pos
+ * itself is left unchanged.
+ */
+void statement()
+{
+    int p1;
+    int p2;
+    if (accept("{")) {
+        int n = table_pos;
+        int s = stack_pos;
+        while (accept("}") == 0)
+            statement();
+        table_pos = n;
+        be_pop(stack_pos - s);
+        stack_pos = s;
+    }
+    else if (peek("char") | peek("int")) {
+        type_name();
+        sym_declare(token, 'L', stack_pos);
+        get_token();
+        if (accept("="))
+            promote(expression());
+        expect(";");
+        be_push();
+        stack_pos = stack_pos + 1;
+    }
+    else if (accept("if")) {
+        expect("(");
+        promote(expression());
+        emit(8, "\x85\xc0\x0f\x84...."); /* test %eax,%eax ; je ... */
+        p1 = codepos;
+        expect(")");
+        statement();
+        emit(5, "\xe9...."); /* jmp ... */
+        p2 = codepos;
+        save_int(code + p1 - 4, codepos - p1);
+        if (accept("else"))
+            statement();
+        save_int(code + p2 - 4, codepos - p2);
+    }
+    else if (accept("while")) {
+        expect("(");
+        p1 = codepos;
+        promote(expression());
+        emit(8, "\x85\xc0\x0f\x84...."); /* test %eax,%eax ; je ... */
+        p2 = codepos;
+        expect(")");
+        statement();
+        emit(5, "\xe9...."); /* jmp ... */
+        save_int(code + codepos - 4, p1 - codepos);
+        save_int(code + p2 - 4, codepos - p2);
+    }
+    else if (accept("return")) {
+        if (peek(";") == 0)
+            promote(expression());
+        expect(";");
+        be_pop(stack_pos);
+        emit(1, "\xc3"); /* ret */
+    }
+    else {
+        expression();
+        expect(";");
+    }
+}
+
+/*
+ * program:
+ *     declaration
+ *     declaration program
+ *
+ * declaration:
+ *     type-name identifier ;
+ *     type-name identifier ( parameter-list ) ;
+ *     type-name identifier ( parameter-list ) statement
+ *
+ * parameter-list:
+ *     parameter-declaration
+ *     parameter-list, parameter-declaration
+ *
+ * parameter-declaration:
+ *     type-name identifier-opt
+ *
+ * Compiles declarations until the current token is empty, which is how
+ * get_token reports end of input.  Calls error() when a declared name is
+ * followed by neither a semicolon nor an opening parenthesis.
+ *
+ * A variable is defined at the current output address and four zero bytes
+ * are emitted as its storage.  For a function the named parameters are
+ * declared as arguments numbered from 1.  A prototype, recognised by the
+ * semicolon after the parameter list, leaves the symbol undefined.  A
+ * definition defines the symbol at the current address, compiles the body
+ * and appends a ret.  Restoring table_pos afterwards removes the
+ * parameter entries.
+ */
+void program()
+{
+    int current_symbol;
+    while (token[0]) {
+        type_name();
+        current_symbol = sym_declare_global(token);
+        get_token();
+        if (accept(";")) {
+            sym_define_global(current_symbol);
+            emit(4, "\x00\x00\x00\x00");
+        }
+        else if (accept("(")) {
+            int n = table_pos;
+            number_of_args = 0;
+            while (accept(")") == 0) {
+                number_of_args = number_of_args + 1;
+                type_name();
+                if (peek(")") == 0) {
+                    sym_declare(token, 'A', number_of_args);
+                    get_token();
+                }
+                accept(","); /* ignore trailing comma */
+            }
+            if (accept(";") == 0) {
+                sym_define_global(current_symbol);
+                statement();
+                emit(1, "\xc3"); /* ret */
+            }
+            table_pos = n;
+        }
+        else
+            error();
+    }
+}
diff --git a/test/test2/cleanup.sh b/test/test2/cleanup.sh
new file mode 100755
index 0000000..083e6ad --- /dev/null +++ b/test/test2/cleanup.sh @@ -0,0 +1,6 @@ +#! /bin/sh +rm -f test/test2/cc0.M1 +rm -f test/test2/cc0.hex2 +rm -f test/test2/cc1 +rm -f test/test2/cc2 +exit 0 diff --git a/test/test2/hello.sh b/test/test2/hello.sh new file mode 100755 index 0000000..2bf379e --- /dev/null +++ b/test/test2/hello.sh @@ -0,0 +1,26 @@ +#! /bin/sh +set -ex +# Build the test +bin/M2-Planet -f test/test2/cc.h -f test/test2/cc1.c -f test/test2/cc.c -o test/test2/cc0.M1 || exit 1 +# Macro assemble with libc written in M1-Macro +M1 -f test/common_x86/x86_defs.M1 -f test/common_x86/libc.M1 -f test/test2/cc0.M1 --LittleEndian --Architecture 1 -o test/test2/cc0.hex2 || exit 2 +# Resolve all linkages +hex2 -f test/common_x86/ELF-i386.hex2 -f test/test2/cc0.hex2 --LittleEndian --Architecture 1 --BaseAddress 0x8048000 -o test/results/test2-binary --exec_enable || exit 3 + +# Ensure binary works if host machine supports test +if [ "$(get_machine)" = "x86_64" ] +then + # Verify that the compiled program can compile itself + ./test/results/test2-binary < test/test0/cc500.c >| test/test2/cc1 || exit 4 + out=$(sha256sum -c test/test2/proof0.answer) + [ "$out" = "test/test2/cc1: OK" ] || exit 5 + + # Make it executable + exec_enable test/test2/cc1 + + # Verify that the result of it compiling itself can compile itself + ./test/test2/cc1 < test/test0/cc500.c >| test/test2/cc2 || exit 6 + out=$(sha256sum -c test/test2/proof1.answer) + [ "$out" = "test/test2/cc2: OK" ] || exit 7 +fi +exit 0 diff --git a/test/test2/proof0.answer b/test/test2/proof0.answer new file mode 100644 index 0000000..a1b87d1 --- /dev/null +++ b/test/test2/proof0.answer @@ -0,0 +1 @@ +3987b6a29775e015c11e35008ad0d0b3ee57f32655dc26f2360000b345139f54 test/test2/cc1 diff --git a/test/test2/proof1.answer b/test/test2/proof1.answer new file mode 100644 index 0000000..5bf6885 --- /dev/null +++ b/test/test2/proof1.answer @@ -0,0 +1 @@ +3987b6a29775e015c11e35008ad0d0b3ee57f32655dc26f2360000b345139f54 
test/test2/cc2