diff --git a/cc-minimal.c b/cc-minimal.c
new file mode 100644
index 0000000..62ec871
--- /dev/null
+++ b/cc-minimal.c
@@ -0,0 +1,65 @@
+/* Copyright (C) 2016 Jeremiah Orians
+ * This file is part of stage0.
+ *
+ * stage0 is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * stage0 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with stage0. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cc.h"
+/* The core functions */
+void initialize_types();
+struct token_list* read_all_tokens(FILE* a, struct token_list* current);
+struct token_list* reverse_list(struct token_list* head);
+struct token_list* program(struct token_list* out);
+void recursive_output(FILE* out, struct token_list* i);
+
+/* Our essential organizer
+ * argv[1] names the source file to read and argv[2] the file to write;
+ * a file that can not be opened is reported and ends the run with failure.
+ */
+int main(int argc, char **argv)
+{
+	if (argc < 3)
+	{
+		fprintf(stderr, "We require more arguments\n");
+		exit(EXIT_FAILURE);
+	}
+
+	initialize_types();
+	FILE* input = fopen(argv[1], "r");
+	if(NULL == input)
+	{
+		fprintf(stderr, "The file: %s can not be opened!\n", argv[1]);
+		exit(EXIT_FAILURE);
+	}
+
+	/* Tokenize the input, reverse the resulting list, then compile it */
+	global_token = reverse_list(read_all_tokens(input, NULL));
+	struct token_list* output_list = program(NULL);
+
+	FILE* output = fopen(argv[2], "w");
+	if(NULL == output)
+	{
+		fprintf(stderr, "The file: %s can not be opened!\n", argv[2]);
+		exit(EXIT_FAILURE);
+	}
+
+	fprintf(output, "\n# Core program\n\n");
+	recursive_output(output, output_list);
+	fprintf(output, "\n# Program global variables\n\n");
+	recursive_output(output, globals_list);
+	fprintf(output, "\n# Program strings\n\n");
+	recursive_output(output, strings_list);
+	fclose(output);
+	return 0;
+}
diff --git a/cc.c b/cc.c
index ef932c2..f583962 100644
--- a/cc.c
+++ b/cc.c
@@ -16,1054 +16,107 @@
*/
#include "cc.h"
-#include
+#include <getopt.h>
+
+/* The core functions (only declared in this file; main below calls each of them) */
+void initialize_types();
+struct token_list* read_all_tokens(FILE* a, struct token_list* current);
+struct token_list* reverse_list(struct token_list* head);
+struct token_list* program(struct token_list* out);
+void recursive_output(FILE* out, struct token_list* i);
+
+#if !__MESC__
+static
+#endif
+struct option long_options[] = { /* long-option table passed to getopt_long in main */
+ {"file", required_argument, 0, 'f'}, /* --file ARG, handled by case 'f' */
+ {"output", required_argument, 0, 'o'}, /* --output ARG, handled by case 'o' */
+ {"help", no_argument, 0, 'h'}, /* --help, handled by case 'h' */
+ {"version", no_argument, 0, 'V'}, /* --version, handled by case 'V' */
+ {0, 0, 0, 0} /* all-zero entry ends the table */
+};
-/* Global lists */
-struct type* global_types;
-struct token_list* global_symbol_list;
-struct token_list* global_function_list;
-struct token_list* global_constant_list;
-
-/* What we are currently working on */
-struct token_list* global_token;
-struct token_list* current_target;
-
-/* Output reorder collections*/
-struct token_list* strings_list;
-struct token_list* globals_list;
-
-/* Imported functions */
-struct token_list* read_all_tokens(char* source_file);
-char* parse_string(char* string);
-
-struct token_list* emit(char *s, struct token_list* head)
-{
- struct token_list* t = calloc(1, sizeof(struct token_list));
- t->next = head;
- t->s = s;
- return t;
-}
-
-struct token_list* double_emit(char* a, char* b, struct token_list* out, int flag)
-{
- out = emit(a, out);
- out = emit(b, out);
- if(flag) out = emit("\n", out);
- return out;
-}
-
-char* numerate_number(int a)
-{
- char* result = calloc(16, sizeof(char));
- int i = 0;
-
- /* Deal with Zero case */
- if(0 == a)
- {
- result[0] = '0';
- result[1] = '\n';
- return result;
- }
-
- /* Deal with negatives */
- if(0 > a)
- {
- result[0] = '-';
- i = 1;
- a = a * -1;
- }
-
- /* Using the largest 10^n number possible in 32bits */
- int divisor = 0x3B9ACA00;
- /* Skip leading Zeros */
- while(0 == (a / divisor)) divisor = divisor / 10;
-
- /* Now simply collect numbers until divisor is gone */
- while(0 < divisor)
- {
- result[i] = ((a / divisor) + 48);
- a = a % divisor;
- divisor = divisor / 10;
- i = i + 1;
- }
-
- result[i] = '\n';
- return result;
-}
-
-struct token_list* sym_declare(char *s, struct type* t, struct token_list* list)
-{
- struct token_list* a = calloc(1, sizeof(struct token_list));
- a->next = list;
- a->s = s;
- a->type = t;
- return a;
-}
-
-struct token_list* sym_lookup(char *s, struct token_list* symbol_list)
-{
- for(struct token_list* i = symbol_list; NULL != i; i = i->next)
- {
- if(0 == strcmp(s,i->s)) return i;
- }
- return NULL;
-}
-
-int stack_index(struct token_list* a, struct token_list* function)
-{
- int depth = 4 * function->temps;
- for(struct token_list* i = function->locals; NULL != i; i = i->next)
- {
- if(i == a) return depth;
- else depth = depth + 4;
- }
-
- /* Deal with offset caused by return pointer */
- depth = depth+ 4;
-
- for(struct token_list* i = function->arguments; NULL != i; i = i->next)
- {
- if(i == a) return depth;
- else depth = depth + 4;
- }
-
- fprintf(stderr, "%s does not exist in function %s\n", a->s, function->s);
- exit(EXIT_FAILURE);
-}
-
-struct token_list* sym_get_value(char *s, struct token_list* out, struct token_list* function)
-{
- global_token = global_token->next;
- struct token_list* a = sym_lookup(s, global_constant_list);
- if(NULL != a)
- {
- out = double_emit("LOAD_IMMEDIATE_eax %", a->arguments->s, out, true); return out;
- }
-
- a= sym_lookup(s, global_function_list);
- if(NULL != a)
- {
- return out;
- }
-
- a= sym_lookup(s, function->locals);
- if(NULL != a)
- {
- current_target = a;
- out = double_emit("LOAD_EFFECTIVE_ADDRESS %", numerate_number(stack_index(a, function)), out, false);
- if(strcmp(global_token->s, "=")) out = emit("LOAD_INTEGER\n", out);
- return out;
- }
- a = sym_lookup(s, function->arguments);
-
- if(NULL != a)
- {
- current_target = a;
- out = double_emit("LOAD_EFFECTIVE_ADDRESS %", numerate_number(stack_index(a, function)), out, false);
- if(strcmp(global_token->s, "=")) out = emit("LOAD_INTEGER\n", out);
- return out;
- }
-
- a = sym_lookup(s, global_symbol_list);
- if(NULL != a)
- {
- current_target = a;
- out = double_emit("LOAD_IMMEDIATE_eax &GLOBAL_", s, out, true);
- if(strcmp(global_token->s, "=")) out = emit("LOAD_INTEGER\n", out);
- return out;
- }
-
- fprintf(stderr, "%s is not a defined symbol\n", s);
- exit(EXIT_FAILURE);
-}
-
-void require_char(char* message, char required)
-{
- if(global_token->s[0] != required)
- {
- fprintf(stderr, "%s", message);
- exit(EXIT_FAILURE);
- }
- global_token = global_token->next;
-}
-
-struct token_list* expression(struct token_list* out, struct token_list* function);
-
-/*
- * primary-expr:
- * identifier
- * constant
- * ( expression )
- */
-struct token_list* primary_expr(struct token_list* out, struct token_list* function)
-{
- if(('0' <= global_token->s[0]) & (global_token->s[0] <= '9'))
- {
- out = double_emit("LOAD_IMMEDIATE_eax %", global_token->s, out, true);
- global_token = global_token->next;
- }
- else if((('a' <= global_token->s[0]) & (global_token->s[0] <= 'z')) | (('A' <= global_token->s[0]) & (global_token->s[0] <= 'Z')))
- {
- out = sym_get_value(global_token->s, out, function);
- }
- else if(global_token->s[0] == '(')
- {
- global_token = global_token->next;
- out = expression(out, function);
- require_char("Error in Primary expression\nDidn't get )\n", ')');
- }
- else if(global_token->s[0] == '\'')
- {
- out = emit("LOAD_IMMEDIATE_eax %", out);
- out = emit(numerate_number(global_token->s[1]), out);
- global_token = global_token->next;
- }
- else if(global_token->s[0] == '"')
- {
- static int string_num;
- char* number_string = numerate_number(string_num);
- out = emit("LOAD_IMMEDIATE_eax &STRING_", out);
- out = emit(number_string, out);
-
- /* The target */
- strings_list = emit(":STRING_", strings_list);
- strings_list = emit(number_string, strings_list);
-
- /* Parse the string */
- strings_list = emit(parse_string(global_token->s), strings_list);
- global_token = global_token->next;
-
- string_num = string_num + 1;
- }
- else
- {
- fprintf(stderr, "Recieved %s in primary_expr\n", global_token->s);
- exit(EXIT_FAILURE);
- }
-
- return out;
-}
-
-/* Deal with Expression lists */
-struct token_list* process_expression_list(struct token_list* out, struct token_list* function)
-{
- char* func = global_token->prev->s;
- global_token = global_token->next;
- int temp = function->temps;
-
- if(global_token->s[0] != ')')
- {
- out = expression(out, function);
- out = emit("PUSH_eax\t#_process_expression1\n", out);
- function->temps = function->temps + 1;
-
- while(global_token->s[0] == ',')
- {
- global_token = global_token->next;
- out = expression(out, function);
- out = emit("PUSH_eax\t#_process_expression2\n", out);
- function->temps = function->temps + 1;
- }
- require_char("ERROR in process_expression_list\nNo ) was found\n", ')');
- }
- else global_token = global_token->next;
-
- out = double_emit("CALL_IMMEDIATE %FUNCTION_", func, out, true);
-
- for(int i = function->temps - temp; 0 != i; i = i - 1)
- {
- out = emit("POP_ebx\t# _process_expression_locals\n", out);
- }
-
- function->temps = temp;
- return out;
-}
-
-struct token_list* common_recursion(struct token_list* (*function) (struct token_list*, struct token_list*), struct token_list* out, struct token_list* func)
-{
- global_token = global_token->next;
- out = emit("PUSH_eax\t#_common_recursion\n", out);
- func->temps = func->temps + 1;
- out = function(out, func);
- func->temps = func->temps - 1;
- out = emit("POP_ebx\t# _common_recursion\n", out);
- return out;
-}
-
-int ceil_log2(int a)
-{
- int result = 0;
- if((a & (a - 1)) == 0)
- {
- result = -1;
- }
-
- while(a > 0)
- {
- result = result + 1;
- a = a >> 1;
- }
-
- return result;
-}
-
-/*
- * postfix-expr:
- * primary-expr
- * postfix-expr [ expression ]
- * postfix-expr ( expression-list-opt )
- * postfix-expr -> member
- */
-struct token_list* postfix_expr(struct token_list* out, struct token_list* function)
-{
- out = primary_expr(out, function);
-
- while(1)
- {
- if(global_token->s[0] == '[')
- {
- struct token_list* target = current_target;
- struct type* a = current_target->type;
- out = common_recursion(expression, out, function);
-
- /* Add support for Ints */
- if( 1 != a->indirect->size)
- {
- out = double_emit("SAL_eax_Immediate8 !", numerate_number(ceil_log2(a->indirect->size)), out, false);
- }
-
- out = emit("ADD_ebx_to_eax\n", out);
- current_target = target;
-
- if(strcmp(global_token->next->s, "="))
- {
- if( 4 == a->indirect->size)
- {
- out = emit("LOAD_INTEGER\n", out);
- }
- else
- {
- out = emit("LOAD_BYTE\n", out);
- }
- }
- require_char("ERROR in postfix_expr\nMissing ]\n", ']');
- }
- else if(global_token->s[0] == '(')
- {
- out = process_expression_list(out, function);
- }
- else if(!strcmp("->", global_token->s))
- {
- out = emit("# looking up offset\n", out);
- global_token = global_token->next;
- struct type* i;
- for(i = current_target->type->members; NULL != i; i = i->members)
- {
- if(!strcmp(i->name, global_token->s)) break;
- }
- if(NULL == i)
- {
- fprintf(stderr, "ERROR in postfix_expr %s->%s does not exist\n", current_target->type->name, global_token->s);
- exit(EXIT_FAILURE);
- }
- if(0 != i->offset)
- {
- out = emit("# -> offset calculation\n", out);
- out = double_emit("LOAD_IMMEDIATE_ebx %", numerate_number(i->offset), out, false);
- out = emit("ADD_ebx_to_eax\n", out);
- }
- if(strcmp(global_token->next->s, "="))
- {
- out = emit("LOAD_INTEGER\n", out);
- }
- global_token = global_token->next;
- }
- else return out;
- }
-}
-
-/*
- * additive-expr:
- * postfix-expr
- * additive-expr + postfix-expr
- * additive-expr - postfix-expr
- */
-struct token_list* additive_expr(struct token_list* out, struct token_list* function)
-{
- out = postfix_expr(out, function);
-
- while(1)
- {
- if(global_token->s[0] == '+')
- {
- out = common_recursion(postfix_expr, out, function);
- out = emit("ADD_ebx_to_eax\n", out);
- }
- else if(global_token->s[0] == '-')
- {
- out = common_recursion(postfix_expr, out, function);
- out = emit("SUBTRACT_eax_from_ebx_into_ebx\nMOVE_ebx_to_eax\n", out);
- }
- else return out;
- }
-}
-
-/*
- * shift-expr:
- * additive-expr
- * shift-expr << additive-expr
- * shift-expr >> additive-expr
- */
-struct token_list* shift_expr(struct token_list* out, struct token_list* function)
-{
- out = additive_expr(out, function);
-
- while(1)
- {
- if(!strcmp(global_token->s, "<<"))
- {
- out = common_recursion(additive_expr, out, function);
- // Ugly hack to Work around flaw in x86
- struct token_list* old = out->next;
- free(out);
- out = emit("COPY_eax_to_ecx\nPOP_eax\nSAL_eax_cl\n", old);
- }
- else if(!strcmp(global_token->s, ">>"))
- {
- out = common_recursion(additive_expr, out, function);
- // Ugly hack to Work around flaw in x86
- struct token_list* old = out->next;
- free(out);
- out = emit("COPY_eax_to_ecx\nPOP_eax\nSAR_eax_cl\n", old);
- }
- else
- {
- return out;
- }
- }
-}
-
-/*
- * relational-expr:
- * shift-expr
- * relational-expr < shift-expr
- * relational-expr <= shift-expr
- * relational-expr >= shift-expr
- * relational-expr > shift-expr
- */
-struct token_list* relational_expr(struct token_list* out, struct token_list* function)
-{
- out = shift_expr(out, function);
-
- while(1)
- {
- if(!strcmp(global_token->s, "<"))
- {
- out = common_recursion(shift_expr, out, function);
- out = emit("CMP\nSETL\nMOVEZBL\n", out);
- }
- else if(!strcmp(global_token->s, "<="))
- {
- out = common_recursion(shift_expr, out, function);
- out = emit("CMP\nSETLE\nMOVEZBL\n", out);
- }
- else if(!strcmp(global_token->s, ">="))
- {
- out = common_recursion(shift_expr, out, function);
- out = emit("CMP\nSETGE\nMOVEZBL\n", out);
- }
- else if(!strcmp(global_token->s, ">"))
- {
- out = common_recursion(shift_expr, out, function);
- out = emit("CMP\nSETG\nMOVEZBL\n", out);
- }
- else return out;
- }
-}
-
-/*
- * equality-expr:
- * relational-expr
- * equality-expr == relational-expr
- * equality-expr != relational-expr
- */
-struct token_list* equality_expr(struct token_list* out, struct token_list* function)
-{
- out = relational_expr(out, function);
-
- while(1)
- {
- if(!strcmp(global_token->s, "=="))
- {
- out = common_recursion(relational_expr, out, function);
- out = emit("CMP\nSETE\nMOVEZBL\n", out);
- }
- else if(!strcmp(global_token->s, "!="))
- {
- out = common_recursion(relational_expr, out, function);
- out = emit("CMP\nSETNE\nMOVEZBL\n", out);
- }
- else return out;
- }
-}
-
-/*
- * bitwise-and-expr:
- * equality-expr
- * bitwise-and-expr & equality-expr
- */
-struct token_list* bitwise_and_expr(struct token_list* out, struct token_list* function)
-{
- out = equality_expr(out, function);
-
- while(global_token->s[0] == '&')
- {
- out = common_recursion(equality_expr, out, function);
- out = emit("AND_eax_ebx\n", out);
- }
- return out;
-}
-
-/*
- * bitwise-or-expr:
- * bitwise-and-expr
- * bitwise-and-expr | bitwise-or-expr
- */
-struct token_list* bitwise_or_expr(struct token_list* out, struct token_list* function)
-{
- out = bitwise_and_expr(out, function);
-
- while(global_token->s[0] == '|')
- {
- out = common_recursion(bitwise_and_expr, out, function);
- out = emit("OR_eax_ebx\n", out);
- }
- return out;
-}
-
-/*
- * expression:
- * bitwise-or-expr
- * bitwise-or-expr = expression
- */
-struct token_list* expression(struct token_list* out, struct token_list* function)
-{
- out = bitwise_or_expr(out, function);
-
- if(global_token->s[0] == '=')
- {
- struct token_list* target = current_target;
- bool member = !strcmp(global_token->prev->s, "]");
- out = common_recursion(expression, out, function);
-
- if(member)
- {
- if(1 == target->type->indirect->size) out = emit("STORE_CHAR\n", out);
- else if(4 == target->type->indirect->size)
- {
- out = emit("STORE_INTEGER\n", out);
- }
- }
- else
- {
- out = emit("STORE_INTEGER\n", out);
- }
- }
- return out;
-}
-
-struct type* lookup_type(char* s)
-{
- for(struct type* i = global_types; NULL != i; i = i->next)
- {
- if(!strcmp(i->name, s))
- {
- return i;
- }
- }
- return NULL;
-}
-
-struct type* type_name();
-void create_struct()
-{
- int offset = 0;
- struct type* head = calloc(1, sizeof(struct type));
- struct type* i = calloc(1, sizeof(struct type));
- head->name = global_token->s;
- i->name = global_token->s;
- head->indirect = i;
- i->indirect = head;
- head->next = global_types;
- global_types = head;
- global_token = global_token->next;
- i->size = 4;
- require_char("ERROR in create_struct\nMissing {\n", '{');
- struct type* last = NULL;
- while('}' != global_token->s[0])
- {
- struct type* member_type = type_name();
- i = calloc(1, sizeof(struct type));
- i->name = global_token->s;
- i->members = last;
- i->size = member_type->size;
- i->offset = offset;
- offset = offset + member_type->size;
- global_token = global_token->next;
- require_char("ERROR in create_struct\nMissing ;\n", ';');
- last = i;
- }
-
- global_token = global_token->next;
- require_char("ERROR in create_struct\nMissing ;\n", ';');
-
- head->size = offset;
- head->members = last;
- head->indirect->members = last;
-}
-
-
-/*
- * type-name:
- * char *
- * int
- */
-struct type* type_name()
-{
- int structure = false;
-
- if(!strcmp(global_token->s, "struct"))
- {
- structure = true;
- global_token = global_token->next;
- }
-
- struct type* ret = lookup_type(global_token->s);
-
- if(NULL == ret && !structure)
- {
- fprintf(stderr, "Unknown type %s\n", global_token->s);
- exit(EXIT_FAILURE);
- }
- else if(NULL == ret)
- {
- create_struct();
- return NULL;
- }
-
- global_token = global_token->next;
-
- while(global_token->s[0] == '*')
- {
- ret = ret->indirect;
- global_token = global_token->next;
- }
-
- return ret;
-}
-
-/* Process local variable */
-struct token_list* collect_local(struct token_list* out, struct token_list* function)
-{
- struct type* type_size = type_name();
- out = double_emit("# Defining local ", global_token->s, out, true);
-
- struct token_list* a = sym_declare(global_token->s, type_size, function->locals);
- function->locals = a;
- global_token = global_token->next;
- function->temps = function->temps - 1;
-
- if(global_token->s[0] == '=')
- {
- global_token = global_token->next;
- out = expression(out, function);
- }
- function->temps = function->temps + 1;
-
- require_char("ERROR in collect_local\nMissing ;\n", ';');
-
- out = double_emit("PUSH_eax\t#", a->s, out, true);
- return out;
-}
-
-struct token_list* statement(struct token_list* out, struct token_list* function);
-
-/* Evaluate if statements */
-int if_count;
-struct token_list* process_if(struct token_list* out, struct token_list* function)
-{
- char* number_string = numerate_number(if_count);
- if_count = if_count + 1;
-
- out = double_emit("# IF_",number_string, out, false);
-
- global_token = global_token->next;
- require_char("ERROR in process_if\nMISSING (\n", '(');
- out = expression(out, function);
-
- out = double_emit("TEST\nJUMP_EQ %ELSE_", number_string, out, false);
-
- require_char("ERROR in process_if\nMISSING )\n", ')');
- out = statement(out, function);
-
- out = double_emit("JUMP %_END_IF_", number_string, out, false);
- out = double_emit(":ELSE_", number_string, out, false);
-
- if(!strcmp(global_token->s, "else"))
- {
- global_token = global_token->next;
- out = statement(out, function);
- }
- out = double_emit(":_END_IF_", number_string, out, false);
- return out;
-}
-
-int for_count;
-struct token_list* process_for(struct token_list* out, struct token_list* function)
-{
- char* number_string = numerate_number(for_count);
- for_count = for_count + 1;
-
- out = double_emit("# FOR_initialization_", number_string, out, false);
-
- global_token = global_token->next;
-
- require_char("ERROR in process_for\nMISSING (\n", '(');
- out = expression(out, function);
-
- out = double_emit(":FOR_", number_string, out , false);
-
- require_char("ERROR in process_for\nMISSING ;1\n", ';');
- out = expression(out, function);
-
- out = double_emit("TEST\nJUMP_EQ %FOR_END_", number_string, out, false);
- out = double_emit("JUMP %FOR_THEN_", number_string, out, false);
- out = double_emit(":FOR_ITER_", number_string, out, false);
-
- require_char("ERROR in process_for\nMISSING ;2\n", ';');
- out = expression(out, function);
-
- out = double_emit("JUMP %FOR_", number_string, out, false);
- out = double_emit(":FOR_THEN_", number_string, out, false);
-
- require_char("ERROR in process_for\nMISSING )\n", ')');
- out = statement(out, function);
-
- out = double_emit("JUMP %FOR_ITER_", number_string, out, false);
- out = double_emit(":FOR_END_", number_string, out, false);
- return out;
-}
-
-/* Process Assembly statements */
-struct token_list* process_asm(struct token_list* out)
-{
- global_token = global_token->next;
- require_char("ERROR in process_asm\nMISSING (\n", '(');
- while('"' == global_token->s[0])
- {
- out = emit((global_token->s + 1), out);
- out = emit("\n", out);
- global_token = global_token->next;
- }
- require_char("ERROR in process_asm\nMISSING )\n", ')');
- require_char("ERROR in process_asm\nMISSING ;\n", ';');
- return out;
-}
-
-/* Process while loops */
-int while_count;
-struct token_list* process_while(struct token_list* out, struct token_list* function)
-{
- char* number_string = numerate_number(while_count);
- while_count = while_count + 1;
-
- out = double_emit(":WHILE_", number_string, out, false);
-
- global_token = global_token->next;
- require_char("ERROR in process_while\nMISSING (\n", '(');
- out = expression(out, function);
-
- out = double_emit("TEST\nJUMP_EQ %END_WHILE_", number_string, out, false);
- out = double_emit("# THEN_while_", number_string, out, false);
-
- require_char("ERROR in process_while\nMISSING )\n", ')');
- out = statement(out, function);
-
- out = double_emit("JUMP %WHILE_", number_string, out, false);
- out = double_emit(":END_WHILE_", number_string, out, false);
- return out;
-}
-
-/* Ensure that functions return */
-struct token_list* return_result(struct token_list* out, struct token_list* function)
-{
- global_token = global_token->next;
- if(global_token->s[0] != ';') out = expression(out, function);
-
- require_char("ERROR in return_result\nMISSING ;\n", ';');
-
- for(struct token_list* i = function->locals; NULL != i; i = i->next)
- {
- out = emit("POP_ebx\t# _return_result_locals\n", out);
- function->locals = function->locals->next;
- }
- out = emit("RETURN\n", out);
- return out;
-}
-
-struct token_list* recursive_statement(struct token_list* out, struct token_list* function)
-{
- global_token = global_token->next;
- struct token_list* frame = function->locals;
-
- while(strcmp(global_token->s, "}"))
- {
- out = statement(out, function);
- }
- global_token = global_token->next;
-
- /* Clean up any locals added */
- if(NULL != function->locals)
- {
- for(struct token_list* i = function->locals; frame != i; i = i->next)
- {
- out = emit( "POP_ebx\t# _recursive_statement_locals\n", out);
- function->locals = function->locals->next;
- }
- }
- return out;
-}
-
-/*
- * statement:
- * { statement-list-opt }
- * type-name identifier ;
- * type-name identifier = expression;
- * if ( expression ) statement
- * if ( expression ) statement else statement
- * while ( expression ) statement
- * for ( expression ; expression ; expression ) statement
- * asm ( "assembly" ... "assembly" ) ;
- * return ;
- * expr ;
- */
-struct token_list* statement(struct token_list* out, struct token_list* function)
-{
- if(global_token->s[0] == '{')
- {
- out = recursive_statement(out, function);
- }
- else if((NULL != lookup_type(global_token->s)) || !strcmp("struct", global_token->s))
- {
- out = collect_local(out, function);
- }
- else if(!strcmp(global_token->s, "if"))
- {
- out = process_if(out, function);
- }
- else if(!strcmp(global_token->s, "while"))
- {
- out = process_while(out, function);
- }
- else if(!strcmp(global_token->s, "for"))
- {
- out = process_for(out, function);
- }
- else if(!strcmp(global_token->s, "asm"))
- {
- out = process_asm(out);
- }
- else if(!strcmp(global_token->s, "return"))
- {
- out = return_result(out, function);
- }
- else
- {
- out = expression(out, function);
- require_char("ERROR in statement\nMISSING ;\n", ';');
- }
- return out;
-}
-
-/* Collect function arguments */
-void collect_arguments(struct token_list* function)
-{
- global_token = global_token->next;
-
- while(strcmp(global_token->s, ")"))
- {
- struct type* type_size = type_name();
- if(global_token->s[0] == ')')
- {
- /* deal with foo(int|char|void) */
- global_token = global_token->prev;
- }
- else if(global_token->s[0] != ',')
- {
- /* deal with foo(int a, char b) */
- struct token_list* a = sym_declare(global_token->s, type_size, function->arguments);
- function->arguments = a;
- }
-
- /* foo(int,char,void) doesn't need anything done */
- global_token = global_token->next;
-
- /* ignore trailing comma (needed for foo(bar(), 1); expressions*/
- if(global_token->s[0] == ',') global_token = global_token->next;
- }
- global_token = global_token->next;
-}
-
-struct token_list* declare_function(struct token_list* out, struct type* type)
-{
- char* essential = global_token->prev->s;
- struct token_list* func = sym_declare(global_token->prev->s, calloc(1, sizeof(struct type)), global_function_list);
- func->type = type;
- collect_arguments(func);
-
- /* allow previously defined functions to be looked up */
- global_function_list = func;
-
- /* If just a prototype don't waste time */
- if(global_token->s[0] == ';') global_token = global_token->next;
- else
- {
- out = double_emit("# Defining function ", essential, out, true);
- out = double_emit(":FUNCTION_", essential, out, true);
- out = statement(out, func);
-
- /* Prevent duplicate RETURNS */
- if(strcmp(out->s, "RETURN\n"))
- {
- out = emit("RETURN\n", out);
- }
- }
- return out;
-}
-
-/*
- * program:
- * declaration
- * declaration program
- *
- * declaration:
- * CONSTANT identifer value
- * type-name identifier ;
- * type-name identifier ( parameter-list ) ;
- * type-name identifier ( parameter-list ) statement
- *
- * parameter-list:
- * parameter-declaration
- * parameter-list, parameter-declaration
- *
- * parameter-declaration:
- * type-name identifier-opt
- */
-struct token_list* program(struct token_list* out)
-{
- while(NULL != global_token->next)
- {
-new_type:
- if(!strcmp(global_token->s, "CONSTANT"))
- {
- global_constant_list = sym_declare(global_token->next->s, NULL, global_constant_list);
- global_constant_list->arguments = global_token->next->next;
- global_token = global_token->next->next->next;
- }
- else
- {
- struct type* type_size = type_name();
- if(NULL == type_size)
- {
- goto new_type;
- }
- global_token = global_token->next;
- if(global_token->s[0] == ';')
- {
- /* Add to global symbol table */
- global_symbol_list = sym_declare(global_token->prev->s, type_size, global_symbol_list);
-
- /* Ensure 4 bytes are allocated for the global */
- globals_list = double_emit(":GLOBAL_", global_token->prev->s, globals_list, true);
- globals_list = emit("NOP\n", globals_list);
-
- global_token = global_token->next;
- }
- else if(global_token->s[0] == '(') out = declare_function(out, type_size);
- else
- {
- fprintf(stderr, "Recieved %s in program\n", global_token->s);
- exit(EXIT_FAILURE);
- }
- }
- }
- return out;
-}
-
-void recursive_output(FILE* out, struct token_list* i)
-{
- if(NULL == i) return;
- recursive_output(out, i->next);
- fprintf(out, "%s", i->s);
-}
-
-/* Initialize default types */
-void initialize_types()
-{
- /* Define void */
- global_types = calloc(1, sizeof(struct type));
- global_types->name = "void";
- global_types->size = 4;
- /* void* has the same properties as void */
- global_types->indirect = global_types;
-
- /* Define int */
- struct type* a = calloc(1, sizeof(struct type));
- a->name = "int";
- a->size = 4;
- /* int* has the same properties as int */
- a->indirect = a;
-
- /* Define char* */
- struct type* b = calloc(1, sizeof(struct type));
- b->name = "char*";
- b->size = 4;
-
- /* Define char */
- struct type* c = calloc(1, sizeof(struct type));
- c->name = "char";
- c->size = 1;
-
- /* char** is char */
- c->indirect = b;
- b->indirect = c;
-
- /* Finalize type list */
- a->next = c;
- global_types->next = a;
-}
-
/* Our essential organizer */
int main(int argc, char **argv)
{
- if (argc < 3)
+ global_token = NULL; /* token list grows across every -f file read below */
+
+ int c;
+ FILE* source_file;
+ FILE* destination_file = stdout; /* used as-is when no -o option is given */
+ int option_index = 0;
+ while ((c = getopt_long(argc, argv, "f:ho:V", long_options, &option_index)) != -1) /* only -f and -o take an argument */
{
- fprintf(stderr, "We require more arguments\n");
- exit(EXIT_FAILURE);
+ switch(c)
+ {
+ case 0: break;
+ case 'h':
+ {
+ fprintf(stderr, "Usage: %s -f FILENAME1 {-f FILENAME2} -o OUTPUT\n", argv[0]);
+ exit(EXIT_SUCCESS);
+ }
+ case 'f':
+ {
+ #if __MESC__
+ source_file = open(optarg, O_RDONLY);
+ #else
+ source_file = fopen(optarg, "r");
+ #endif
+
+ if(NULL == source_file)
+ {
+ fprintf(stderr, "The file: %s can not be opened!\n", optarg);
+ exit(EXIT_FAILURE);
+ }
+
+ global_token = read_all_tokens(source_file, global_token);
+ break;
+ }
+ case 'o':
+ {
+ #if __MESC__
+ destination_file = open(optarg, O_CREAT|O_TRUNC|O_WRONLY, S_IRUSR|S_IWUSR);
+ #else
+ destination_file = fopen(optarg, "w");
+ #endif
+
+ if(NULL == destination_file)
+ {
+ fprintf(stderr, "The file: %s can not be opened!\n", optarg);
+ exit(EXIT_FAILURE);
+ }
+ break;
+ }
+ case 'V':
+ {
+ fprintf(stdout, "M2-Planet 0.1\n");
+ exit(EXIT_SUCCESS);
+ }
+ default:
+ {
+ fprintf(stderr, "Unknown option\n");
+ exit(EXIT_FAILURE);
+ }
+ }
}
- initialize_types();
+ if(NULL == global_token)
+ {
+ fprintf(stderr, "Either no input files were given or they were empty\n");
+ exit(EXIT_FAILURE);
+ }
+ global_token = reverse_list(global_token);
- global_token = read_all_tokens(argv[1]);
+ initialize_types();
struct token_list* output_list = program(NULL);
- FILE* output = fopen(argv[2], "w");
- fprintf(output, "\n# Core program\n\n");
- recursive_output(output, output_list);
- fprintf(output, "\n# Program global variables\n\n");
- recursive_output(output, globals_list);
- fprintf(output, "\n# Program strings\n\n");
- recursive_output(output, strings_list);
- fclose(output);
- return 0;
+
+ /* Output the program we have compiled */
+ fprintf(destination_file, "\n# Core program\n\n");
+ recursive_output(destination_file, output_list);
+ fprintf(destination_file, "\n# Program global variables\n\n");
+ recursive_output(destination_file, globals_list);
+ fprintf(destination_file, "\n# Program strings\n\n");
+ recursive_output(destination_file, strings_list);
+ return EXIT_SUCCESS;
}
diff --git a/cc.h b/cc.h
index 970e94d..84edaf9 100644
--- a/cc.h
+++ b/cc.h
@@ -46,3 +46,10 @@ struct token_list
struct token_list* locals;
int temps;
};
+
+/* What we are currently working on. NOTE(review): defined (not extern-declared) in a header, so every includer gets a copy - confirm the toolchain merges them */
+struct token_list* global_token;
+
+/* Output reorder collections; main writes globals_list and then strings_list after the core program */
+struct token_list* strings_list;
+struct token_list* globals_list;
diff --git a/cc_core.c b/cc_core.c
new file mode 100644
index 0000000..81869a5
--- /dev/null
+++ b/cc_core.c
@@ -0,0 +1,1038 @@
+/* Copyright (C) 2016 Jeremiah Orians
+ * This file is part of stage0.
+ *
+ * stage0 is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * stage0 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with stage0. If not, see .
+ */
+
+#include "cc.h"
+#include
+
+/* Global lists */
+struct type* global_types; /* known types chained through ->next; searched by lookup_type */
+struct token_list* global_symbol_list; /* global variables, added by program */
+struct token_list* global_function_list; /* functions, added by declare_function */
+struct token_list* global_constant_list; /* CONSTANT names; each keeps its value token in ->arguments */
+
+/* What we are currently working on */
+struct token_list* current_target; /* symbol most recently resolved by sym_get_value */
+
+/* Imported functions */
+char* parse_string(char* string); /* defined outside this file; applied to string literal tokens in primary_expr */
+
+struct token_list* emit(char *s, struct token_list* head)
+{
+	struct token_list* node = calloc(1, sizeof(struct token_list)); /* fresh zero-filled node */
+	node->s = s; /* the string pointer is stored as-is, not copied */
+	node->next = head; /* the new node goes in front of the existing list */
+	return node;
+}
+
+struct token_list* double_emit(char* a, char* b, struct token_list* out, int flag)
+{
+	/* Emit a then b; a nonzero flag appends a newline token after them */
+	struct token_list* list = emit(b, emit(a, out));
+	if(0 == flag) return list;
+	return emit("\n", list);
+}
+
+char* numerate_number(int a)
+{
+	char* result = calloc(16, sizeof(char)); /* "-2147483648\n" needs 13 bytes; zero fill supplies the NUL */
+	int i = 0;
+
+	/* Deal with Zero case */
+	if(0 == a)
+	{
+		result[0] = '0';
+		result[1] = '\n';
+		return result;
+	}
+
+	/* Work on the magnitude as unsigned: negating INT_MIN as an int would overflow */
+	unsigned magnitude = a;
+	if(0 > a)
+	{
+		result[0] = '-';
+		i = 1;
+		magnitude = 0u - magnitude;
+	}
+
+	/* Using the largest 10^n number possible in 32bits */
+	unsigned divisor = 0x3B9ACA00;
+	/* Skip leading Zeros */
+	while(0 == (magnitude / divisor)) divisor = divisor / 10;
+
+	/* Now simply collect numbers until divisor is gone */
+	while(0 < divisor)
+	{
+		result[i] = ((magnitude / divisor) + '0');
+		magnitude = magnitude % divisor;
+		divisor = divisor / 10;
+		i = i + 1;
+	}
+	result[i] = '\n'; /* every number handed back ends with a newline */
+	return result;
+}
+
+struct token_list* sym_declare(char *s, struct type* t, struct token_list* list)
+{
+	struct token_list* entry = calloc(1, sizeof(struct token_list)); /* zero-filled symbol entry */
+	entry->type = t;
+	entry->s = s; /* the name is referenced, not copied */
+	entry->next = list; /* the entry becomes the new head of list */
+	return entry;
+}
+
+struct token_list* sym_lookup(char *s, struct token_list* symbol_list)
+{
+	/* Linear scan of symbol_list starting at its head */
+	struct token_list* entry = symbol_list;
+	while((NULL != entry) && (0 != strcmp(s, entry->s))) entry = entry->next;
+	/* Either the first entry named s, or NULL when the list ran out */
+	return entry;
+}
+
+int stack_index(struct token_list* a, struct token_list* function)
+{
+	/* Start past the temporaries counted in function->temps, 4 bytes each */
+	int offset = function->temps * 4;
+	/* Locals follow, most recently declared first */
+	for(struct token_list* l = function->locals; NULL != l; l = l->next)
+	{
+		if(a == l) return offset;
+		offset = offset + 4;
+	}
+
+	/* Deal with offset caused by return pointer before looking at the arguments */
+	offset = offset + 4;
+	for(struct token_list* p = function->arguments; NULL != p; p = p->next)
+	{
+		if(a == p) return offset;
+		offset = offset + 4;
+	}
+	fprintf(stderr, "%s does not exist in function %s\n", a->s, function->s);
+	exit(EXIT_FAILURE);
+}
+
+struct token_list* sym_get_value(char *s, struct token_list* out, struct token_list* function) /* Emit the code that loads symbol s; exits when s is not defined anywhere */
+{
+ global_token = global_token->next; /* consume the identifier; global_token is now the token that follows it */
+ struct token_list* a = sym_lookup(s, global_constant_list); /* lookup order: constants, functions, locals, arguments, globals */
+ if(NULL != a)
+ {
+ out = double_emit("LOAD_IMMEDIATE_eax %", a->arguments->s, out, true); return out; /* program() keeps a constant's value token in ->arguments */
+ }
+
+ a= sym_lookup(s, global_function_list);
+ if(NULL != a)
+ {
+ return out; /* function names emit nothing here; process_expression_list emits the call itself */
+ }
+
+ a= sym_lookup(s, function->locals);
+ if(NULL != a)
+ {
+ current_target = a; /* remembered for postfix_expr and expression */
+ out = double_emit("LOAD_EFFECTIVE_ADDRESS %", numerate_number(stack_index(a, function)), out, false); /* numerate_number already ends in a newline */
+ if(strcmp(global_token->s, "=")) out = emit("LOAD_INTEGER\n", out); /* skipped when the next token is "=", leaving only the address */
+ return out;
+ }
+ a = sym_lookup(s, function->arguments);
+
+ if(NULL != a)
+ {
+ current_target = a;
+ out = double_emit("LOAD_EFFECTIVE_ADDRESS %", numerate_number(stack_index(a, function)), out, false); /* arguments are addressed the same way as locals */
+ if(strcmp(global_token->s, "=")) out = emit("LOAD_INTEGER\n", out);
+ return out;
+ }
+
+ a = sym_lookup(s, global_symbol_list);
+ if(NULL != a)
+ {
+ current_target = a;
+ out = double_emit("LOAD_IMMEDIATE_eax &GLOBAL_", s, out, true); /* globals are addressed through the GLOBAL_ label emitted by program() */
+ if(strcmp(global_token->s, "=")) out = emit("LOAD_INTEGER\n", out);
+ return out;
+ }
+
+ fprintf(stderr, "%s is not a defined symbol\n", s);
+ exit(EXIT_FAILURE);
+}
+
+void require_char(char* message, char required)
+{
+	if(required == global_token->s[0]) global_token = global_token->next; /* only the first character is compared */
+	else
+	{
+		fprintf(stderr, "%s", message);
+		exit(EXIT_FAILURE);
+	}
+}
+
+struct token_list* expression(struct token_list* out, struct token_list* function);
+
+/*
+ * primary-expr:
+ * identifier
+ * constant
+ * ( expression )
+ */
+struct token_list* primary_expr(struct token_list* out, struct token_list* function) /* Emit the load for one constant, identifier, parenthesised expression, character or string */
+{
+	if(('0' <= global_token->s[0]) & (global_token->s[0] <= '9')) /* numeric constant: the token text is emitted as written */
+	{
+		out = double_emit("LOAD_IMMEDIATE_eax %", global_token->s, out, true);
+		global_token = global_token->next;
+	}
+	else if((('a' <= global_token->s[0]) & (global_token->s[0] <= 'z')) | (('A' <= global_token->s[0]) & (global_token->s[0] <= 'Z')))
+	{
+		out = sym_get_value(global_token->s, out, function); /* identifier: sym_get_value also consumes the token */
+	}
+	else if(global_token->s[0] == '(')
+	{
+		global_token = global_token->next;
+		out = expression(out, function);
+		require_char("Error in Primary expression\nDidn't get )\n", ')');
+	}
+	else if(global_token->s[0] == '\'')
+	{
+		out = emit("LOAD_IMMEDIATE_eax %", out); /* character constant: the byte after the quote, as a number */
+		out = emit(numerate_number(global_token->s[1]), out);
+		global_token = global_token->next;
+	}
+	else if(global_token->s[0] == '"')
+	{
+		static int string_num; /* numbers the STRING_ labels; persists across calls */
+		char* number_string = numerate_number(string_num);
+		out = emit("LOAD_IMMEDIATE_eax &STRING_", out);
+		out = emit(number_string, out);
+
+		/* The target */
+		strings_list = emit(":STRING_", strings_list);
+		strings_list = emit(number_string, strings_list);
+
+		/* Parse the string */
+		strings_list = emit(parse_string(global_token->s), strings_list);
+		global_token = global_token->next;
+
+		string_num = string_num + 1;
+	}
+	else
+	{
+		fprintf(stderr, "Received %s in primary_expr\n", global_token->s);
+		exit(EXIT_FAILURE);
+	}
+
+	return out;
+}
+
+/* Deal with Expression lists */
+struct token_list* process_expression_list(struct token_list* out, struct token_list* function) /* Parse a call's argument list and emit the pushes, the call and the stack cleanup */
+{
+ char* func = global_token->prev->s; /* the token before the '(' is used as the name of the function called */
+ global_token = global_token->next; /* step over the '(' */
+ int temp = function->temps; /* saved so the temporary count can be restored afterwards */
+
+ if(global_token->s[0] != ')')
+ {
+ out = expression(out, function);
+ out = emit("PUSH_eax\t#_process_expression1\n", out); /* every argument value is pushed ... */
+ function->temps = function->temps + 1; /* ... and counted as a temporary while later arguments are evaluated */
+
+ while(global_token->s[0] == ',')
+ {
+ global_token = global_token->next;
+ out = expression(out, function);
+ out = emit("PUSH_eax\t#_process_expression2\n", out);
+ function->temps = function->temps + 1;
+ }
+ require_char("ERROR in process_expression_list\nNo ) was found\n", ')');
+ }
+ else global_token = global_token->next; /* empty argument list: step over the ')' */
+
+ out = double_emit("CALL_IMMEDIATE %FUNCTION_", func, out, true);
+
+ for(int i = function->temps - temp; 0 != i; i = i - 1) /* one pop for every argument pushed above */
+ {
+ out = emit("POP_ebx\t# _process_expression_locals\n", out);
+ }
+
+ function->temps = temp;
+ return out;
+}
+
+struct token_list* common_recursion(struct token_list* (*function) (struct token_list*, struct token_list*), struct token_list* out, struct token_list* func)
+{
+	/* Keep the value already in eax on the stack while the callback parses the operand */
+	out = emit("PUSH_eax\t#_common_recursion\n", out);
+	func->temps = func->temps + 1;
+	global_token = global_token->next; /* step over the operator token */
+	out = emit("POP_ebx\t# _common_recursion\n", function(out, func));
+	func->temps = func->temps - 1;
+	return out;
+}
+
+int ceil_log2(int a)
+{
+	/* Smallest n with (1 << n) >= a, for positive a */
+	int bits;
+
+	/* Exact powers of two (and zero) start one lower so they are not rounded up */
+	if(0 != (a & (a - 1))) bits = 0;
+	else bits = -1;
+
+	/* Add one for every bit needed to hold a */
+	for(; 0 < a; a = a >> 1)
+	{
+		bits = bits + 1;
+	}
+	return bits;
+}
+
+/*
+ * postfix-expr:
+ * primary-expr
+ * postfix-expr [ expression ]
+ * postfix-expr ( expression-list-opt )
+ * postfix-expr -> member
+ */
+struct token_list* postfix_expr(struct token_list* out, struct token_list* function) /* Parse a primary expression followed by any number of [index], (call) and ->member suffixes */
+{
+ out = primary_expr(out, function);
+
+ while(1)
+ {
+ if(global_token->s[0] == '[')
+ {
+ struct token_list* target = current_target; /* the index expression may overwrite current_target, so keep it */
+ struct type* a = current_target->type;
+ out = common_recursion(expression, out, function); /* pushes the base value, evaluates the index, pops the base into ebx */
+
+ /* Add support for Ints */
+ if( 1 != a->indirect->size)
+ {
+ out = double_emit("SAL_eax_Immediate8 !", numerate_number(ceil_log2(a->indirect->size)), out, false); /* scale the index by the element size */
+ }
+
+ out = emit("ADD_ebx_to_eax\n", out);
+ current_target = target;
+
+ if(strcmp(global_token->next->s, "=")) /* global_token should be the ']' here, so this looks at the token after it */
+ {
+ if( 4 == a->indirect->size)
+ {
+ out = emit("LOAD_INTEGER\n", out);
+ }
+ else
+ {
+ out = emit("LOAD_BYTE\n", out); /* every element size other than 4 is loaded as a byte */
+ }
+ }
+ require_char("ERROR in postfix_expr\nMissing ]\n", ']');
+ }
+ else if(global_token->s[0] == '(')
+ {
+ out = process_expression_list(out, function);
+ }
+ else if(!strcmp("->", global_token->s))
+ {
+ out = emit("# looking up offset\n", out);
+ global_token = global_token->next; /* now at the member name */
+ struct type* i;
+ for(i = current_target->type->members; NULL != i; i = i->members) /* members are chained through ->members */
+ {
+ if(!strcmp(i->name, global_token->s)) break;
+ }
+ if(NULL == i)
+ {
+ fprintf(stderr, "ERROR in postfix_expr %s->%s does not exist\n", current_target->type->name, global_token->s);
+ exit(EXIT_FAILURE);
+ }
+ if(0 != i->offset) /* a member at offset zero needs no address adjustment */
+ {
+ out = emit("# -> offset calculation\n", out);
+ out = double_emit("LOAD_IMMEDIATE_ebx %", numerate_number(i->offset), out, false);
+ out = emit("ADD_ebx_to_eax\n", out);
+ }
+ if(strcmp(global_token->next->s, "=")) /* load the member's value unless it is being assigned to */
+ {
+ out = emit("LOAD_INTEGER\n", out);
+ }
+ global_token = global_token->next; /* step over the member name */
+ }
+ else return out; /* no further suffix */
+ }
+}
+
+/*
+ * additive-expr:
+ * postfix-expr
+ * additive-expr + postfix-expr
+ * additive-expr - postfix-expr
+ */
+struct token_list* additive_expr(struct token_list* out, struct token_list* function)
+{
+	out = postfix_expr(out, function);
+
+	/* Fold further + and - operands left to right */
+	for(;;)
+	{
+		char op = global_token->s[0];
+		if(('+' != op) && ('-' != op)) return out;
+
+		/* common_recursion saves the left value and pops it into ebx afterwards */
+		out = common_recursion(postfix_expr, out, function);
+		if('+' == op)
+		{
+			out = emit("ADD_ebx_to_eax\n", out);
+		}
+		else out = emit("SUBTRACT_eax_from_ebx_into_ebx\nMOVE_ebx_to_eax\n", out);
+	}
+}
+
+/*
+ * shift-expr:
+ * additive-expr
+ * shift-expr << additive-expr
+ * shift-expr >> additive-expr
+ */
+struct token_list* shift_expr(struct token_list* out, struct token_list* function)
+{
+	out = additive_expr(out, function);
+
+	for(;;)
+	{
+		/* Pick the instruction sequence for the operator at hand, or stop */
+		char* shift;
+		if(0 == strcmp(global_token->s, "<<"))
+		{
+			shift = "COPY_eax_to_ecx\nPOP_eax\nSAL_eax_cl\n";
+		}
+		else if(0 == strcmp(global_token->s, ">>"))
+		{
+			shift = "COPY_eax_to_ecx\nPOP_eax\nSAR_eax_cl\n";
+		}
+		else return out;
+
+		/* The right operand is parsed as an additive expression */
+		out = common_recursion(additive_expr, out, function);
+
+		/* Ugly hack to Work around flaw in x86: drop the POP_ebx token that */
+		/* common_recursion emitted last and emit the shift sequence in its place */
+		struct token_list* rest = out->next;
+		free(out);
+		out = emit(shift, rest);
+	}
+}
+
+/*
+ * relational-expr:
+ * shift-expr
+ * relational-expr < shift-expr
+ * relational-expr <= shift-expr
+ * relational-expr >= shift-expr
+ * relational-expr > shift-expr
+ */
+struct token_list* relational_expr(struct token_list* out, struct token_list* function)
+{
+	out = shift_expr(out, function);
+
+	for(;;)
+	{
+		char* compare; /* instruction sequence for the operator at hand */
+		if(0 == strcmp(global_token->s, "<"))
+		{
+			compare = "CMP\nSETL\nMOVEZBL\n";
+		}
+		else if(0 == strcmp(global_token->s, "<="))
+		{
+			compare = "CMP\nSETLE\nMOVEZBL\n";
+		}
+		else if(0 == strcmp(global_token->s, ">="))
+		{
+			compare = "CMP\nSETGE\nMOVEZBL\n";
+		}
+		else if(0 == strcmp(global_token->s, ">"))
+		{
+			compare = "CMP\nSETG\nMOVEZBL\n";
+		}
+		else return out; /* not a relational operator: nothing more to fold */
+
+		out = common_recursion(shift_expr, out, function);
+		out = emit(compare, out);
+	}
+}
+
+/*
+ * equality-expr:
+ * relational-expr
+ * equality-expr == relational-expr
+ * equality-expr != relational-expr
+ */
+struct token_list* equality_expr(struct token_list* out, struct token_list* function)
+{
+	out = relational_expr(out, function);
+
+	/* Chain further == and != comparisons left to right */
+	for(;;)
+	{
+		/* Select the instructions for the operator at hand, or stop */
+		char* set;
+		if(0 == strcmp(global_token->s, "==")) set = "CMP\nSETE\nMOVEZBL\n";
+		else if(0 == strcmp(global_token->s, "!=")) set = "CMP\nSETNE\nMOVEZBL\n";
+		else return out;
+
+		/* The operator was inspected before common_recursion steps */
+		/* over it and parses the right operand */
+		out = common_recursion(relational_expr, out, function);
+		out = emit(set, out);
+	}
+}
+
+/*
+ * bitwise-and-expr:
+ * equality-expr
+ * bitwise-and-expr & equality-expr
+ */
+struct token_list* bitwise_and_expr(struct token_list* out, struct token_list* function)
+{
+	out = equality_expr(out, function);
+	/* Every further operand introduced by a token starting with '&' is ANDed in */
+	for(;;)
+	{
+		if('&' != global_token->s[0]) return out;
+		out = common_recursion(equality_expr, out, function);
+		out = emit("AND_eax_ebx\n", out);
+	}
+}
+
+/*
+ * bitwise-or-expr:
+ * bitwise-and-expr
+ * bitwise-and-expr | bitwise-or-expr
+ */
+struct token_list* bitwise_or_expr(struct token_list* out, struct token_list* function)
+{
+	out = bitwise_and_expr(out, function);
+	/* Every further operand introduced by a token starting with '|' is ORed in */
+	for(;;)
+	{
+		if('|' != global_token->s[0]) return out;
+		out = common_recursion(bitwise_and_expr, out, function);
+		out = emit("OR_eax_ebx\n", out);
+	}
+}
+
+/*
+ * expression:
+ * bitwise-or-expr
+ * bitwise-or-expr = expression
+ */
+struct token_list* expression(struct token_list* out, struct token_list* function)
+{
+	out = bitwise_or_expr(out, function);
+
+	/* Anything other than an assignment is already complete */
+	if('=' != global_token->s[0]) return out;
+
+	/* Capture the assignment target before the right hand side is parsed */
+	struct token_list* target = current_target;
+	bool member = (0 == strcmp(global_token->prev->s, "]"));
+	out = common_recursion(expression, out, function);
+
+	/* Targets that were not indexed are always stored as integers */
+	if(!member) return emit("STORE_INTEGER\n", out);
+
+	/* Indexed targets are stored according to their element size; */
+	/* sizes other than 1 and 4 emit no store at all */
+	if(1 == target->type->indirect->size)
+	{
+		out = emit("STORE_CHAR\n", out);
+	}
+	else if(4 == target->type->indirect->size) out = emit("STORE_INTEGER\n", out);
+
+	return out;
+}
+
+struct type* lookup_type(char* s)
+{
+	/* Walk the list of known types looking for an exact name match */
+	struct type* candidate = global_types;
+	while(NULL != candidate)
+	{
+		if(0 == strcmp(candidate->name, s)) break;
+		candidate = candidate->next;
+	}
+	return candidate; /* NULL when the list was exhausted */
+}
+
+struct type* type_name();
+void create_struct() /* Parse a struct definition starting at its name and register it in global_types */
+{
+ int offset = 0; /* running offset of the next member; ends up as the struct's size */
+ struct type* head = calloc(1, sizeof(struct type)); /* the struct type itself */
+ struct type* i = calloc(1, sizeof(struct type)); /* its indirect (pointer) counterpart */
+ head->name = global_token->s;
+ i->name = global_token->s;
+ head->indirect = i;
+ i->indirect = head; /* following indirect twice leads back to the struct type */
+ head->next = global_types;
+ global_types = head; /* registered before the members are parsed */
+ global_token = global_token->next;
+ i->size = 4;
+ require_char("ERROR in create_struct\nMissing {\n", '{');
+ struct type* last = NULL;
+ while('}' != global_token->s[0])
+ {
+ struct type* member_type = type_name();
+ i = calloc(1, sizeof(struct type)); /* i is reused from here on for member entries */
+ i->name = global_token->s;
+ i->members = last; /* members are chained through ->members, newest first */
+ i->size = member_type->size;
+ i->offset = offset;
+ offset = offset + member_type->size;
+ global_token = global_token->next;
+ require_char("ERROR in create_struct\nMissing ;\n", ';');
+ last = i;
+ }
+
+ global_token = global_token->next; /* step over the '}' */
+ require_char("ERROR in create_struct\nMissing ;\n", ';');
+
+ head->size = offset;
+ head->members = last;
+ head->indirect->members = last; /* the indirect type shares the member chain */
+}
+
+
+/*
+ * type-name:
+ * char *
+ * int
+ */
+struct type* type_name()
+{
+	/* An optional leading "struct" keyword is consumed and remembered */
+	int structure = (0 == strcmp(global_token->s, "struct"));
+	if(structure) global_token = global_token->next;
+
+	/* Resolve the name against the known types */
+	struct type* ret = lookup_type(global_token->s);
+	if(NULL == ret)
+	{
+		if(structure)
+		{
+			/* Unknown struct name: define it now and report NULL to the caller */
+			create_struct();
+			return NULL;
+		}
+
+		fprintf(stderr, "Unknown type %s\n", global_token->s);
+		exit(EXIT_FAILURE);
+	}
+
+	/* Step past the type name */
+	global_token = global_token->next;
+
+	/* Every following token that starts with '*' moves to the indirect type */
+	while('*' == global_token->s[0])
+	{
+		global_token = global_token->next;
+		ret = ret->indirect;
+	}
+
+	return ret;
+}
+
+/* Process local variable */
+struct token_list* collect_local(struct token_list* out, struct token_list* function) /* Parse "type-name identifier [= expression] ;" and emit the push that creates the local */
+{
+ struct type* type_size = type_name();
+ out = double_emit("# Defining local ", global_token->s, out, true);
+
+ struct token_list* a = sym_declare(global_token->s, type_size, function->locals);
+ function->locals = a; /* the newest local becomes the head of the list */
+ global_token = global_token->next;
+ function->temps = function->temps - 1; /* NOTE(review): presumably offsets the new local already being listed although its slot is only pushed below - confirm */
+
+ if(global_token->s[0] == '=')
+ {
+ global_token = global_token->next;
+ out = expression(out, function);
+ }
+ function->temps = function->temps + 1; /* undo the adjustment made above */
+
+ require_char("ERROR in collect_local\nMissing ;\n", ';');
+
+ out = double_emit("PUSH_eax\t#", a->s, out, true); /* emitted whether or not an initializer was parsed */
+ return out;
+}
+
+struct token_list* statement(struct token_list* out, struct token_list* function);
+
+/* Evaluate if statements */
+int if_count;
+struct token_list* process_if(struct token_list* out, struct token_list* function)
+{
+	/* Every if statement gets its own number so that its labels are unique */
+	char* id = numerate_number(if_count);
+	if_count = if_count + 1;
+
+	/* if ( expression ) */
+	global_token = global_token->next;
+	out = double_emit("# IF_", id, out, false);
+	require_char("ERROR in process_if\nMISSING (\n", '(');
+	out = expression(out, function);
+	require_char("ERROR in process_if\nMISSING )\n", ')');
+
+	/* Conditional jump to the ELSE_ label, then the body, then a jump past the else part */
+	out = double_emit("TEST\nJUMP_EQ %ELSE_", id, out, false);
+	out = statement(out, function);
+	out = double_emit("JUMP %_END_IF_", id, out, false);
+	out = double_emit(":ELSE_", id, out, false);
+
+	/* The else branch is optional; both labels are emitted either way */
+	if(0 == strcmp(global_token->s, "else"))
+	{
+		global_token = global_token->next;
+		out = statement(out, function);
+	}
+	return double_emit(":_END_IF_", id, out, false);
+}
+
+int for_count;
+struct token_list* process_for(struct token_list* out, struct token_list* function)
+{
+	/* Every for loop gets its own number so that its labels are unique */
+	char* id = numerate_number(for_count);
+	for_count = for_count + 1;
+
+	/* for ( initialization */
+	global_token = global_token->next;
+	out = double_emit("# FOR_initialization_", id, out, false);
+	require_char("ERROR in process_for\nMISSING (\n", '(');
+	out = expression(out, function);
+
+	/* ; condition, placed right after the FOR_ label */
+	require_char("ERROR in process_for\nMISSING ;1\n", ';');
+	out = double_emit(":FOR_", id, out , false);
+	out = expression(out, function);
+	out = double_emit("TEST\nJUMP_EQ %FOR_END_", id, out, false);
+	out = double_emit("JUMP %FOR_THEN_", id, out, false);
+
+	/* ; iteration, emitted ahead of the body and followed by a jump back to FOR_ */
+	require_char("ERROR in process_for\nMISSING ;2\n", ';');
+	out = double_emit(":FOR_ITER_", id, out, false);
+	out = expression(out, function);
+	out = double_emit("JUMP %FOR_", id, out, false);
+
+	/* ) body, which ends with a jump to the iteration code */
+	require_char("ERROR in process_for\nMISSING )\n", ')');
+	out = double_emit(":FOR_THEN_", id, out, false);
+	out = statement(out, function);
+	out = double_emit("JUMP %FOR_ITER_", id, out, false);
+
+	/* FOR_END_ is the target of the conditional jump emitted above */
+	return double_emit(":FOR_END_", id, out, false);
+}
+
+/* Process Assembly statements */
+struct token_list* process_asm(struct token_list* out)
+{
+	global_token = global_token->next;
+	require_char("ERROR in process_asm\nMISSING (\n", '(');
+
+	/* Each string token is emitted from its second character on, followed by a newline */
+	for(; '"' == global_token->s[0]; global_token = global_token->next)
+	{
+		out = emit("\n", emit(global_token->s + 1, out));
+	}
+	require_char("ERROR in process_asm\nMISSING )\n", ')');
+	require_char("ERROR in process_asm\nMISSING ;\n", ';');
+	return out;
+}
+
+/* Process while loops */
+int while_count;
+struct token_list* process_while(struct token_list* out, struct token_list* function)
+{
+	/* Every while loop gets its own number so that its labels are unique */
+	char* id = numerate_number(while_count);
+	while_count = while_count + 1;
+
+	/* while ( expression ), with the condition placed right after the WHILE_ label */
+	global_token = global_token->next;
+	out = double_emit(":WHILE_", id, out, false);
+	require_char("ERROR in process_while\nMISSING (\n", '(');
+	out = expression(out, function);
+	require_char("ERROR in process_while\nMISSING )\n", ')');
+
+	out = double_emit("TEST\nJUMP_EQ %END_WHILE_", id, out, false);
+	out = double_emit("# THEN_while_", id, out, false);
+	out = statement(out, function);
+
+	/* Jump back to the condition; END_WHILE_ is the conditional jump's target */
+	out = double_emit("JUMP %WHILE_", id, out, false);
+	return double_emit(":END_WHILE_", id, out, false);
+}
+
+/* Ensure that functions return */
+struct token_list* return_result(struct token_list* out, struct token_list* function) /* Parse "return [expression] ;" and emit one pop per local followed by RETURN */
+{
+ global_token = global_token->next; /* step over the "return" keyword */
+ if(global_token->s[0] != ';') out = expression(out, function);
+
+ require_char("ERROR in return_result\nMISSING ;\n", ';');
+
+ for(struct token_list* i = function->locals; NULL != i; i = i->next)
+ {
+ out = emit("POP_ebx\t# _return_result_locals\n", out);
+ function->locals = function->locals->next; /* NOTE(review): this leaves function->locals NULL, so locals are unknown to any statement parsed after the return - confirm intended */
+ }
+ out = emit("RETURN\n", out);
+ return out;
+}
+
+struct token_list* recursive_statement(struct token_list* out, struct token_list* function) /* Parse "{ statement... }" and pop the locals declared inside it */
+{
+ global_token = global_token->next; /* step over the '{' */
+ struct token_list* frame = function->locals; /* locals that existed before this block */
+
+ while(strcmp(global_token->s, "}"))
+ {
+ out = statement(out, function);
+ }
+ global_token = global_token->next; /* step over the '}' */
+
+ /* Clean up any locals added */
+ if(NULL != function->locals) /* also skipped when return_result has already emptied the list */
+ {
+ for(struct token_list* i = function->locals; frame != i; i = i->next)
+ {
+ out = emit( "POP_ebx\t# _recursive_statement_locals\n", out);
+ function->locals = function->locals->next; /* unlink the local so it is no longer found by sym_lookup */
+ }
+ }
+ return out;
+}
+
+/*
+ * statement:
+ * { statement-list-opt }
+ * type-name identifier ;
+ * type-name identifier = expression;
+ * if ( expression ) statement
+ * if ( expression ) statement else statement
+ * while ( expression ) statement
+ * for ( expression ; expression ; expression ) statement
+ * asm ( "assembly" ... "assembly" ) ;
+ * return ;
+ * expr ;
+ */
+struct token_list* statement(struct token_list* out, struct token_list* function)
+{
+	/* The first token decides which kind of statement follows */
+	char* word = global_token->s;
+	if('{' == word[0])
+	{
+		return recursive_statement(out, function);
+	}
+	if((NULL != lookup_type(word)) || (0 == strcmp("struct", word)))
+	{
+		return collect_local(out, function);
+	}
+	if(0 == strcmp(word, "if"))
+	{
+		return process_if(out, function);
+	}
+	if(0 == strcmp(word, "while"))
+	{
+		return process_while(out, function);
+	}
+	if(0 == strcmp(word, "for"))
+	{
+		return process_for(out, function);
+	}
+	if(0 == strcmp(word, "asm"))
+	{
+		return process_asm(out);
+	}
+	if(0 == strcmp(word, "return"))
+	{
+		return return_result(out, function);
+	}
+	/* Anything else is treated as an expression followed by ';' */
+	out = expression(out, function);
+	require_char("ERROR in statement\nMISSING ;\n", ';');
+	return out;
+}
+
+/* Collect function arguments */
+void collect_arguments(struct token_list* function) /* Parse a parameter list starting at '(' and record named parameters in function->arguments */
+{
+ global_token = global_token->next; /* step over the '(' */
+
+ while(strcmp(global_token->s, ")"))
+ {
+ struct type* type_size = type_name(); /* leaves global_token on the token after the type */
+ if(global_token->s[0] == ')')
+ {
+ /* deal with foo(int|char|void) */
+ global_token = global_token->prev; /* step back so the advance below lands on the ')' again */
+ }
+ else if(global_token->s[0] != ',')
+ {
+ /* deal with foo(int a, char b) */
+ struct token_list* a = sym_declare(global_token->s, type_size, function->arguments);
+ function->arguments = a; /* the newest argument becomes the head of the list */
+ }
+
+ /* foo(int,char,void) doesn't need anything done */
+ global_token = global_token->next;
+
+ /* ignore trailing comma (needed for foo(bar(), 1); expressions*/
+ if(global_token->s[0] == ',') global_token = global_token->next;
+ }
+ global_token = global_token->next; /* step over the ')' */
+}
+
+struct token_list* declare_function(struct token_list* out, struct type* type) /* Register the function named by the token before '(' and, unless it is a prototype, emit its body */
+{
+	char* essential = global_token->prev->s; /* the function's name */
+	/* Hand the return type straight to sym_declare; no placeholder type is allocated */
+	struct token_list* func = sym_declare(essential, type, global_function_list);
+	collect_arguments(func);
+
+	/* allow previously defined functions to be looked up */
+	global_function_list = func;
+
+	/* If just a prototype don't waste time */
+	if(global_token->s[0] == ';') global_token = global_token->next;
+	else
+	{
+		out = double_emit("# Defining function ", essential, out, true);
+		out = double_emit(":FUNCTION_", essential, out, true);
+		out = statement(out, func);
+
+		/* Prevent duplicate RETURNS */
+		if(strcmp(out->s, "RETURN\n"))
+		{
+			out = emit("RETURN\n", out);
+		}
+	}
+	return out;
+}
+
+/*
+ * program:
+ * declaration
+ * declaration program
+ *
+ * declaration:
+ * CONSTANT identifier value
+ * type-name identifier ;
+ * type-name identifier ( parameter-list ) ;
+ * type-name identifier ( parameter-list ) statement
+ *
+ * parameter-list:
+ * parameter-declaration
+ * parameter-list, parameter-declaration
+ *
+ * parameter-declaration:
+ * type-name identifier-opt
+ */
+struct token_list* program(struct token_list* out) /* Parse every top level declaration until the tokens run out; returns the emitted core program */
+{
+	while(NULL != global_token)
+	{
+		/* Each pass handles one declaration: a CONSTANT, a struct, a global or a function */
+		if(!strcmp(global_token->s, "CONSTANT"))
+		{
+			global_constant_list = sym_declare(global_token->next->s, NULL, global_constant_list);
+			global_constant_list->arguments = global_token->next->next; /* the value token; read back by sym_get_value */
+			global_token = global_token->next->next->next;
+		}
+		else
+		{
+			struct type* type_size = type_name();
+			if(NULL == type_size)
+			{
+				continue; /* a struct was just defined; re-check for end of input before going on */
+			}
+			global_token = global_token->next; /* step over the identifier; it stays reachable through ->prev */
+			if(global_token->s[0] == ';')
+			{
+				/* Add to global symbol table */
+				global_symbol_list = sym_declare(global_token->prev->s, type_size, global_symbol_list);
+
+				/* Ensure 4 bytes are allocated for the global */
+				globals_list = double_emit(":GLOBAL_", global_token->prev->s, globals_list, true);
+				globals_list = emit("NOP\n", globals_list);
+
+				global_token = global_token->next;
+			}
+			else if(global_token->s[0] == '(') out = declare_function(out, type_size);
+			else
+			{
+				fprintf(stderr, "Received %s in program\n", global_token->s);
+				exit(EXIT_FAILURE);
+			}
+		}
+	}
+	return out;
+}
+
+void recursive_output(FILE* out, struct token_list* i)
+{
+	/* Everything reachable through ->next is written before this entry's own string */
+	if(NULL != i) recursive_output(out, i->next);
+	if(NULL != i) fprintf(out, "%s", i->s);
+}
+
+/* Initialize default types */
+void initialize_types()
+{
+	/* void: void* has the same properties as void */
+	struct type* void_type = calloc(1, sizeof(struct type));
+	void_type->name = "void";
+	void_type->size = 4;
+	void_type->indirect = void_type;
+
+	/* int: int* has the same properties as int */
+	struct type* int_type = calloc(1, sizeof(struct type));
+	int_type->name = "int";
+	int_type->size = 4;
+	int_type->indirect = int_type;
+
+	/* char has size 1 */
+	struct type* char_type = calloc(1, sizeof(struct type));
+	char_type->name = "char";
+	char_type->size = 1;
+
+	/* char* has size 4 */
+	struct type* char_pointer = calloc(1, sizeof(struct type));
+	char_pointer->name = "char*";
+	char_pointer->size = 4;
+
+	/* char and char* are each other's indirect, so char** leads back to char */
+	char_type->indirect = char_pointer;
+	char_pointer->indirect = char_type;
+
+	/* Finalize type list in the order void, int, char; */
+	/* char* is not linked into it and is only reached through char */
+	int_type->next = char_type;
+	void_type->next = int_type;
+	global_types = void_type;
+}
diff --git a/cc_reader.c b/cc_reader.c
index 0152bea..00d2fdd 100644
--- a/cc_reader.c
+++ b/cc_reader.c
@@ -114,7 +114,15 @@ reset:
goto reset;
}
}
- else if(c != EOF) c = consume_byte(current, c);
+ else if(c == EOF)
+ {
+ free(current);
+ return c;
+ }
+ else
+ {
+ c = consume_byte(current, c);
+ }
current->prev = token;
current->next = token;
@@ -135,11 +143,12 @@ struct token_list* reverse_list(struct token_list* head)
return root;
}
-struct token_list* read_all_tokens(char* source_file)
+struct token_list* read_all_tokens(FILE* a, struct token_list* current)
{
- input = fopen(source_file, "r");
+ input = a;
+ token = current;
int ch =fgetc(input);
while(EOF != ch) ch = get_token(ch);
- return reverse_list(token);
+ return token;
}
diff --git a/makefile b/makefile
index 70c8f02..df74613 100644
--- a/makefile
+++ b/makefile
@@ -6,8 +6,11 @@ all: M2-Planet
CC=gcc
CFLAGS=-D_GNU_SOURCE -O0 -std=c99 -ggdb
-M2-Planet: cc_reader.c cc_strings.c cc.c cc.h | bin
- $(CC) $(CFLAGS) cc_reader.c cc_strings.c cc.c cc.h -o bin/M2-Planet
+M2-Planet: cc_reader.c cc_strings.c cc_core.c cc.c cc.h | bin
+ $(CC) $(CFLAGS) cc_reader.c cc_strings.c cc_core.c cc.c cc.h -o bin/M2-Planet
+
+M2-Planet-minimal: cc_reader.c cc_strings.c cc_core.c cc-minimal.c cc.h | bin
+ $(CC) $(CFLAGS) cc_reader.c cc_strings.c cc_core.c cc-minimal.c cc.h -o bin/M2-Planet-minimal
# Clean up after ourselves
.PHONY: clean
@@ -15,6 +18,7 @@ clean:
rm -rf bin/ test/results/
./test/test0/cleanup.sh
./test/test1/cleanup.sh
+ ./test/test2/cleanup.sh
# Directories
bin:
@@ -24,7 +28,7 @@ results:
mkdir -p test/results
# tests
-test: test0-binary test1-binary | results
+test: test0-binary test1-binary test2-binary | results
sha256sum -c test/test.answers
test0-binary: M2-Planet | results
@@ -33,6 +37,9 @@ test0-binary: M2-Planet | results
test1-binary: M2-Planet | results
test/test1/hello.sh
+test2-binary: M2-Planet | results
+ test/test2/hello.sh
+
# Generate test answers
.PHONY: Generate-test-answers
Generate-test-answers:
diff --git a/test/test.answers b/test/test.answers
index 984b952..3e6b945 100644
--- a/test/test.answers
+++ b/test/test.answers
@@ -1,2 +1,3 @@
be3e57116e09c63e7819a391c550d2bdde2eb9f3409d9a54fcd09d2062b75dcf test/results/test0-binary
9d26baf5537e489ba73c24feb229d863e356d4796a876f6dc773cc191bc0ce99 test/results/test1-binary
+62e9942a705bb4b01756786b9f0df43035b93c4c8b878aa90b99c7f0c65dfb99 test/results/test2-binary
diff --git a/test/test0/hello.sh b/test/test0/hello.sh
index 59cb666..3e3742c 100755
--- a/test/test0/hello.sh
+++ b/test/test0/hello.sh
@@ -1,7 +1,7 @@
#! /bin/sh
set -ex
# Build the test
-bin/M2-Planet test/test0/cc500.c test/test0/cc0.M1 || exit 1
+bin/M2-Planet -f test/test0/cc500.c -o test/test0/cc0.M1 || exit 1
# Macro assemble with libc written in M1-Macro
M1 -f test/common_x86/x86_defs.M1 -f test/common_x86/libc.M1 -f test/test0/cc0.M1 --LittleEndian --Architecture 1 -o test/test0/cc0.hex2 || exit 2
# Resolve all linkages
diff --git a/test/test1/hello.sh b/test/test1/hello.sh
index fa2ecc5..0cd53b3 100755
--- a/test/test1/hello.sh
+++ b/test/test1/hello.sh
@@ -1,7 +1,7 @@
#! /bin/sh
set -ex
# Build the test
-bin/M2-Planet test/test1/for.c test/test1/for.M1 || exit 1
+bin/M2-Planet -f test/test1/for.c -o test/test1/for.M1 || exit 1
# Macro assemble with libc written in M1-Macro
M1 -f test/common_x86/x86_defs.M1 -f test/common_x86/libc.M1 -f test/test1/for.M1 --LittleEndian --Architecture 1 -o test/test1/for.hex2 || exit 2
# Resolve all linkages
diff --git a/test/test2/.gitignore b/test/test2/.gitignore
new file mode 100644
index 0000000..cc8b713
--- /dev/null
+++ b/test/test2/.gitignore
@@ -0,0 +1,8 @@
+# Ignore the files created by script
+cc0.M1
+cc0.hex2
+cc1
+cc2
+
+# A place to put a good run for comparison
+actual.M1
diff --git a/test/test2/cc.c b/test/test2/cc.c
new file mode 100644
index 0000000..361a3f2
--- /dev/null
+++ b/test/test2/cc.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2006 Edmund GRIMLEY EVANS
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "cc.h"
+
+/* Imported functions */
+void emit(int n, char *s); /* append n bytes of s to the code buffer */
+void sym_define_global(int current_symbol); /* bind a global symbol to the current code position */
+int sym_declare_global(char *s); /* find or create a global symbol and return its table index */
+void save_int(char *p, int n); /* store n at p as four little-endian bytes */
+void get_token(); /* read the next source token into token */
+void program(); /* compile all top-level declarations */
+
+void be_start() /* Emit the fixed start of the output image: ELF headers, a startup stub and the built-in library routines */
+{
+ emit(16, "\x7f\x45\x4c\x46\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00"); /* ELF magic; 32-bit, little-endian */
+ emit(16, "\x02\x00\x03\x00\x01\x00\x00\x00\x54\x80\x04\x08\x34\x00\x00\x00"); /* executable, i386; entry 0x08048054 (file offset 84) */
+ emit(16, "\x00\x00\x00\x00\x00\x00\x00\x00\x34\x00\x20\x00\x01\x00\x00\x00"); /* 52-byte ELF header, one 32-byte program header */
+ emit(16, "\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x80\x04\x08"); /* program header starts at offset 52: loadable segment at 0x08048000 */
+ emit(16, "\x00\x80\x04\x08\x10\x4b\x00\x00\x10\x4b\x00\x00\x07\x00\x00\x00"); /* the sizes at offsets 68 and 72 are overwritten by be_finish() */
+ emit(16, "\x00\x10\x00\x00\xe8\x00\x00\x00\x00\x89\xc3\x31\xc0\x40\xcd\x80"); /* offset 84: call <patched below> ; mov %eax,%ebx ; xor %eax,%eax ; inc %eax ; int $0x80 */
+
+ sym_define_global(sym_declare_global("exit")); /* built-in routine: the symbol is bound to the bytes emitted next */
+ /* pop %ebx ; pop %ebx ; xor %eax,%eax ; inc %eax ; int $0x80 */
+ emit(7, "\x5b\x5b\x31\xc0\x40\xcd\x80");
+
+ sym_define_global(sym_declare_global("getchar")); /* built-in routine: system call 3 for one byte, result popped into %eax */
+ /* mov $3,%eax ; xor %ebx,%ebx ; push %ebx ; mov %esp,%ecx */
+ emit(10, "\xb8\x03\x00\x00\x00\x31\xdb\x53\x89\xe1");
+ /* xor %edx,%edx ; inc %edx ; int $0x80 */
+ /* test %eax,%eax ; pop %eax ; jne . + 7 */
+ emit(10, "\x31\xd2\x42\xcd\x80\x85\xc0\x58\x75\x05");
+ /* mov $-1,%eax ; ret */
+ emit(6, "\xb8\xff\xff\xff\xff\xc3"); /* the jne above lands on the ret, skipping the mov $-1 */
+
+ sym_define_global(sym_declare_global("malloc")); /* built-in routine: grows the break with system call 45 */
+ /* mov 4(%esp),%eax */
+ emit(4, "\x8b\x44\x24\x04");
+ /* push %eax ; xor %ebx,%ebx ; mov $45,%eax ; int $0x80 */
+ emit(10, "\x50\x31\xdb\xb8\x2d\x00\x00\x00\xcd\x80");
+ /* pop %ebx ; add %eax,%ebx ; push %eax ; push %ebx ; mov $45,%eax */
+ emit(10, "\x5b\x01\xc3\x50\x53\xb8\x2d\x00\x00\x00");
+ /* int $0x80 ; pop %ebx ; cmp %eax,%ebx ; pop %eax ; je . + 7 */
+ emit(8, "\xcd\x80\x5b\x39\xc3\x58\x74\x05");
+ /* mov $-1,%eax ; ret */
+ emit(6, "\xb8\xff\xff\xff\xff\xc3"); /* the je above lands on the ret, returning the old break */
+
+ sym_define_global(sym_declare_global("putchar")); /* built-in routine: system call 4 writing one byte taken from the argument slot */
+ /* mov $4,%eax ; xor %ebx,%ebx ; inc %ebx */
+ emit(8, "\xb8\x04\x00\x00\x00\x31\xdb\x43");
+ /* lea 4(%esp),%ecx ; mov %ebx,%edx ; int $0x80 ; ret */
+ emit(9, "\x8d\x4c\x24\x04\x89\xda\xcd\x80\xc3");
+
+ save_int(code + 85, codepos - 89); /* patch the startup call (operand at offset 85, next instruction at 89) to target the first code emitted after these routines */
+}
+
+void be_finish() /* Patch the final image size into the program header and write the whole image with putchar() */
+{
+ save_int(code + 68, codepos); /* offset 68: segment size in the file */
+ save_int(code + 72, codepos); /* offset 72: segment size in memory */
+ i = 0; /* the global i is the loop index */
+ while (i <= codepos - 1) {
+ putchar(code[i]);
+ i = i + 1;
+ }
+}
+
+
+int main() /* Compile the source read with getchar() and write the resulting executable with putchar() */
+{
+ code_offset = 134512640; /* 0x08048000 */
+ be_start(); /* headers, startup stub and built-in routines */
+ nextc = getchar(); /* prime the one-character lookahead */
+ get_token(); /* prime the one-token lookahead */
+ program(); /* compile every declaration in the input */
+ be_finish(); /* patch sizes and output the image */
+ return 0;
+}
diff --git a/test/test2/cc.h b/test/test2/cc.h
new file mode 100644
index 0000000..1a4d1c4
--- /dev/null
+++ b/test/test2/cc.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2006 Edmund GRIMLEY EVANS
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ * A self-compiling compiler for a small subset of C.
+ */
+
+/* Our library functions. */
+void exit(int);
+int getchar(void);
+void *malloc(int);
+int putchar(int);
+
+/* Our globals (shared by cc.c and cc1.c) */
+int code_offset; /* base address added to codepos to form symbol values; set in main() */
+char *code; /* buffer holding the generated output image */
+int codepos; /* number of bytes emitted into code so far */
+int i; /* shared scratch index; several functions reset and use it */
+int nextc; /* one-character lookahead read with getchar() */
diff --git a/test/test2/cc1.c b/test/test2/cc1.c
new file mode 100644
index 0000000..5532188
--- /dev/null
+++ b/test/test2/cc1.c
@@ -0,0 +1,681 @@
+/*
+ * Copyright (C) 2006 Edmund GRIMLEY EVANS
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "cc.h"
+
+char *my_realloc(char *old, int oldlen, int newlen) /* Return a new newlen-byte buffer holding the first oldlen bytes of old */
+{
+ char *new = malloc(newlen); /* the result is not checked and old is never freed */
+ int i = 0; /* local index; shadows the global i */
+ while (i <= oldlen - 1) {
+ new[i] = old[i];
+ i = i + 1;
+ }
+ return new;
+}
+
+char *token; /* text of the current token; get_token() terminates it with a 0 byte */
+int token_size; /* number of bytes currently allocated for token */
+
+void error() /* Abort compilation: exits with status 1 and prints nothing */
+{
+ exit(1);
+}
+
+void takechar() /* Append nextc to token at the global index i, growing token first if needed, then read the next character */
+{
+ if (token_size <= i + 1) { /* keep room for this character and a terminating 0 */
+ int x = (i + 10) << 1; /* new size is twice (i + 10) */
+ token = my_realloc(token, token_size, x);
+ token_size = x;
+ }
+ token[i] = nextc;
+ i = i + 1;
+ nextc = getchar(); /* advance the lookahead */
+}
+
+void get_token() /* Read the next token from the input into token, skipping white space and comments; token is empty at end of input */
+{
+ int w = 1; /* set again whenever a comment was skipped, to restart the scan */
+ while (w) {
+ w = 0;
+ while ((nextc == ' ') | (nextc == 9) | (nextc == 10)) /* skip space, tab (9) and newline (10) */
+ nextc = getchar();
+ i = 0; /* the global i counts the characters collected into token */
+ while ((('a' <= nextc) & (nextc <= 'z')) | /* word: lower-case letters, digits and underscore only */
+ (('0' <= nextc) & (nextc <= '9')) | (nextc == '_'))
+ takechar();
+ if (i == 0) /* no word found: try a run of operator characters */
+ while ((nextc == '<') | (nextc == '=') | (nextc == '>') |
+ (nextc == '|') | (nextc == '&') | (nextc == '!'))
+ takechar();
+ if (i == 0) {
+ if (nextc == 39) { /* 39 is the single quote: character literal */
+ takechar();
+ while (nextc != 39) /* no escape handling: stops at the next quote */
+ takechar();
+ takechar();
+ }
+ else if (nextc == '"') { /* string literal, kept with both quotes */
+ takechar();
+ while (nextc != '"') /* no escape handling: stops at the next double quote */
+ takechar();
+ takechar();
+ }
+ else if (nextc == '/') {
+ takechar(); /* tentatively keep the slash as a token of its own */
+ if (nextc == '*') { /* it starts a comment: skip to the closing star-slash */
+ nextc = getchar();
+ while (nextc != '/') {
+ while (nextc != '*')
+ nextc = getchar();
+ nextc = getchar();
+ }
+ nextc = getchar();
+ w = 1; /* rescan; i is reset at the top of the loop, dropping the slash */
+ }
+ }
+ else if (nextc != 0-1) /* 0-1 is -1, the end-of-input value; any other character is a one-character token */
+ takechar();
+ }
+ token[i] = 0; /* terminate the token text */
+ }
+}
+
+int peek(char *s) /* Return 1 if the current token is exactly s, otherwise 0; does not consume the token */
+{
+ int i = 0; /* local index; shadows the global i */
+ while ((s[i] == token[i]) & (s[i] != 0)) /* advance over the common prefix */
+ i = i + 1;
+ return s[i] == token[i]; /* equal only if both strings ended here */
+}
+
+int accept(char *s) /* If the current token is s, consume it and return 1; otherwise return 0 and leave it in place */
+{
+ if (peek(s)) {
+ get_token(); /* advance to the following token */
+ return 1;
+ }
+ else
+ return 0;
+}
+
+void expect(char *s) /* Consume the token s, or abort through error() if the current token is something else */
+{
+ if (accept(s) == 0)
+ error();
+}
+
+int code_size; /* number of bytes currently allocated for the code buffer; maintained by emit() */
+
+void save_int(char *p, int n) /* Store n into p[0..3], least significant byte first */
+{
+ p[0] = n; /* each assignment keeps only the low byte that fits in a char */
+ p[1] = n >> 8;
+ p[2] = n >> 16;
+ p[3] = n >> 24;
+}
+
+int load_int(char *p) /* Return the integer stored in p[0..3], least significant byte first; inverse of save_int() */
+{
+ return ((p[0] & 255) + ((p[1] & 255) << 8) + /* & 255 discards any sign extension of the char */
+ ((p[2] & 255) << 16) + ((p[3] & 255) << 24));
+}
+
+void emit(int n, char *s) /* Append the n bytes s[0..n-1] to the code buffer, growing the buffer if needed */
+{
+ i = 0; /* uses the global i, so the caller's value of i is overwritten */
+ if (code_size <= codepos + n) {
+ int x = (codepos + n) << 1; /* new size is twice what is needed now */
+ code = my_realloc(code, code_size, x);
+ code_size = x;
+ }
+ while (i <= n - 1) {
+ code[codepos] = s[i];
+ codepos = codepos + 1;
+ i = i + 1;
+ }
+}
+
+void be_push() /* Emit a push of %eax; callers adjust stack_pos themselves */
+{
+ emit(1, "\x50"); /* push %eax */
+}
+
+void be_pop(int n) /* Emit code that removes n four-byte slots from the stack; stack_pos is not changed here */
+{
+ emit(6, "\x81\xc4...."); /* add $(n * 4),%esp */
+ save_int(code + codepos - 4, n << 2); /* overwrite the four placeholder bytes with n * 4 */
+}
+
+char *table; /* symbol table: per entry the name, a 0 byte, one type byte and a four-byte value */
+int table_size; /* number of bytes currently allocated for table */
+int table_pos; /* offset just past the last entry in table */
+int stack_pos; /* number of four-byte slots the generated code has pushed so far */
+
+int sym_lookup(char *s) /* Return the table index of the 0 byte ending the name of the last entry called s, or 0 if there is none */
+{
+ int t = 0;
+ int current_symbol = 0;
+ while (t <= table_pos - 1) { /* visit every entry; later matches replace earlier ones */
+ i = 0; /* uses the global i */
+ while ((s[i] == table[t]) & (s[i] != 0)) {
+ i = i + 1;
+ t = t + 1;
+ }
+ if (s[i] == table[t]) /* both names ended together: full match */
+ current_symbol = t;
+ while (table[t] != 0) /* skip the rest of this entry's name */
+ t = t + 1;
+ t = t + 6; /* skip the 0 byte, the type byte and the four value bytes */
+ }
+ return current_symbol;
+}
+
+void sym_declare(char *s, int type, int value) /* Append an entry for name s with the given type byte and value to the symbol table */
+{
+ int t = table_pos;
+ i = 0; /* uses the global i */
+ while (s[i] != 0) {
+ if (table_size <= t + 10) { /* the margin of 10 leaves room for the six trailing bytes */
+ int x = (t + 10) << 1;
+ table = my_realloc(table, table_size, x);
+ table_size = x;
+ }
+ table[t] = s[i];
+ i = i + 1;
+ t = t + 1;
+ }
+ table[t] = 0; /* end of name */
+ table[t + 1] = type;
+ save_int(table + t + 2, value);
+ table_pos = t + 6; /* next entry starts after the 0 byte, type byte and value */
+}
+
+int sym_declare_global(char *s) /* Return the table index for s, first adding it as an undefined global ('U') if it is unknown */
+{
+ int current_symbol = sym_lookup(s);
+ if (current_symbol == 0) { /* not found */
+ sym_declare(s, 'U', code_offset); /* value code_offset marks an empty chain of pending uses */
+ current_symbol = table_pos - 6; /* index of the 0 byte ending the new name, as sym_lookup() would return */
+ }
+ return current_symbol;
+}
+
+void sym_define_global(int current_symbol) /* Define the undefined global at this table index as the current code position and patch all earlier uses */
+{
+ int i; /* local; shadows the global i */
+ int j;
+ int t = current_symbol;
+ int v = codepos + code_offset; /* the address the symbol now stands for */
+ if (table[t + 1] != 'U')
+ error(); /* symbol redefined */
+ i = load_int(table + t + 2) - code_offset; /* buffer offset of the most recent pending use, 0 if none */
+ while (i) {
+ j = load_int(code + i) - code_offset; /* each pending operand holds the link to the previous one */
+ save_int(code + i, v); /* replace the link by the real address */
+ i = j;
+ }
+ table[t + 1] = 'D'; /* now a defined global */
+ save_int(table + t + 2, v);
+}
+
+int number_of_args; /* parameter count of the function being compiled; set in program() */
+
+void sym_get_value(char *s) /* Emit code that leaves the address belonging to symbol s in %eax; calls error() if s is unknown */
+{
+ int t;
+ if ((t = sym_lookup(s)) == 0) /* 0 means the name was not found */
+ error();
+ emit(5, "\xb8...."); /* mov $n,%eax */
+ save_int(code + codepos - 4, load_int(table + t + 2)); /* n is the value currently stored for the symbol */
+ if (table[t + 1] == 'D') { /* defined global */
+ }
+ else if (table[t + 1] == 'U') /* undefined global */
+ save_int(table + t + 2, codepos + code_offset - 4); /* record this operand as the newest pending use for sym_define_global() */
+ else if (table[t + 1] == 'L') { /* local variable */
+ int k = (stack_pos - table[t + 2] - 1) << 2; /* byte offset from %esp; only the low byte of the stored value is read */
+ emit(7, "\x8d\x84\x24...."); /* lea (n * 4)(%esp),%eax */
+ save_int(code + codepos - 4, k);
+ }
+ else if (table[t + 1] == 'A') { /* argument */
+ int k = (stack_pos + number_of_args - table[t + 2] + 1) << 2; /* byte offset from %esp; only the low byte of the stored value is read */
+ emit(7, "\x8d\x84\x24...."); /* lea (n * 4)(%esp),%eax */
+ save_int(code + codepos - 4, k);
+ }
+ else
+ error();
+}
+
+void promote(int type) /* Emit code turning an lvalue address in %eax into its value; emits nothing for type 3 */
+{
+ /* 1 = char lval, 2 = int lval, 3 = other */
+ if (type == 1)
+ emit(3, "\x0f\xbe\x00"); /* movsbl (%eax),%eax */
+ else if (type == 2)
+ emit(2, "\x8b\x00"); /* mov (%eax),%eax */
+}
+
+int expression(); /* forward declaration: primary_expr() and postfix_expr() call it before its definition */
+
+/*
+ * primary-expr:
+ * identifier
+ * constant
+ * ( expression )
+ */
+int primary_expr() /* Compile one primary expression, consume its last token and return its type code (see promote()) */
+{
+ int type;
+ if (('0' <= token[0]) & (token[0] <= '9')) { /* decimal constant */
+ int n = 0;
+ i = 0; /* uses the global i */
+ while (token[i]) {
+ n = (n << 1) + (n << 3) + token[i] - '0'; /* n * 10 + digit, written with shifts */
+ i = i + 1;
+ }
+ emit(5, "\xb8...."); /* mov $x,%eax */
+ save_int(code + codepos - 4, n);
+ type = 3;
+ }
+ else if (('a' <= token[0]) & (token[0] <= 'z')) { /* identifier */
+ sym_get_value(token);
+ type = 2; /* every identifier is treated as an int lvalue */
+ }
+ else if (accept("(")) {
+ type = expression();
+ if (peek(")") == 0) /* the closing parenthesis is consumed by get_token() below */
+ error();
+ }
+ else if ((token[0] == 39) & (token[1] != 0) & /* character literal: quote (39), one character, quote */
+ (token[2] == 39) & (token[3] == 0)) {
+ emit(5, "\xb8...."); /* mov $x,%eax */
+ save_int(code + codepos - 4, token[1]);
+ type = 3;
+ }
+ else if (token[0] == '"') { /* string literal: decoded in place inside token */
+ int i = 0; /* write index; local, shadows the global i */
+ int j = 1; /* read index, starting after the opening quote */
+ int k;
+ while (token[j] != '"') {
+ if ((token[j] == 92) & (token[j + 1] == 'x')) { /* 92 is the backslash: two-digit hex escape */
+ if (token[j + 2] <= '9')
+ k = token[j + 2] - '0';
+ else
+ k = token[j + 2] - 'a' + 10; /* only lower-case hex letters give the right value */
+ k = k << 4;
+ if (token[j + 3] <= '9')
+ k = k + token[j + 3] - '0';
+ else
+ k = k + token[j + 3] - 'a' + 10;
+ token[i] = k;
+ j = j + 4;
+ }
+ else {
+ token[i] = token[j]; /* every other character is copied unchanged */
+ j = j + 1;
+ }
+ i = i + 1;
+ }
+ token[i] = 0;
+ /* call ... ; the string ; pop %eax */
+ emit(5, "\xe8....");
+ save_int(code + codepos - 4, i + 1); /* the call jumps over the string and its 0 byte */
+ emit(i + 1, token);
+ emit(1, "\x58"); /* pop %eax: the return address is the address of the string */
+ type = 3;
+ }
+ else
+ error();
+ get_token(); /* consume the last token of the primary expression */
+ return type;
+}
+
+void binary1(int type) /* First half of a binary operator: turn the left operand into a value and push it */
+{
+ promote(type);
+ be_push();
+ stack_pos = stack_pos + 1; /* one more slot on the stack */
+}
+
+int binary2(int type, int n, char *s) /* Second half of a binary operator: turn the right operand into a value and emit the n operator bytes s; returns type code 3 */
+{
+ promote(type);
+ emit(n, s);
+ stack_pos = stack_pos - 1; /* every operator sequence passed in pops the left operand */
+ return 3;
+}
+
+/*
+ * postfix-expr:
+ * primary-expr
+ * postfix-expr [ expression ]
+ * postfix-expr ( expression-list-opt )
+ */
+int postfix_expr() /* Compile a primary expression followed by at most one index or call suffix; returns the type code of the result */
+{
+ int type = primary_expr();
+ if (accept("[")) {
+ binary1(type); /* pop %ebx ; add %ebx,%eax */
+ binary2(expression(), 3, "\x5b\x01\xd8"); /* base plus index, with no scaling of the index */
+ expect("]");
+ type = 1; /* the element is always treated as a char lvalue */
+ }
+ else if (accept("(")) {
+ int s = stack_pos; /* stack depth before the call sequence */
+ be_push(); /* push what the primary expression left in %eax, without promote() */
+ stack_pos = stack_pos + 1;
+ if (accept(")") == 0) {
+ promote(expression()); /* first argument */
+ be_push();
+ stack_pos = stack_pos + 1;
+ while (accept(",")) { /* remaining arguments, pushed in source order */
+ promote(expression());
+ be_push();
+ stack_pos = stack_pos + 1;
+ }
+ expect(")");
+ }
+ emit(7, "\x8b\x84\x24...."); /* mov (n * 4)(%esp),%eax */
+ save_int(code + codepos - 4, (stack_pos - s - 1) << 2); /* offset of the slot pushed before the arguments */
+ emit(2, "\xff\xd0"); /* call *%eax */
+ be_pop(stack_pos - s); /* drop the arguments and the callee slot */
+ stack_pos = s;
+ type = 3;
+ }
+ return type;
+}
+
+/*
+ * additive-expr:
+ * postfix-expr
+ * additive-expr + postfix-expr
+ * additive-expr - postfix-expr
+ */
+int additive_expr() /* Compile a left-associative chain of + and - over postfix expressions; returns the type code of the result */
+{
+ int type = postfix_expr();
+ while (1) { /* left only through the return below */
+ if (accept("+")) {
+ binary1(type); /* pop %ebx ; add %ebx,%eax */
+ type = binary2(postfix_expr(), 3, "\x5b\x01\xd8");
+ }
+ else if (accept("-")) {
+ binary1(type); /* pop %ebx ; sub %eax,%ebx ; mov %ebx,%eax */
+ type = binary2(postfix_expr(), 5, "\x5b\x29\xc3\x89\xd8");
+ }
+ else
+ return type;
+ }
+}
+
+/*
+ * shift-expr:
+ * additive-expr
+ * shift-expr << additive-expr
+ * shift-expr >> additive-expr
+ */
+int shift_expr() /* Compile a left-associative chain of << and >> over additive expressions; returns the type code of the result */
+{
+ int type = additive_expr();
+ while (1) { /* left only through the return below */
+ if (accept("<<")) {
+ binary1(type); /* mov %eax,%ecx ; pop %eax ; shl %cl,%eax */
+ type = binary2(additive_expr(), 5, "\x89\xc1\x58\xd3\xe0");
+ }
+ else if (accept(">>")) {
+ binary1(type); /* mov %eax,%ecx ; pop %eax ; sar %cl,%eax */
+ type = binary2(additive_expr(), 5, "\x89\xc1\x58\xd3\xf8");
+ }
+ else
+ return type;
+ }
+}
+
+/*
+ * relational-expr:
+ * shift-expr
+ * relational-expr <= shift-expr
+ */
+int relational_expr() /* Compile a left-associative chain of <= over shift expressions (the only relational operator handled); returns the type code */
+{
+ int type = shift_expr();
+ while (accept("<=")) {
+ binary1(type);
+ /* pop %ebx ; cmp %eax,%ebx ; setle %al ; movzbl %al,%eax */
+ type = binary2(shift_expr(),
+ 9, "\x5b\x39\xc3\x0f\x9e\xc0\x0f\xb6\xc0");
+ }
+ return type;
+}
+
+/*
+ * equality-expr:
+ * relational-expr
+ * equality-expr == relational-expr
+ * equality-expr != relational-expr
+ */
+int equality_expr() /* Compile a left-associative chain of == and != over relational expressions; returns the type code of the result */
+{
+ int type = relational_expr();
+ while (1) { /* left only through the return below */
+ if (accept("==")) {
+ binary1(type);
+ /* pop %ebx ; cmp %eax,%ebx ; sete %al ; movzbl %al,%eax */
+ type = binary2(relational_expr(),
+ 9, "\x5b\x39\xc3\x0f\x94\xc0\x0f\xb6\xc0");
+ }
+ else if (accept("!=")) {
+ binary1(type);
+ /* pop %ebx ; cmp %eax,%ebx ; setne %al ; movzbl %al,%eax */
+ type = binary2(relational_expr(),
+ 9, "\x5b\x39\xc3\x0f\x95\xc0\x0f\xb6\xc0");
+ }
+ else
+ return type;
+ }
+}
+
+/*
+ * bitwise-and-expr:
+ * equality-expr
+ * bitwise-and-expr & equality-expr
+ */
+int bitwise_and_expr() /* Compile a left-associative chain of & over equality expressions; returns the type code of the result */
+{
+ int type = equality_expr();
+ while (accept("&")) {
+ binary1(type); /* pop %ebx ; and %ebx,%eax */
+ type = binary2(equality_expr(), 3, "\x5b\x21\xd8");
+ }
+ return type;
+}
+
+/*
+ * bitwise-or-expr:
+ * bitwise-and-expr
+ * bitwise-or-expr | bitwise-and-expr
+ */
+int bitwise_or_expr() /* Compile a left-associative chain of | over bitwise-and expressions; returns the type code of the result */
+{
+ int type = bitwise_and_expr();
+ while (accept("|")) {
+ binary1(type); /* pop %ebx ; or %ebx,%eax */
+ type = binary2(bitwise_and_expr(), 3, "\x5b\x09\xd8");
+ }
+ return type;
+}
+
+/*
+ * expression:
+ * bitwise-or-expr
+ * bitwise-or-expr = expression
+ */
+int expression() /* Compile a bitwise-or expression optionally followed by an assignment; returns the type code of the result */
+{
+ int type = bitwise_or_expr();
+ if (accept("=")) {
+ be_push(); /* save the destination address left in %eax */
+ stack_pos = stack_pos + 1;
+ promote(expression()); /* the recursion makes assignment right-associative */
+ if (type == 2)
+ emit(3, "\x5b\x89\x03"); /* pop %ebx ; mov %eax,(%ebx) */
+ else
+ emit(3, "\x5b\x88\x03"); /* pop %ebx ; mov %al,(%ebx) */
+ stack_pos = stack_pos - 1;
+ type = 3; /* the assigned value stays in %eax */
+ }
+ return type;
+}
+
+/*
+ * type-name:
+ * char *
+ * int
+ */
+void type_name() /* Skip a type name: one token, whatever it is, followed by any number of * tokens */
+{
+ get_token(); /* the base type token is not checked here */
+ while (accept("*")) {
+ }
+}
+
+/*
+ * statement:
+ * { statement-list-opt }
+ * type-name identifier ;
+ * type-name identifier = expression;
+ * if ( expression ) statement
+ * if ( expression ) statement else statement
+ * while ( expression ) statement
+ * return expression-opt ;
+ * expr ;
+ */
+void statement() /* Compile one statement, choosing its form from the current token */
+{
+ int p1; /* code positions remembered for patching jump displacements */
+ int p2;
+ if (accept("{")) {
+ int n = table_pos; /* symbol table and stack depth on entry to the block */
+ int s = stack_pos;
+ while (accept("}") == 0)
+ statement();
+ table_pos = n; /* forget the symbols declared inside the block */
+ be_pop(stack_pos - s); /* release the slots of the block's locals */
+ stack_pos = s;
+ }
+ else if (peek("char") | peek("int")) { /* local variable declaration */
+ type_name();
+ sym_declare(token, 'L', stack_pos); /* the variable lives in the slot pushed below */
+ get_token();
+ if (accept("="))
+ promote(expression());
+ expect(";");
+ be_push(); /* without an initializer this pushes whatever is in %eax */
+ stack_pos = stack_pos + 1;
+ }
+ else if (accept("if")) {
+ expect("(");
+ promote(expression());
+ emit(8, "\x85\xc0\x0f\x84...."); /* test %eax,%eax ; je ... */
+ p1 = codepos; /* just past the je, whose displacement is patched below */
+ expect(")");
+ statement();
+ emit(5, "\xe9...."); /* jmp ... */
+ p2 = codepos; /* just past the jmp, whose displacement is patched below */
+ save_int(code + p1 - 4, codepos - p1); /* a false condition continues after the jmp */
+ if (accept("else"))
+ statement();
+ save_int(code + p2 - 4, codepos - p2); /* the jmp skips the else part, if any */
+ }
+ else if (accept("while")) {
+ expect("(");
+ p1 = codepos; /* start of the condition, target of the backward jump */
+ promote(expression());
+ emit(8, "\x85\xc0\x0f\x84...."); /* test %eax,%eax ; je ... */
+ p2 = codepos; /* just past the je, whose displacement is patched below */
+ expect(")");
+ statement();
+ emit(5, "\xe9...."); /* jmp ... */
+ save_int(code + codepos - 4, p1 - codepos); /* jump back to the condition */
+ save_int(code + p2 - 4, codepos - p2); /* a false condition continues after the loop */
+ }
+ else if (accept("return")) {
+ if (peek(";") == 0) /* optional return value, left in %eax */
+ promote(expression());
+ expect(";");
+ be_pop(stack_pos); /* emit the removal of all slots; stack_pos itself is left unchanged */
+ emit(1, "\xc3"); /* ret */
+ }
+ else {
+ expression(); /* expression statement: the result is not used */
+ expect(";");
+ }
+}
+
+/*
+ * program:
+ * declaration
+ * declaration program
+ *
+ * declaration:
+ * type-name identifier ;
+ * type-name identifier ( parameter-list ) ;
+ * type-name identifier ( parameter-list ) statement
+ *
+ * parameter-list:
+ * parameter-declaration
+ * parameter-list, parameter-declaration
+ *
+ * parameter-declaration:
+ * type-name identifier-opt
+ */
+void program() /* Compile top-level declarations until the input is exhausted */
+{
+ int current_symbol;
+ while (token[0]) { /* an empty token means end of input */
+ type_name();
+ current_symbol = sym_declare_global(token);
+ get_token();
+ if (accept(";")) { /* global variable */
+ sym_define_global(current_symbol);
+ emit(4, "\x00\x00\x00\x00"); /* four zero bytes of storage inside the image */
+ }
+ else if (accept("(")) { /* function prototype or definition */
+ int n = table_pos; /* table position before the parameters are added */
+ number_of_args = 0;
+ while (accept(")") == 0) {
+ number_of_args = number_of_args + 1;
+ type_name();
+ if (peek(")") == 0) { /* a parameter name is present */
+ sym_declare(token, 'A', number_of_args); /* the value is the parameter's 1-based position */
+ get_token();
+ }
+ accept(","); /* ignore trailing comma */
+ }
+ if (accept(";") == 0) { /* no semicolon: a body follows */
+ sym_define_global(current_symbol);
+ statement();
+ emit(1, "\xc3"); /* ret */
+ }
+ table_pos = n; /* forget the parameters */
+ }
+ else
+ error();
+ }
+}
diff --git a/test/test2/cleanup.sh b/test/test2/cleanup.sh
new file mode 100755
index 0000000..083e6ad
--- /dev/null
+++ b/test/test2/cleanup.sh
@@ -0,0 +1,6 @@
+#! /bin/sh
+rm -f test/test2/cc0.M1 # M1 output written by M2-Planet in hello.sh
+rm -f test/test2/cc0.hex2 # hex2 output written by M1 in hello.sh
+rm -f test/test2/cc1 # first-generation output of the test binary
+rm -f test/test2/cc2 # second-generation output produced by cc1
+exit 0
diff --git a/test/test2/hello.sh b/test/test2/hello.sh
new file mode 100755
index 0000000..2bf379e
--- /dev/null
+++ b/test/test2/hello.sh
@@ -0,0 +1,26 @@
+#! /bin/sh
+set -ex
+# Compile the three test2 source files with M2-Planet into one M1 file
+bin/M2-Planet -f test/test2/cc.h -f test/test2/cc1.c -f test/test2/cc.c -o test/test2/cc0.M1 || exit 1
+# Macro assemble with libc written in M1-Macro
+M1 -f test/common_x86/x86_defs.M1 -f test/common_x86/libc.M1 -f test/test2/cc0.M1 --LittleEndian --Architecture 1 -o test/test2/cc0.hex2 || exit 2
+# Resolve all linkages
+hex2 -f test/common_x86/ELF-i386.hex2 -f test/test2/cc0.hex2 --LittleEndian --Architecture 1 --BaseAddress 0x8048000 -o test/results/test2-binary --exec_enable || exit 3
+
+# Ensure binary works if host machine supports test
+if [ "$(get_machine)" = "x86_64" ]
+then
+ # Verify that the compiled binary can compile test/test0/cc500.c and that the output has the expected hash
+ ./test/results/test2-binary < test/test0/cc500.c >| test/test2/cc1 || exit 4
+ out=$(sha256sum -c test/test2/proof0.answer)
+ [ "$out" = "test/test2/cc1: OK" ] || exit 5
+
+ # Make it executable
+ exec_enable test/test2/cc1
+
+ # Verify that cc1, run as a compiler on the same input, produces output with the expected hash too
+ ./test/test2/cc1 < test/test0/cc500.c >| test/test2/cc2 || exit 6
+ out=$(sha256sum -c test/test2/proof1.answer)
+ [ "$out" = "test/test2/cc2: OK" ] || exit 7
+fi
+exit 0
diff --git a/test/test2/proof0.answer b/test/test2/proof0.answer
new file mode 100644
index 0000000..a1b87d1
--- /dev/null
+++ b/test/test2/proof0.answer
@@ -0,0 +1 @@
+3987b6a29775e015c11e35008ad0d0b3ee57f32655dc26f2360000b345139f54 test/test2/cc1
diff --git a/test/test2/proof1.answer b/test/test2/proof1.answer
new file mode 100644
index 0000000..5bf6885
--- /dev/null
+++ b/test/test2/proof1.answer
@@ -0,0 +1 @@
+3987b6a29775e015c11e35008ad0d0b3ee57f32655dc26f2360000b345139f54 test/test2/cc2