From dd1daf92e4bd6c33b05b6257ca1f0c8c18b7e441 Mon Sep 17 00:00:00 2001 From: Jan Nieuwenhuizen Date: Tue, 13 Dec 2016 19:58:34 +0100 Subject: [PATCH] Extend Scheme reader, reduce C reader dependency. * mes.c (bload_env): Mark as internal. (load_env): Likewise. Load Scheme reader from source. Remove dumping. (dump): New function. * (vm_begin_env): Allow for gc while read_input_file_env. (mes_builtins): Add *dot*. (read_input_file_env)[!READER]: Invoke read-input-file. * module/mes/read-0.mes (read-env): New function. (read-word): Support quasisyntax. Remove usage of ' thoughout. * module/mes/repl.mes (repl): Use read instead of read-env. * guile/mes.scm (environment): Add *dot*. * guile/reader.mes: Update. * NEWS: Update. --- NEWS | 6 ++++ guile/mes.scm | 1 + guile/reader.mes | 56 ++++++++++++++--------------- mes.c | 84 +++++++++++++++++++++++++------------------ module/mes/read-0.mes | 74 +++++++++++++++++++++++--------------- module/mes/repl.mes | 6 ++-- reader.c | 26 +++++++------- 7 files changed, 146 insertions(+), 107 deletions(-) diff --git a/NEWS b/NEWS index 71cdabd9..aeda4e79 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,12 @@ Copyright © 2016 Jan Nieuwenhuizen Please send Mes bug reports to janneke@gnu.org. +* Changes in 0.4 since 0.3 +** Core +*** Smaller C-reader +The C-reader needs only support reading of words and lists +(s-expressions), line-comments. Quoting, characters, strings, +block-comments are all handled by the Scheme reader later. * Changes in 0.3 since 0.2 ** Core *** Number-based rather than pointer-based cells. diff --git a/guile/mes.scm b/guile/mes.scm index 5128df03..d9830a5e 100755 --- a/guile/mes.scm +++ b/guile/mes.scm @@ -211,6 +211,7 @@ exec guile -L $(pwd) -e '(mes)' -s "$0" "$@" (exit . guile:exit) (*macro* . (guile:list)) + (*dot* . '.) ;; (stderr . stderr)))) diff --git a/guile/reader.mes b/guile/reader.mes index bb42194b..d1fc56fe 100644 --- a/guile/reader.mes +++ b/guile/reader.mes @@ -37,7 +37,7 @@ ;; * read characters, quote, strings (define (read) - (read-word (read-byte) '() (current-module))) + (read-word (read-byte) (list) (current-module))) (define (read-input-file) (define (helper x) @@ -46,18 +46,18 @@ (helper (read))) (define-macro (cond . clauses) - (list 'if (null? clauses) *unspecified* + (list (quote if) (null? clauses) *unspecified* (if (null? (cdr clauses)) - (list 'if (car (car clauses)) - (list (cons 'lambda (cons '() (cons (car (car clauses)) (cdr (car clauses)))))) + (list (quote if) (car (car clauses)) + (list (cons (quote lambda) (cons (list) (cons (car (car clauses)) (cdr (car clauses)))))) *unspecified*) - (if (eq? (car (cadr clauses)) 'else) - (list 'if (car (car clauses)) - (list (cons 'lambda (cons '() (car clauses)))) - (list (cons 'lambda (cons '() (cons *unspecified* (cdr (cadr clauses))))))) - (list 'if (car (car clauses)) - (list (cons 'lambda (cons '() (car clauses)))) - (cons 'cond (cdr clauses))))))) + (if (eq? (car (cadr clauses)) (quote else)) + (list (quote if) (car (car clauses)) + (list (cons (quote lambda) (cons (list) (car clauses)))) + (list (cons (quote lambda) (cons (list) (cons *unspecified* (cdr (cadr clauses))))))) + (list (quote if) (car (car clauses)) + (list (cons (quote lambda) (cons (list) (car clauses)))) + (cons (quote cond) (cdr clauses))))))) (define (eat-whitespace) (cond @@ -88,21 +88,21 @@ (define (read-list a) (eat-whitespace) - (if (eq? (peek-byte) 41) (begin (read-byte) '()) + (if (eq? (peek-byte) 41) (begin (read-byte) (list)) ((lambda (w) - (if (eq? w '.) (car (read-list a)) + (if (eq? w *dot*) (car (read-list a)) (cons w (read-list a)))) - (read-word (read-byte) '() a)))) + (read-word (read-byte) (list) a)))) ;;(define (read-string)) (define (lookup-char c a) - (lookup (cons (integer->char c) '()) a)) + (lookup (cons (integer->char c) (list)) a)) (define (read-word c w a) (cond - ((eq? c -1) '()) - ((eq? c 10) (if (null? w) (read-word (read-byte) '() a) + ((eq? c -1) (list)) + ((eq? c 10) (if (null? w) (read-word (read-byte) (list) a) (lookup w a))) ((eq? c 32) (read-word 10 w a)) ((eq? c 34) (if (null? w) (read-string) @@ -114,28 +114,28 @@ ((eq? (peek-byte) 40) (read-byte) (list->vector (read-list a))) ((eq? (peek-byte) 92) (read-byte) (read-character)) ((eq? (peek-byte) 120) (read-byte) (read-hex)) - (else (read-word (read-byte) (append2 w (cons (integer->char c) '())) a)))) - ((eq? c 39) (if (null? w) (cons (lookup (cons (integer->char c) '()) a) - (cons (read-word (read-byte) w a) '())) + (else (read-word (read-byte) (append2 w (cons (integer->char c) (list))) a)))) + ((eq? c 39) (if (null? w) (cons (lookup (cons (integer->char c) (list)) a) + (cons (read-word (read-byte) w a) (list))) (begin (unread-byte c) (lookup w a)))) ((eq? c 40) (if (null? w) (read-list a) (begin (unread-byte c) (lookup w a)))) - ((eq? c 41) (if (null? w) (cons (lookup (cons (integer->char c) '()) a) - (cons (read-word (read-byte) w a) '())) + ((eq? c 41) (if (null? w) (cons (lookup (cons (integer->char c) (list)) a) + (cons (read-word (read-byte) w a) (list))) (begin (unread-byte c) (lookup w a)))) ((eq? c 44) (cond ((eq? (peek-byte) 64) (begin (read-byte) (cons - (lookup (symbol->list 'unquote-splicing) a) - (cons (read-word (read-byte) w a) '())))) + (lookup (symbol->list (quote unquote-splicing)) a) + (cons (read-word (read-byte) w a) (list))))) (else (cons (lookup-char c a) (cons (read-word (read-byte) w a) - '()))))) - ((eq? c 96) (cons (lookup-char c a) (cons (read-word (read-byte) w a) '()))) + (list)))))) + ((eq? c 96) (cons (lookup-char c a) (cons (read-word (read-byte) w a) (list)))) ((eq? c 59) (read-line-comment c) (read-word 10 w a)) - (else (read-word (read-byte) (append2 w (cons (integer->char c) '())) a)))) + (else (read-word (read-byte) (append2 w (cons (integer->char c) (list))) a)))) ;; ((lambda (p) - ;; ;;(display 'program=) (display p) (newline) + ;; ;;(display (quote program=)) (display p) (newline) ;; (begin-env p (current-module))) ;; (read-input-file)) ) diff --git a/mes.c b/mes.c index 1d3f1765..bee6a394 100644 --- a/mes.c +++ b/mes.c @@ -33,8 +33,13 @@ #define QUASISYNTAX 0 #define ENV_CACHE 0 #define FIXED_PRIMITIVES 1 +#define READER 1 +#if READER +int ARENA_SIZE = 1000000; +#else int ARENA_SIZE = 100000; +#endif int MAX_ARENA_SIZE = 20000000; int GC_SAFETY = 100; @@ -141,6 +146,7 @@ scm scm_symbol_unsyntax_splicing = {SYMBOL, "unsyntax-splicing"}; scm scm_symbol_call_with_values = {SYMBOL, "call-with-values"}; scm scm_symbol_current_module = {SYMBOL, "current-module"}; scm scm_symbol_primitive_load = {SYMBOL, "primitive-load"}; +scm scm_symbol_read_input_file = {SYMBOL, "read-input-file"}; scm scm_symbol_the_unquoters = {SYMBOL, "*the-unquoters*"}; @@ -527,7 +533,10 @@ vm_begin_env () if (caar (r1) == cell_symbol_begin) r1 = append2 (cdar (r1), cdr (r1)); else if (caar (r1) == cell_symbol_primitive_load) - r1 = append2 (read_input_file_env (r0), cdr (r1)); + { + SCM f = read_input_file_env (r0); + r1 = append2 (f, cdr (r1)); + } } r = eval_env (car (r1), r0); r1 = CDR (r1); @@ -1130,6 +1139,7 @@ mes_builtins (SCM a) a = acons (cell_symbol_the_unquoters, the_unquoters, a); #endif + a = add_environment (a, "*dot*", cell_dot); a = add_environment (a, "*foo-bar-baz*", cell_nil); // FIXME: some off-by one? return a; @@ -1184,39 +1194,29 @@ read_input_file_env_ (SCM e, SCM a) SCM read_input_file_env (SCM a) { + r0 = a; +#if READER return read_input_file_env_ (read_env (r0), r0); +#endif + return apply_env (cell_symbol_read_input_file, cell_nil, r0); } -bool g_dump_p = false; - SCM -load_env (SCM a) +load_env (SCM a) ///((internal)) { + r0 =a; +#if !READER + g_stdin = fopen ("module/mes/read-0.mes", "r"); + g_stdin = g_stdin ? g_stdin : fopen (PREFIX "module/mes/read-0.mes", "r"); +#endif + if (!g_function) r0 = mes_builtins (r0); r3 = read_input_file_env (r0); - if (g_dump_p && !g_function) - { - r1 = g_symbols; - SCM frame = cons (r1, cons (r2, cons (r3, cons (r0, cell_nil)))); - stack = cons (frame, stack); - stack = gc (stack); - gc_frame (stack); - char *p = (char*)g_cells; - fputc ('M', stdout); - fputc ('E', stdout); - fputc ('S', stdout); - fputc (stack >> 8, stdout); - fputc (stack % 256, stdout); - for (int i=0; i> 8, stdout); + fputc (stack % 256, stdout); + for (int i=0; i 1 && !strcmp (argv[1], "--dump")) g_dump_p = true; + if (getenv ("MES_ARENA")) ARENA_SIZE = atoi (getenv ("MES_ARENA")); if (argc > 1 && !strcmp (argv[1], "--help")) return puts ("Usage: mes < FILE\n"); if (argc > 1 && !strcmp (argv[1], "--version")) return puts ("Mes 0.3\n"); g_stdin = stdin; - SCM a = mes_environment (); - if (argc > 1 && !strcmp (argv[1], "--load")) - display_ (stderr, bload_env (a)); - else - display_ (stderr, load_env (a)); + r0 = mes_environment (); + SCM program = (argc > 1 && !strcmp (argv[1], "--load")) + ? bload_env (r0) : load_env (r0); + if (argc > 1 && !strcmp (argv[1], "--dump")) return dump (); + display_ (stderr, begin_env (program, r0)); fputs ("", stderr); gc (stack); if (g_debug) fprintf (stderr, "\nstats: [%d]\n", g_free.value); diff --git a/module/mes/read-0.mes b/module/mes/read-0.mes index f36fff67..8d59bb17 100644 --- a/module/mes/read-0.mes +++ b/module/mes/read-0.mes @@ -42,7 +42,10 @@ ;; * read characters, quote, strings (define (read) - (read-word (read-byte) '() (current-module))) + (read-word (read-byte) (list) (current-module))) + + (define (read-env a) + (read-word (read-byte) (list) a)) (define (read-input-file) (define (helper x) @@ -51,18 +54,18 @@ (helper (read))) (define-macro (cond . clauses) - (list 'if (null? clauses) *unspecified* + (list (quote if) (null? clauses) *unspecified* (if (null? (cdr clauses)) - (list 'if (car (car clauses)) - (list (cons 'lambda (cons '() (cons (car (car clauses)) (cdr (car clauses)))))) + (list (quote if) (car (car clauses)) + (list (cons (quote lambda) (cons (list) (cons (car (car clauses)) (cdr (car clauses)))))) *unspecified*) - (if (eq? (car (cadr clauses)) 'else) - (list 'if (car (car clauses)) - (list (cons 'lambda (cons '() (car clauses)))) - (list (cons 'lambda (cons '() (cons *unspecified* (cdr (cadr clauses))))))) - (list 'if (car (car clauses)) - (list (cons 'lambda (cons '() (car clauses)))) - (cons 'cond (cdr clauses))))))) + (if (eq? (car (cadr clauses)) (quote else)) + (list (quote if) (car (car clauses)) + (list (cons (quote lambda) (cons (list) (car clauses)))) + (list (cons (quote lambda) (cons (list) (cons *unspecified* (cdr (cadr clauses))))))) + (list (quote if) (car (car clauses)) + (list (cons (quote lambda) (cons (list) (car clauses)))) + (cons (quote cond) (cdr clauses))))))) (define (eat-whitespace) (cond @@ -93,21 +96,21 @@ (define (read-list a) (eat-whitespace) - (if (eq? (peek-byte) 41) (begin (read-byte) '()) + (if (eq? (peek-byte) 41) (begin (read-byte) (list)) ((lambda (w) - (if (eq? w '.) (car (read-list a)) + (if (eq? w *dot*) (car (read-list a)) (cons w (read-list a)))) - (read-word (read-byte) '() a)))) + (read-word (read-byte) (list) a)))) ;;(define (read-string)) (define (lookup-char c a) - (lookup (cons (integer->char c) '()) a)) + (lookup (cons (integer->char c) (list)) a)) (define (read-word c w a) (cond - ((eq? c -1) '()) - ((eq? c 10) (if (null? w) (read-word (read-byte) '() a) + ((eq? c -1) (list)) + ((eq? c 10) (if (null? w) (read-word (read-byte) (list) a) (lookup w a))) ((eq? c 32) (read-word 10 w a)) ((eq? c 34) (if (null? w) (read-string) @@ -119,27 +122,42 @@ ((eq? (peek-byte) 40) (read-byte) (list->vector (read-list a))) ((eq? (peek-byte) 92) (read-byte) (read-character)) ((eq? (peek-byte) 120) (read-byte) (read-hex)) - (else (read-word (read-byte) (append w (cons (integer->char c) '())) a)))) - ((eq? c 39) (if (null? w) (cons (lookup (cons (integer->char c) '()) a) - (cons (read-word (read-byte) w a) '())) + ((eq? (peek-byte) 44) + (read-byte) + (cond ((eq? (peek-byte) 64) + (read-byte) + (cons (lookup (symbol->list (quote unsyntax-splicing)) a) + (cons (read-word (read-byte) w a) (list)))) + (else + (cons (lookup (symbol->list (quote unsyntax)) a) + (cons (read-word (read-byte) w a) (list)))))) + ((eq? (peek-byte) 39) (read-byte) + (cons (lookup (cons (integer->char 35) (cons (integer->char 39) (list))) a) + (cons (read-word (read-byte) w a) (list)))) + ((eq? (peek-byte) 96) (read-byte) + (cons (lookup (cons (integer->char 35) (cons (integer->char 96) (list))) a) + (cons (read-word (read-byte) w a) (list)))) + (else (read-word (read-byte) (append2 w (cons (integer->char c) (list))) a)))) + ((eq? c 39) (if (null? w) (cons (lookup (cons (integer->char c) (list)) a) + (cons (read-word (read-byte) w a) (list))) (begin (unread-byte c) (lookup w a)))) ((eq? c 40) (if (null? w) (read-list a) (begin (unread-byte c) (lookup w a)))) - ((eq? c 41) (if (null? w) (cons (lookup (cons (integer->char c) '()) a) - (cons (read-word (read-byte) w a) '())) + ((eq? c 41) (if (null? w) (cons (lookup (cons (integer->char c) (list)) a) + (cons (read-word (read-byte) w a) (list))) (begin (unread-byte c) (lookup w a)))) ((eq? c 44) (cond ((eq? (peek-byte) 64) (begin (read-byte) (cons - (lookup (symbol->list 'unquote-splicing) a) - (cons (read-word (read-byte) w a) '())))) + (lookup (symbol->list (quote unquote-splicing)) a) + (cons (read-word (read-byte) w a) (list))))) (else (cons (lookup-char c a) (cons (read-word (read-byte) w a) - '()))))) - ((eq? c 96) (cons (lookup-char c a) (cons (read-word (read-byte) w a) '()))) + (list)))))) + ((eq? c 96) (cons (lookup-char c a) (cons (read-word (read-byte) w a) (list)))) ((eq? c 59) (read-line-comment c) (read-word 10 w a)) - (else (read-word (read-byte) (append w (cons (integer->char c) '())) a)))) + (else (read-word (read-byte) (append2 w (cons (integer->char c) (list))) a)))) ((lambda (p) - ;;(display 'program=) (display p) (newline) + ;;(display (quote scheme-program=)) (display p) (newline) (begin-env p (current-module))) (read-input-file))) diff --git a/module/mes/repl.mes b/module/mes/repl.mes index 2ec4f1f7..ab220fab 100644 --- a/module/mes/repl.mes +++ b/module/mes/repl.mes @@ -114,7 +114,7 @@ along with Mes. If not, see . (print-sexp? #t)) (define (expand) - (let ((sexp (read-env (current-module)))) + (let ((sexp (read))) (when #t print-sexp? (display "[sexp=") (display sexp) @@ -124,7 +124,7 @@ along with Mes. If not, see . (newline))) (define (scexpand) - (let ((sexp (read-env (current-module)))) + (let ((sexp (read))) (when #t print-sexp? (display "[sexp=") (display sexp) @@ -142,7 +142,7 @@ along with Mes. If not, see . (display (assoc-ref topic-alist topic)))) (define (use a) (lambda () - (let ((module (read-env (current-module)))) + (let ((module (read))) (mes-load-module-env module a)))) (define (meta command a) (let ((command-alist `((expand . ,expand) diff --git a/reader.c b/reader.c index f82ff649..46f293a2 100644 --- a/reader.c +++ b/reader.c @@ -36,13 +36,13 @@ unread_char (SCM c) return ungetchar (VALUE (c)); } -SCM -unget_char (SCM c) +int +read_block_comment (int c) { - assert (TYPE (c) == NUMBER || TYPE (c) == CHAR); - ungetchar (VALUE (c)); - return c; + if (c == '!' && peekchar () == '#') return getchar (); + return read_block_comment (getchar ()); } + int read_line_comment (int c) { @@ -50,12 +50,6 @@ read_line_comment (int c) return read_line_comment (getchar ()); } -int -read_block_comment (int c) -{ - if (c == '!' && peekchar () == '#') return getchar (); - return read_block_comment (getchar ()); -} SCM lookup_char (int c, SCM a); @@ -67,12 +61,14 @@ read_word (int c, SCM w, SCM a) if (c == '\n' && VALUE (car (w)) == '.' && cdr (w) == cell_nil) return cell_dot; if (c == EOF || c == '\n') return lookup (w, a); if (c == ' ') return read_word ('\n', w, a); - if (c == '"' && w == cell_nil) return read_string (); - if (c == '"') {ungetchar (c); return lookup (w, a);} if (c == '(' && w == cell_nil) return read_list (a); if (c == '(') {ungetchar (c); return lookup (w, a);} if (c == ')' && w == cell_nil) {ungetchar (c); return cell_nil;} if (c == ')') {ungetchar (c); return lookup (w, a);} + if (c == ';') {read_line_comment (c); return read_word ('\n', w, a);} +#if READER + if (c == '"' && w == cell_nil) return read_string (); + if (c == '"') {ungetchar (c); return lookup (w, a);} if (c == ',' && peekchar () == '@') {getchar (); return cons (lookup (STRING (cell_symbol_unquote_splicing), a), cons (read_word (getchar (), w, a), cell_nil));} @@ -93,11 +89,11 @@ read_word (int c, SCM w, SCM a) c = getchar (); return cons (lookup (cons (make_char ('#'), cons (make_char (c), cell_nil)), a), cons (read_word (getchar (), w, a), cell_nil));} - if (c == ';') {read_line_comment (c); return read_word ('\n', w, a);} if (c == '#' && peekchar () == 'x') {getchar (); return read_hex ();} if (c == '#' && peekchar () == '\\') {getchar (); return read_character ();} if (c == '#' && w == cell_nil && peekchar () == '(') {getchar (); return list_to_vector (read_list (a));} if (c == '#' && peekchar () == '!') {getchar (); read_block_comment (getchar ()); return read_word (getchar (), w, a);} +#endif //READER return read_word (getchar (), append2 (w, cons (make_char (c), cell_nil)), a); } @@ -183,7 +179,9 @@ eat_whitespace (int c) { while (c == ' ' || c == '\t' || c == '\n') c = getchar (); if (c == ';') return eat_whitespace (read_line_comment (c)); +#if READER if (c == '#' && peekchar () == '!') {getchar (); read_block_comment (getchar ()); return eat_whitespace (getchar ());} +#endif return c; }