From 16f678a1582943e57415516833506f710eb46600 Mon Sep 17 00:00:00 2001 From: Jan Nieuwenhuizen Date: Tue, 25 Oct 2016 16:50:19 +0200 Subject: [PATCH] Implement strings and symbols as list of characters [WAS: c-string]. * mes.c (scm_t): Add string field. (make_string, internal_lookup_symbol, internal_make_symbol, make_symbol, lookup, readword): Take scm*. Update callers. (display_helper): Support string field. (append_char): New function. (readstring): Use it. Produce scm*. (cstring_to_list): New function. (add_environment, internal_make_symbol): Use it. (list_of_char_equal_p): New function. (internal_lookup_symbol): Use it. * lib.c (list_ref): New function. * string.c (string_ref): Use it. (string, string_append, string_length, substring, number_to_string, string_to_symbol, symbol_to_string): Update to list-of-characters implementation. --- lib.c | 10 +++ mes.c | 192 +++++++++++++++++++++++++++++++++++-------------------- string.c | 65 ++++++++----------- 3 files changed, 161 insertions(+), 106 deletions(-) diff --git a/lib.c b/lib.c index f1cb8e94..b252ef1a 100644 --- a/lib.c +++ b/lib.c @@ -56,6 +56,16 @@ list (scm *x) ///((args . n)) return x; } +scm * +list_ref (scm *x, scm *k) +{ + assert (x->type == PAIR); + assert (k->type == NUMBER); + int n = k->value; + while (n-- && x->cdr != &scm_nil) x = x->cdr; + return x != &scm_nil ? x->car : &scm_undefined; +} + scm * vector_to_list (scm *v) { diff --git a/mes.c b/mes.c index 974e5a6d..b2b54565 100644 --- a/mes.c +++ b/mes.c @@ -18,7 +18,6 @@ * along with Mes. If not, see . */ -#define STRING_MAX 2048 #define _GNU_SOURCE #include #include @@ -44,6 +43,7 @@ typedef struct scm_t { enum type type; union { char const *name; + struct scm_t* string; struct scm_t* car; struct scm_t* ref; int length; @@ -376,7 +376,9 @@ builtin_eval (scm *e, scm *a) if (e->type == SYMBOL) { scm *y = assq_ref_cache (e, a); if (y == &scm_undefined) { - fprintf (stderr, "eval: unbound variable: %s\n", e->name); + fprintf (stderr, "eval: unbound variable:"); + display_ (stderr, e); + fprintf (stderr, "\n"); assert (!"unbound variable"); } return y; @@ -404,9 +406,12 @@ builtin_eval (scm *e, scm *a) return define (e, a); #else if (e->car == &symbol_define) { - fprintf (stderr, "C DEFINE: %s\n", e->cdr->car->type == SYMBOL - ? e->cdr->car->name - : e->cdr->car->car->name); + fprintf (stderr, "C DEFINE: "); + display_ (stderr, + e->cdr->car->type == SYMBOL + ? e->cdr->car->string + : e->cdr->car->car->string); + fprintf (stderr, "\n"); } assert (e->car != &symbol_define); assert (e->car != &symbol_define_macro); @@ -525,7 +530,7 @@ make_macro (scm *name, scm *x) scm *p = alloc (1); p->type = MACRO; p->macro = x; - p->name = name->name; + p->string = name->string; return p; } @@ -548,38 +553,68 @@ make_ref (scm *x) } scm * -make_string (char const *s) +make_string (scm *x) { scm *p = alloc (1); p->type = STRING; - p->name = strdup (s); + p->string = x; + return p; +} + +scm * +cstring_to_list (char const* s) +{ + scm *p = &scm_nil; + while (s && *s) + p = append2 (p, cons (make_char (*s++), &scm_nil)); return p; } scm *symbols = 0; scm * -internal_lookup_symbol (char const *s) +list_of_char_equal_p (scm *a, scm *b) +{ + while (a != &scm_nil && b != &scm_nil && a->car->value == b->car->value) { + assert (a->car->type == CHAR); + assert (b->car->type == CHAR); + a = a->cdr; + b = b->cdr; + } + return (a == &scm_nil && b == &scm_nil) ? &scm_t : &scm_f; +} + +scm * +internal_lookup_symbol (scm *s) { scm *x = symbols; - while (x && strcmp (s, x->car->name)) x = x->cdr; + while (x) { + // FIXME: .string and .name is the same field; .name is used as a + // handy static field initializer. A string can only be mistaken + // for a cell with type == PAIR for the one character long, + // zero-padded #\etx. + if (x->car->string->type != PAIR) + x->car->string = cstring_to_list (x->car->name); + if (list_of_char_equal_p (x->car->string, s) == &scm_t) break; + x = x->cdr; + } if (x) x = x->car; return x; } scm * -internal_make_symbol (char const *s) +internal_make_symbol (scm *s) { scm *x = alloc (1); x->type = SYMBOL; - x->name = strdup (s); + x->string = s; x->value = 0; symbols = cons (x, symbols); return x; } scm * -make_symbol (char const *s) +make_symbol (scm *s) { scm *x = internal_lookup_symbol (s); return x ? x : internal_make_symbol (s); @@ -648,28 +683,44 @@ vector_set_x (scm *x, scm *i, scm *e) } scm * -lookup (char const *s, scm *a) +lookup (scm *s, scm *a) { - if (isdigit (*s) || (*s == '-' && isdigit (*(s+1)))) - return make_number (atoi (s)); - - scm *x; - x = internal_lookup_symbol (s); + if (isdigit (s->car->value) || (s->car->value == '-' && s->cdr != &scm_nil)) { + scm *p = s; + int sign = 1; + if (s->car->value == '-') { + sign = -1; + p = s->cdr; + } + int n = 0; + while (p != &scm_nil && isdigit (p->car->value)) { + n *= 10; + n += p->car->value - '0'; + p = p->cdr; + } + if (p == &scm_nil) return make_number (n * sign); + } + + scm *x = internal_lookup_symbol (s); if (x) return x; - if (*s == '\'') return &symbol_quote; - if (*s == '`') return &symbol_quasiquote; - if (*s == ',' && *(s+1) == '@') return &symbol_unquote_splicing; - if (*s == ',') return &symbol_unquote; - - if (*s == '#' && *(s+1) == '\'') return &symbol_syntax; - if (*s == '#' && *(s+1) == '`') return &symbol_quasisyntax; - if (*s == '#' && *(s+1) == ',' && *(s+2) == '@') return &symbol_unsyntax_splicing; - if (*s == '#' && *(s+1) == ',') return &symbol_unsyntax; - - if (!strcmp (s, "EOF")) { - fprintf (stderr, "mes: got EOF\n"); - return &scm_nil; // `EOF': eval program, which may read stdin + if (s->cdr == &scm_nil) { + if (s->car->value == '\'') return &symbol_quote; + if (s->car->value == '`') return &symbol_quasiquote; + if (s->car->value == ',') return &symbol_unquote; + } + else if (s->cdr->cdr == &scm_nil) { + if (s->car->value == ',' && s->cdr->car->value == '@') return &symbol_unquote_splicing; + if (s->car->value == '#' && s->cdr->car->value == '\'') return &symbol_syntax; + if (s->car->value == '#' && s->cdr->car->value == '`') return &symbol_quasisyntax; + if (s->car->value == '#' && s->cdr->car->value == ',') return &symbol_unsyntax; + } + else if (s->cdr->cdr->cdr == &scm_nil) { + if (s->car->value == '#' && s->cdr->car->value == ',' && s->cdr->cdr->car->value == '@') return &symbol_unsyntax_splicing; + if (s->car->value == 'E' && s->cdr->car->value == 'O' && s->cdr->cdr->car->value == 'F') { + fprintf (stderr, "mes: got EOF\n"); + return &scm_nil; // `EOF': eval program, which may read stdin + } } return internal_make_symbol (s); @@ -678,10 +729,7 @@ lookup (char const *s, scm *a) scm * lookup_char (int c, scm *a) { - char buf[2]; - buf[0] = c; - buf[1] = 0; - return lookup (buf, a); + return lookup (cons (make_char (c), &scm_nil), a); } scm * @@ -774,7 +822,16 @@ display_helper (FILE* f, scm *x, bool cont, char const *sep, bool quote) } else if (x->type == REF) display_helper (f, x->ref, cont, "", true); else if (builtin_p (x) == &scm_t) fprintf (f, "#", x->name); - else if (pair_p (x) == &scm_f) fprintf (f, "%s", x->name); + else if (x->type != PAIR && x->string) { + scm *p = x->string; + assert (p); + while (p != &scm_nil) { + assert (p->car->type == CHAR); + fputc (p->car->value, f); + p = p->cdr; + } + } + else if (x->type != PAIR && x->name) fprintf (f, "%s", x->name); return &scm_unspecified; } @@ -843,53 +900,48 @@ readblock (int c) } scm * -readword (int c, char *w, scm *a) +readword (int c, scm *w, scm *a) { - if (c == EOF && !w) return &scm_nil; - if (c == '\n' && !w) return readword (getchar (), w, a); - if (c == '\n' && *w == '.' && w[1] == 0) return &scm_dot; + if (c == EOF && w == &scm_nil) return &scm_nil; + if (c == '\n' && w == &scm_nil) return readword (getchar (), w, a); + if (c == '\n' && w->car->value == '.' && w->cdr == &scm_nil) return &scm_dot; if (c == EOF || c == '\n') return lookup (w, a); if (c == ' ') return readword ('\n', w, a); - if (c == '"' && !w) return readstring (); + if (c == '"' && w == &scm_nil) return readstring (); if (c == '"') {ungetchar (c); return lookup (w, a);} - if (c == '(' && !w) return readlist (a); + if (c == '(' && w == &scm_nil) return readlist (a); if (c == '(') {ungetchar (c); return lookup (w, a);} - if (c == ')' && !w) {ungetchar (c); return &scm_nil;} + if (c == ')' && w == &scm_nil) {ungetchar (c); return &scm_nil;} if (c == ')') {ungetchar (c); return lookup (w, a);} - if (c == ',' && peekchar () == '@') {getchar (); return cons (lookup (",@", a), + if (c == ',' && peekchar () == '@') {getchar (); return cons (lookup (symbol_unquote_splicing.string, a), cons (readword (getchar (), w, a), &scm_nil));} if ((c == '\'' || c == '`' || c == ',') - && !w) {return cons (lookup_char (c, a), + && w == &scm_nil) {return cons (lookup_char (c, a), cons (readword (getchar (), w, a), &scm_nil));} - if (c == '#' && peekchar () == ',' && !w) { + if (c == '#' && peekchar () == ',' && w == &scm_nil) { getchar (); - if (peekchar () == '@'){getchar (); return cons (lookup ("#,@", a), + if (peekchar () == '@'){getchar (); return cons (lookup (symbol_unsyntax_splicing.string, a), cons (readword (getchar (), w, a), &scm_nil));} - return cons (lookup ("#,", a), cons (readword (getchar (), w, a), &scm_nil)); + return cons (lookup (symbol_unsyntax.string, a), cons (readword (getchar (), w, a), &scm_nil)); } if (c == '#' && (peekchar () == '\'' || peekchar () == '`') - && !w) {char buf[3] = "#"; buf[1] = getchar (); return cons (lookup (buf, a), + && w == &scm_nil) {return cons (lookup (cons (make_char ('#'), cons (make_char (getchar ()), &scm_nil)), a), cons (readword (getchar (), w, a), &scm_nil));} if (c == ';') {readcomment (c); return readword ('\n', w, a);} if (c == '#' && peekchar () == 'x') {getchar (); return read_hex ();} if (c == '#' && peekchar () == '\\') {getchar (); return read_character ();} - if (c == '#' && !w && peekchar () == '(') {getchar (); return list_to_vector (readlist (a));} + if (c == '#' && w == &scm_nil && peekchar () == '(') {getchar (); return list_to_vector (readlist (a));} if (c == '#' && peekchar () == '(') {ungetchar (c); return lookup (w, a);} if (c == '#' && peekchar () == '!') {getchar (); readblock (getchar ()); return readword (getchar (), w, a);} - char buf[STRING_MAX] = {0}; - char ch = c; - char *p = w ? w + strlen (w) : buf; - *p = ch; - *(p+1) = 0; - return readword (getchar (), w ? w : buf, a); + return readword (getchar (), append2 (w, cons (make_char (c), &scm_nil)), a); } scm * @@ -924,7 +976,7 @@ read_character () } else if (c >= 'a' && c <= 'z' && peekchar () >= 'a' && peekchar () <= 'z') { - char buf[STRING_MAX]; + char buf[10]; char *p = buf; *p++ = c; while (peekchar () >= 'a' && peekchar () <= 'z') { @@ -947,22 +999,26 @@ read_character () return make_char (c); } +scm * +append_char (scm *x, int i) +{ + return append2 (x, cons (make_char (i), &scm_nil)); +} + scm * readstring () { - char buf[STRING_MAX]; - char *p = buf; + scm *p = &scm_nil; int c = getchar (); while (true) { if (c == '"') break; - if (c == '\\' && peekchar () == '"') *p++ = getchar (); - else if (c == '\\' && peekchar () == 'n') {getchar (); *p++ = '\n';} + if (c == '\\' && peekchar () == '"') p = append_char (p, getchar ()); + else if (c == '\\' && peekchar () == 'n') {getchar (); p = append_char (p, '\n');} else if (c == EOF) assert (!"EOF in string"); - else *p++ = c; + else p = append_char (p, c); c = getchar (); } - *p = 0; - return make_string (buf); + return make_string (p); } int @@ -980,7 +1036,7 @@ readlist (scm *a) int c = getchar (); c = eat_whitespace (c); if (c == ')') return &scm_nil; - scm *w = readword (c, 0, a); + scm *w = readword (c, &scm_nil, a); if (w == &scm_dot) return car (readlist (a)); return cons (w, readlist (a)); @@ -989,13 +1045,13 @@ readlist (scm *a) scm * read_env (scm *a) { - return readword (getchar (), 0, a); + return readword (getchar (), &scm_nil, a); } scm * add_environment (scm *a, char const *name, scm *x) { - return cons (cons (make_symbol (name), x), a); + return cons (cons (make_symbol (cstring_to_list (name)), x), a); } scm * diff --git a/string.c b/string.c index d0f792a6..0d87ce46 100644 --- a/string.c +++ b/string.c @@ -21,54 +21,34 @@ scm * string (scm *x) ///((args . n)) { - char buf[STRING_MAX] = ""; - char *p = buf; - while (x != &scm_nil) - { - scm *s = car (x); - assert (s->type == CHAR); - *p++ = s->value; - x = cdr (x); - } - return make_string (buf); + return make_string (x); } scm * string_append (scm *x) ///((args . n)) { - char buf[STRING_MAX] = ""; - + scm *p = &scm_nil; while (x != &scm_nil) { scm *s = car (x); assert (s->type == STRING); - strcat (buf, s->name); + p = append2 (p, s->string); x = cdr (x); } - return make_string (buf); + return make_string (p); } scm * list_to_string (scm *x) { - char buf[STRING_MAX] = ""; - char *p = buf; - while (x != &scm_nil) - { - scm *s = car (x); - assert (s->type == CHAR); - *p++ = s->value; - x = cdr (x); - } - *p = 0; - return make_string (buf); + return make_string (x); } scm * string_length (scm *x) { assert (x->type == STRING); - return make_number (strlen (x->name)); + return make_number (length (x->string)->value); } scm * @@ -76,7 +56,8 @@ string_ref (scm *x, scm *k) { assert (x->type == STRING); assert (k->type == NUMBER); - return make_char (x->name[k->value]); + scm n = {NUMBER, .value=k->value}; + return make_char (list_ref (x->string, &n)->value); } scm * @@ -84,40 +65,48 @@ substring (scm *x) ///((args . n)) { assert (x->type == PAIR); assert (x->car->type == STRING); - char const *s = x->car->name; + scm *s = x->car->string; assert (x->cdr->car->type == NUMBER); int start = x->cdr->car->value; - int end = strlen (s); + int end = length (s)->value; if (x->cdr->cdr->type == PAIR) { assert (x->cdr->cdr->car->type == NUMBER); assert (x->cdr->cdr->car->value <= end); end = x->cdr->cdr->car->value; } - char buf[STRING_MAX]; - strncpy (buf, s+start, end - start); - buf[end-start] = 0; - return make_string (buf); + int n = end - start; + while (start--) s = s->cdr; + scm *p = &scm_nil; + while (n-- && s != &scm_nil) { + p = append2 (p, cons (make_char (s->car->value), &scm_nil)); + s = s->cdr; + } + return make_string (p); } scm * number_to_string (scm *x) { assert (x->type == NUMBER); - char buf[STRING_MAX]; - sprintf (buf,"%d", x->value); - return make_string (buf); + int n = x->value; + scm *p = n < 0 ? cons (make_char ('-'), &scm_nil) : &scm_nil; + do { + p = cons (make_char (n % 10 + '0'), p); + n = n / 10; + } while (n); + return make_string (p); } scm * string_to_symbol (scm *x) { assert (x->type == STRING); - return make_symbol (x->name); + return make_symbol (x->string); } scm * symbol_to_string (scm *x) { assert (x->type == SYMBOL); - return make_string (x->name); + return make_string (x->string); }