From 10400ed2ad134c15519b41288cab15f7a16e5c31 Mon Sep 17 00:00:00 2001 From: Timothy Sample Date: Thu, 16 May 2019 19:19:10 -0400 Subject: [PATCH] Remove PEG modules and the PEG-based parser. * gash/grammar.scm, gash/peg.scm, gash/peg/cache.scm, gash/peg/codegen.scm, gash/peg/simplify-tree.scm, gash/peg/string-peg.scm, gash/peg/using-parsers.scm: Delete files. Makefile.am: Remove them. --- Makefile.am | 7 - gash/grammar.scm | 320 ------------------------------ gash/peg.scm | 41 ---- gash/peg/cache.scm | 45 ----- gash/peg/codegen.scm | 390 ------------------------------------- gash/peg/simplify-tree.scm | 97 --------- gash/peg/string-peg.scm | 280 -------------------------- gash/peg/using-parsers.scm | 116 ----------- 8 files changed, 1296 deletions(-) delete mode 100644 gash/grammar.scm delete mode 100644 gash/peg.scm delete mode 100644 gash/peg/cache.scm delete mode 100644 gash/peg/codegen.scm delete mode 100644 gash/peg/simplify-tree.scm delete mode 100644 gash/peg/string-peg.scm delete mode 100644 gash/peg/using-parsers.scm diff --git a/Makefile.am b/Makefile.am index f5337e0..6c3fdb8 100644 --- a/Makefile.am +++ b/Makefile.am @@ -83,18 +83,11 @@ MODULES = \ gash/environment.scm \ gash/eval.scm \ gash/gash.scm \ - gash/grammar.scm \ gash/io.scm \ gash/job.scm \ gash/lexer.scm \ gash/parser.scm \ gash/pattern.scm \ - gash/peg.scm \ - gash/peg/cache.scm \ - gash/peg/codegen.scm \ - gash/peg/simplify-tree.scm \ - gash/peg/string-peg.scm \ - gash/peg/using-parsers.scm \ gash/pipe.scm \ gash/readline.scm \ gash/repl.scm \ diff --git a/gash/grammar.scm b/gash/grammar.scm deleted file mode 100644 index 0a6dcd2..0000000 --- a/gash/grammar.scm +++ /dev/null @@ -1,320 +0,0 @@ -;;; Gash --- Guile As SHell -;;; Copyright © 2016,2017,2018 R.E.W. van Beusekom -;;; Copyright © 2018 Jan (janneke) Nieuwenhuizen -;;; -;;; This file is part of Gash. -;;; -;;; Gash is free software; you can redistribute it and/or modify it -;;; under the terms of the GNU General Public License as published by -;;; the Free Software Foundation; either version 3 of the License, or (at -;;; your option) any later version. -;;; -;;; Gash is distributed in the hope that it will be useful, but -;;; WITHOUT ANY WARRANTY; without even the implied warranty of -;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;;; GNU General Public License for more details. -;;; -;;; You should have received a copy of the GNU General Public License -;;; along with Gash. If not, see . - -(define-module (gash grammar) - #:use-module (ice-9 match) - #:use-module (ice-9 pretty-print) - #:use-module (ice-9 rdelim) - - #:use-module (srfi srfi-8) - - #:use-module (gash gash) - #:use-module (gash peg) - #:use-module (gash peg codegen) - - #:export (parse - parse-string)) - -(define (parse port) - (parse-string (read-string port))) - -(define (parse-string input) - - (define io-label "") - - (define (io-label-name str len pos) - (let ((at (string-skip str char-alphabetic? pos len))) - (set! io-label (substring str pos at)) - (if (< at len) (list at '()) - #f))) - - (define (io-label-match str len pos) - (if (string-prefix? io-label (substring str pos)) - (list (+ pos (string-length io-label)) '()) - #f)) - - (define-peg-pattern io-here-label none io-label-name) - (define-peg-pattern io-here-delim none io-label-match) - (define-peg-pattern io-here-document all - (and (+ (and (not-followed-by io-here-delim) - peg-any)) - io-here-delim)) - - (define-peg-string-patterns - "script <-- ws* compound - ws < sp / eol - sp < '\\\n'* (comment / [ \t\v]) - comment < [#] (!eol .)* - eol < [\n\r\f] - - compound <-- (term (&rpar / sep#))* - - sep <- sp* (amp ws* / semi ws* / eof) / ws+ - amp <- '&' - semi < ';'!';' - eof < !. - - term <- and / or / pipeline - and <-- pipeline and-op ws* term - or <-- pipeline or-op ws* term - and-op < '&&' - or-op < '||' - - pipeline <-- '!'? sp* (command (&sep / &or-op / &and-op / &rpar / eof / pipe#))+ - - and-or <- '&&' / '||' - - exclamation <- '!' - pipe < sp* '|' !'|' ws* - - command <-- function / compound-command / simple-command - - compound-command <- (subshell / brace-group / for-clause / case-clause / - if-clause / while-clause / until-clause) (sp* io-redirect)* - - simple-command <- ((io-redirect / assignment) sp*)* - ((io-redirect / nonreserved) sp*)+ / - ((io-redirect / assignment) sp*)+ - ((io-redirect / nonreserved) sp*)* - - assignment <-- name assign word? - assign < '=' - - io-redirect <-- [0-9]* (io-here / io-file) - io-file <-- io-op ([0-9]+ / sp* word) - io-op <- '<&' / '>&' / '>>' / '>' / '<>'/ '<' / '>|' - io-here <-- io-here-op io-here-label sp* eol io-here-document - io-here-op <- '<<-' / '<<' - - reserved < ('case' / 'esac' / 'in' / 'if' / 'fi' / 'then' / 'else' / - 'elif' / 'for' / 'done' / 'do' / 'until' / 'while') &ws - nonreserved <- !reserved word - - word <-- test / substitution / - (number / variable-subst / variable / delim / literal)+ - - function <-- name sp* lpar rpar# ws* function-body - name <-- !reserved identifier - function-body <- brace-group (sp* io-redirect)* - - subshell <-- lpar compound rpar# - brace-group <-- lbrace ws* compound rbrace# - - case-clause <-- case-keyword sp* word sp* in-keyword# ws* case-item+ ws* esac-keyword# - case-item <-- pattern sp* colon? ws* compound? case-sep? - colon < ':' - case-sep < ';;' ws* - pattern <-- (word (!rpar '|'# / !'|' &rpar))+ rpar# - - for-clause <-- for-keyword sp+ identifier ws+ (in-keyword sp+ expression)? sep# do-group - expression <-- command - do-group <-- do-keyword ws+ compound done-keyword# - - if-clause <-- if-keyword sp+ compound then-keyword# ws+ compound else-part? fi-keyword# - else-part <-- else-keyword ws+ compound / elif - elif <-- elif-keyword ws+ compound then-keyword# ws+ compound else-part? - - while-clause <-- while-keyword compound do-group - - until-clause <-- until-keyword compound do-group - - test <- ('[' / '\\[') sp+ test-args sp+ ']'# - test-args <-- (sp* word)+ - - literal <- !reserved (escaped / !allowed .)+ - escaped <- escape [ \"$] - escape < [\\] - allowed <- ']' / [ \t\v\f\n`'\")};|&$] / '\\\n' - - identifier <- [_a-zA-Z][_a-zA-Z0-9]* - - dollar < '$' - number <-- [0-9]+ - - substitution <-- dollar lpar compound rpar# / bt ([\\] bt / !bt .)+ bt# - lpar < '(' - rpar < ')' - bt < [`] - - variable <-- dollar ('#' / '@' / '*' / [0-9] / name / - lbrace name (variable-literal / &rbrace) rbrace) - variable-subst <- dollar lbrace (variable-or / variable-and / variable-regex) rbrace - variable-or <-- name min variable-word variable-word* - variable-and <-- name plus variable-word variable-word* - variable-word <- variable-regex / substitution / variable-subst / variable / variable-literal !slash / variable-string / sp* - variable-regex <-- name &slash regex-sep variable-literal '/' variable-string &rbrace / - name regex-sep variable-string - slash < '/' - variable-string <- (!rbrace ((!dq !sq .) / delim))+ - variable-literal <- (!rbrace !min !plus !slash ((!dq !sq .) / delim))+ - regex-sep <-- ('/' / '%%' / '%' / '##' / '#' / '^^' / '^' /',,' / ',' / '*' / '@' / '?') - min < '-' - plus < '+' - lbrace < '{' - rbrace < '}' - - - delim <-- singlequotes / doublequotes / substitution - sq < ['] - dq < [\"] - singlequotes <- sq (!sq .)* sq# - doublequotes <- dq (substitution / variable-subst / variable / (!dq (escape '\"' / .)))* dq# - - case-keyword < 'case' - do-keyword < 'do' - done-keyword < 'done' - elif-keyword < 'elif' - else-keyword < 'else' - esac-keyword < 'esac' - fi-keyword < 'fi' - for-keyword < 'for' - if-keyword < 'if' - in-keyword < 'in' - then-keyword < 'then' - until-keyword < 'until' - while-keyword < 'while' -") - - (catch 'syntax-error - (lambda () - (let* ((match (match-pattern script input)) - (end (peg:end match)) - (tree (peg:tree match))) - (when (> %debug-level 0) - (format #t "parse tree:\n") - (pretty-print tree)) - (if (eq? (string-length input) end) - (let ((script (transform tree))) - (when (> %debug-level 0) - (format #t "script:\n") - (pretty-print script)) - script) - (if match - (begin - (format (current-error-port) "parse error: at offset: ~a\n" end) - (pretty-print tree) - #f) - (begin - (format (current-error-port) "parse error: no match\n") - #f))))) - (lambda (key . args) - (define (line-column input pos) - (let ((length (string-length input))) - (let loop ((lines (string-split input #\newline)) (ln 1) (p 0)) - (if (null? lines) (values #f #f input) - (let* ((line (car lines)) - (length (string-length line)) - (end (+ p length 1)) - (last? (null? (cdr lines)))) - (if (<= pos end) (values ln (+ (if last? 0 1) (- pos p)) - (if last? line - (string-append line "\\n" (cadr lines)))) - (loop (cdr lines) (1+ ln) end))))))) - (define (format-peg o) - (match o - (('or l ...) (string-join (map format-peg l) ", or ")) - (('and l ...) (string-join (map format-peg l) " ")) - ((? symbol?) (symbol->string o)) - ((? string?) o))) - - (receive (ln col line) (line-column input (caar args)) - (let* ((col (- col 1)) - (indent (make-string col #\space))) - (format #t "~a:~a:~a: syntax-error:\n~a\n~a^\n~aexpected: ~a\n" - "" - ln col line - indent - indent - (format-peg (cadar args))) - (exit 1)))))) - -(define (transform o) - (match o - - (('script command) (transform command)) - (('script command ...) `(begin ,@(map transform command))) - - ;; FIXME: cannot remove pipeline even if it's a single command - ;; `pipeline' is what executes commands and evaluates them - ;; (set -e) - ;; (('pipeline pipeline) (transform pipeline)) - ;; or it results in ((if ...)); which won't work either - ;; (('pipeline pipeline) (let ((x (transform pipeline))) - ;; (match x - ;; (('command command ...) (list x)) - ;; (_ x)))) - - (('compound compound) (transform compound)) - (('compound compound ...) `(begin ,@(map transform compound))) - - (('command ('word (or "." "source")) file-name) - `(source ,(transform file-name))) - (('command word ... ('io-redirect ('io-here "<<" ('io-here-document string)))) - `(pipeline (cut display ,string) (command ,@word))) - (('command word ... ('io-redirect filedes ... ('io-file ">" file-name))) - (cond ((or (null? filedes) (equal? filedes '("1"))) - `(with-output-to-file ,file-name - ,(let ((command (transform `(command ,@word)))) - (match command - (('with-input-from-file arg ...) - `(cut with-input-from-file ,@arg)) - (_ command))))) - ((equal? filedes '("2")) - `(with-error-to-file ,file-name - ,(let ((command (transform `(command ,@word)))) - (match command - (('with-input-from-file arg ...) - `(cut with-input-from-file ,@arg)) - (_ command))))) - (else (error (format #f "TODO: output to filedes=~a\n" filedes))))) - (('command word ... ('io-redirect ('io-file "<" file-name))) - `(with-input-from-file ,file-name ,(transform `(command ,@word)))) - - (('command ('word (and (? string?) string)) ...) - `(command ,@string)) - - (('command ('if-clause if-clause ...)) - (transform `(if-clause ,@if-clause))) - (('if-clause expr then) - `(if (true? ,(transform expr)) ,(transform then) 0)) - (('if-clause expr then ('else-part else)) - `(if (true? ,(transform expr)) ,(transform then) ,(transform else))) - (('if-clause expr then ..1) - `(if (true? ,(transform expr)) (begin ,@(map transform then)) 0)) - (('if-clause expr then ..1 ('else-part else)) - `(if (true? ,(transform expr)) (begin ,@(map transform then)) ,(transform else))) - (('if-clause expr then ('else-part else ..1)) - `(if (true? ,(transform expr)) ,(transform then) ,@(map transform else))) - (('if-clause expr then ..1 ('else-part else ..1)) - `(if (true? ,(transform expr)) (begin ,@(map transform then)) (begin ,@(map transform else)))) - - (('elif elif ...) (transform `(if-clause ,@elif))) - - (('function name body) - `(function ,name (lambda ( . args) ,(transform body)))) - - (('word 'delim) '(word "")) - - (('pipeline ('command ('word "shift"))) '(shift)) - - (('command ('word (and (or "[" "\\[") bracket) ('test-args test-args ...) "]")) - `(command (word ,bracket) ,@(map transform test-args) (word "]"))) - - ((h t ...) (map transform o)) - (_ o))) diff --git a/gash/peg.scm b/gash/peg.scm deleted file mode 100644 index 5d6ab04..0000000 --- a/gash/peg.scm +++ /dev/null @@ -1,41 +0,0 @@ -;;;; peg.scm --- Parsing Expression Grammar (PEG) parser generator -;;;; -;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc. -;;;; -;;;; This library is free software; you can redistribute it and/or -;;;; modify it under the terms of the GNU Lesser General Public -;;;; License as published by the Free Software Foundation; either -;;;; version 3 of the License, or (at your option) any later version. -;;;; -;;;; This library is distributed in the hope that it will be useful, -;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -;;;; Lesser General Public License for more details. -;;;; -;;;; You should have received a copy of the GNU Lesser General Public -;;;; License along with this library; if not, write to the Free Software -;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -;;;; - -(define-module (gash peg) - #:use-module (gash peg codegen) - #:use-module (gash peg string-peg) - ;; Note: the most important effect of using string-peg is not whatever - ;; functions it exports, but the fact that it adds a new handler to - ;; peg-sexp-compile. - #:use-module (gash peg simplify-tree) - #:use-module (gash peg using-parsers) - #:use-module (gash peg cache) - #:re-export (define-peg-pattern - define-peg-string-patterns - match-pattern - search-for-pattern - compile-peg-pattern - keyword-flatten - context-flatten - peg:start - peg:end - peg:string - peg:tree - peg:substring - peg-record?)) diff --git a/gash/peg/cache.scm b/gash/peg/cache.scm deleted file mode 100644 index fd192b7..0000000 --- a/gash/peg/cache.scm +++ /dev/null @@ -1,45 +0,0 @@ -;;;; cache.scm --- cache the results of parsing -;;;; -;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc. -;;;; -;;;; This library is free software; you can redistribute it and/or -;;;; modify it under the terms of the GNU Lesser General Public -;;;; License as published by the Free Software Foundation; either -;;;; version 3 of the License, or (at your option) any later version. -;;;; -;;;; This library is distributed in the hope that it will be useful, -;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -;;;; Lesser General Public License for more details. -;;;; -;;;; You should have received a copy of the GNU Lesser General Public -;;;; License along with this library; if not, write to the Free Software -;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -;;;; - -(define-module (gash peg cache) - #:export (cg-cached-parser)) - -;; The results of parsing using a nonterminal are cached. Think of it like a -;; hash with no conflict resolution. Process for deciding on the cache size -;; wasn't very scientific; just ran the benchmarks and stopped a little after -;; the point of diminishing returns on my box. -(define *cache-size* 512) - -(define (make-cache) - (make-vector *cache-size* #f)) - -;; given a syntax object which is a parser function, returns syntax -;; which, if evaluated, will become a parser function that uses a cache. -(define (cg-cached-parser parser) - #`(let ((cache (make-cache))) - (lambda (str strlen at) - (let* ((vref (vector-ref cache (modulo at *cache-size*)))) - ;; Check to see whether the value is cached. - (if (and vref (eq? (car vref) str) (= (cadr vref) at)) - (caddr vref);; If it is return it. - (let ((fres ;; Else calculate it and cache it. - (#,parser str strlen at))) - (vector-set! cache (modulo at *cache-size*) - (list str at fres)) - fres)))))) diff --git a/gash/peg/codegen.scm b/gash/peg/codegen.scm deleted file mode 100644 index 9b91474..0000000 --- a/gash/peg/codegen.scm +++ /dev/null @@ -1,390 +0,0 @@ -;;;; codegen.scm --- code generation for composable parsers -;;;; -;;;; Copyright (C) 2011 Free Software Foundation, Inc. -;;;; -;;;; This library is free software; you can redistribute it and/or -;;;; modify it under the terms of the GNU Lesser General Public -;;;; License as published by the Free Software Foundation; either -;;;; version 3 of the License, or (at your option) any later version. -;;;; -;;;; This library is distributed in the hope that it will be useful, -;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -;;;; Lesser General Public License for more details. -;;;; -;;;; You should have received a copy of the GNU Lesser General Public -;;;; License along with this library; if not, write to the Free Software -;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -;;;; - -(define-module (gash peg codegen) - #:export (compile-peg-pattern wrap-parser-for-users add-peg-compiler!) - #:use-module (ice-9 pretty-print) - #:use-module (system base pmatch)) - -(define-syntax single? - (syntax-rules () - "Return #t if X is a list of one element." - ((_ x) - (pmatch x - ((_) #t) - (else #f))))) - -(define-syntax single-filter - (syntax-rules () - "If EXP is a list of one element, return the element. Otherwise -return EXP." - ((_ exp) - (pmatch exp - ((,elt) elt) - (,elts elts))))) - -(define-syntax push-not-null! - (syntax-rules () - "If OBJ is non-null, push it onto LST, otherwise do nothing." - ((_ lst obj) - (if (not (null? obj)) - (push! lst obj))))) - -(define-syntax push! - (syntax-rules () - "Push an object onto a list." - ((_ lst obj) - (set! lst (cons obj lst))))) - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;; CODE GENERATORS -;; These functions generate scheme code for parsing PEGs. -;; Conventions: -;; accum: (all name body none) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Code we generate will have a certain return structure depending on how we're -;; accumulating (the ACCUM variable). -(define (cg-generic-ret accum name body-uneval at) - ;; name, body-uneval and at are syntax - #`(let ((body #,body-uneval)) - #,(cond - ((and (eq? accum 'all) name) - #`(list #,at - (cond - ((not (list? body)) (list '#,name body)) - ((null? body) '#,name) - ((symbol? (car body)) (list '#,name body)) - (else (cons '#,name body))))) - ((eq? accum 'name) - #`(list #,at '#,name)) - ((eq? accum 'body) - #`(list #,at - (cond - ((single? body) (car body)) - (else body)))) - ((eq? accum 'none) - #`(list #,at '())) - (else - (begin - (pretty-print `(cg-generic-ret-error ,accum ,name ,body-uneval ,at)) - (pretty-print "Defaulting to accum of none.\n") - #`(list #,at '())))))) - -;; The short name makes the formatting below much easier to read. -(define cggr cg-generic-ret) - -;; Generates code that matches a particular string. -;; E.g.: (cg-string syntax "abc" 'body) -(define (cg-string pat accum) - (let ((plen (string-length pat))) - #`(lambda (str len pos) - (let ((end (+ pos #,plen))) - (and (<= end len) - (string= str #,pat pos end) - #,(case accum - ((all) #`(list end (list 'cg-string #,pat))) - ((name) #`(list end 'cg-string)) - ((body) #`(list end #,pat)) - ((none) #`(list end '())) - (else (error "bad accum" accum)))))))) - -;; Generates code for matching any character. -;; E.g.: (cg-peg-any syntax 'body) -(define (cg-peg-any accum) - #`(lambda (str len pos) - (and (< pos len) - #,(case accum - ((all) #`(list (1+ pos) - (list 'cg-peg-any (substring str pos (1+ pos))))) - ((name) #`(list (1+ pos) 'cg-peg-any)) - ((body) #`(list (1+ pos) (substring str pos (1+ pos)))) - ((none) #`(list (1+ pos) '())) - (else (error "bad accum" accum)))))) - -;; Generates code for matching a range of characters between start and end. -;; E.g.: (cg-range syntax #\a #\z 'body) -(define (cg-range pat accum) - (syntax-case pat () - ((start end) - (if (not (and (char? (syntax->datum #'start)) - (char? (syntax->datum #'end)))) - (error "range PEG should have characters after it; instead got" - #'start #'end)) - #`(lambda (str len pos) - (and (< pos len) - (let ((c (string-ref str pos))) - (and (char>=? c start) - (char<=? c end) - #,(case accum - ((all) #`(list (1+ pos) (list 'cg-range (string c)))) - ((name) #`(list (1+ pos) 'cg-range)) - ((body) #`(list (1+ pos) (string c))) - ((none) #`(list (1+ pos) '())) - (else (error "bad accum" accum)))))))))) - -;; Generate code to match a pattern and do nothing with the result -(define (cg-ignore pat accum) - (syntax-case pat () - ((inner) - (compile-peg-pattern #'inner 'none)))) - -(define (cg-capture pat accum) - (syntax-case pat () - ((inner) - (compile-peg-pattern #'inner 'body)))) - -;; Filters the accum argument to compile-peg-pattern for buildings like string -;; literals (since we don't want to tag them with their name if we're doing an -;; "all" accum). -(define (builtin-accum-filter accum) - (cond - ((eq? accum 'all) 'body) - ((eq? accum 'name) 'name) - ((eq? accum 'body) 'body) - ((eq? accum 'none) 'none))) -(define baf builtin-accum-filter) - -;; Top-level function builder for AND. Reduces to a call to CG-AND-INT. -(define (cg-and clauses accum) - #`(lambda (str len pos) - (let ((body '())) - #,(cg-and-int clauses (baf accum) #'str #'len #'pos #'body)))) - -;; Internal function builder for AND (calls itself). -(define (cg-and-int clauses accum str strlen at body) - (syntax-case clauses () - (() - (cggr accum 'cg-and #`(reverse #,body) at)) - ((first rest ...) - #`(let ((res (#,(compile-peg-pattern #'first accum) #,str #,strlen #,at))) - (and res - ;; update AT and BODY then recurse - (let ((newat (car res)) - (newbody (cadr res))) - (set! #,at newat) - (push-not-null! #,body (single-filter newbody)) - #,(cg-and-int #'(rest ...) accum str strlen at body))))))) - -;; Top-level function builder for OR. Reduces to a call to CG-OR-INT. -(define (cg-or clauses accum) - #`(lambda (str len pos) - #,(cg-or-int clauses (baf accum) #'str #'len #'pos))) - -;; Internal function builder for OR (calls itself). -(define (cg-or-int clauses accum str strlen at) - (syntax-case clauses () - (() - #f) - ((first rest ...) - #`(or (#,(compile-peg-pattern #'first accum) #,str #,strlen #,at) - #,(cg-or-int #'(rest ...) accum str strlen at))))) - -(define (cg-* args accum) - (syntax-case args () - ((pat) - #`(lambda (str strlen at) - (let ((body '())) - (let lp ((end at) (count 0)) - (let* ((match (#,(compile-peg-pattern #'pat (baf accum)) - str strlen end)) - (new-end (if match (car match) end)) - (count (if (> new-end end) (1+ count) count))) - (if (> new-end end) - (push-not-null! body (single-filter (cadr match)))) - (if (and (> new-end end) - #,#t) - (lp new-end count) - (let ((success #,#t)) - #,#`(and success - #,(cggr (baf accum) 'cg-body - #'(reverse body) #'new-end))))))))))) - -(define (cg-+ args accum) - (syntax-case args () - ((pat) - #`(lambda (str strlen at) - (let ((body '())) - (let lp ((end at) (count 0)) - (let* ((match (#,(compile-peg-pattern #'pat (baf accum)) - str strlen end)) - (new-end (if match (car match) end)) - (count (if (> new-end end) (1+ count) count))) - (if (> new-end end) - (push-not-null! body (single-filter (cadr match)))) - (if (and (> new-end end) - #,#t) - (lp new-end count) - (let ((success #,#'(>= count 1))) - #,#`(and success - #,(cggr (baf accum) 'cg-body - #'(reverse body) #'new-end))))))))))) - -(define (cg-? args accum) - (syntax-case args () - ((pat) - #`(lambda (str strlen at) - (let ((body '())) - (let lp ((end at) (count 0)) - (let* ((match (#,(compile-peg-pattern #'pat (baf accum)) - str strlen end)) - (new-end (if match (car match) end)) - (count (if (> new-end end) (1+ count) count))) - (if (> new-end end) - (push-not-null! body (single-filter (cadr match)))) - (if (and (> new-end end) - #,#'(< count 1)) - (lp new-end count) - (let ((success #,#t)) - #,#`(and success - #,(cggr (baf accum) 'cg-body - #'(reverse body) #'new-end))))))))))) - -(define (cg-followed-by args accum) - (syntax-case args () - ((pat) - #`(lambda (str strlen at) - (let ((body '())) - (let lp ((end at) (count 0)) - (let* ((match (#,(compile-peg-pattern #'pat (baf accum)) - str strlen end)) - (new-end (if match (car match) end)) - (count (if (> new-end end) (1+ count) count))) - (if (> new-end end) - (push-not-null! body (single-filter (cadr match)))) - (if (and (> new-end end) - #,#'(< count 1)) - (lp new-end count) - (let ((success #,#'(= count 1))) - #,#`(and success - #,(cggr (baf accum) 'cg-body #''() #'at))))))))))) - -(define (cg-not-followed-by args accum) - (syntax-case args () - ((pat) - #`(lambda (str strlen at) - (let ((body '())) - (let lp ((end at) (count 0)) - (let* ((match (#,(compile-peg-pattern #'pat (baf accum)) - str strlen end)) - (new-end (if match (car match) end)) - (count (if (> new-end end) (1+ count) count))) - (if (> new-end end) - (push-not-null! body (single-filter (cadr match)))) - (if (and (> new-end end) - #,#'(< count 1)) - (lp new-end count) - (let ((success #,#'(= count 1))) - #,#`(if success - #f - #,(cggr (baf accum) 'cg-body #''() #'at))))))))))) - - -(define (cg-expect-int clauses accum str strlen at) - (syntax-case clauses () - ((pat) - #`(or (#,(compile-peg-pattern #'pat accum) #,str #,strlen #,at) - (throw 'syntax-error (list #,at (syntax->datum #'pat))))))) ;;TODO throw partial match - -(define (cg-expect clauses accum) - #`(lambda (str len pos) - #,(cg-expect-int clauses ((@@ (ice-9 peg codegen) baf) accum) #'str #'len #'pos))) - -;; Association list of functions to handle different expressions as PEGs -(define peg-compiler-alist '()) - -(define (add-peg-compiler! symbol function) - (set! peg-compiler-alist - (assq-set! peg-compiler-alist symbol function))) - -(add-peg-compiler! 'range cg-range) -(add-peg-compiler! 'ignore cg-ignore) -(add-peg-compiler! 'capture cg-capture) -(add-peg-compiler! 'and cg-and) -(add-peg-compiler! 'or cg-or) -(add-peg-compiler! '* cg-*) -(add-peg-compiler! '+ cg-+) -(add-peg-compiler! '? cg-?) -(add-peg-compiler! 'followed-by cg-followed-by) -(add-peg-compiler! 'not-followed-by cg-not-followed-by) -(add-peg-compiler! 'expect cg-expect) - -;; Takes an arbitrary expressions and accumulation variable, then parses it. -;; E.g.: (compile-peg-pattern syntax '(and "abc" (or "-" (range #\a #\z))) 'all) -(define (compile-peg-pattern pat accum) - (syntax-case pat (peg-any) - (peg-any - (cg-peg-any (baf accum))) - (sym (identifier? #'sym) ;; nonterminal - #'sym) - (str (string? (syntax->datum #'str)) ;; literal string - (cg-string (syntax->datum #'str) (baf accum))) - ((name . args) (let* ((nm (syntax->datum #'name)) - (entry (assq-ref peg-compiler-alist nm))) - (if entry - (entry #'args accum) - (error "Bad peg form" nm #'args - "Not one of" (map car peg-compiler-alist))))))) - -;; Packages the results of a parser - -(define indent 0) - -(define (trace? symbol) - (and #f (not (memq symbol '())))) - -(define (wrap-parser-for-users for-syntax parser accumsym s-syn) - #`(lambda (str strlen at) - (when (trace? '#,s-syn) - (format (current-error-port) "~a~a\n" - (make-string indent #\space) - '#,s-syn)) - (set! indent (+ indent 4)) - (let ((res (#,parser str strlen at))) - (set! indent (- indent 4)) - ;; Try to match the nonterminal. - (let ((pos (or (and res (car res)) 0))) - (when (and (trace? '#,s-syn) (< at pos)) - (format (current-error-port) "~a~a := ~s\tnext: ~s\n" - (make-string indent #\space) - '#,s-syn - (substring str at pos) - (substring str pos (min strlen (+ pos 10)))))) - (if res - ;; If we matched, do some post-processing to figure out - ;; what data to propagate upward. - (let ((at (car res)) - (body (cadr res))) - #,(cond - ((eq? accumsym 'name) - #`(list at '#,s-syn)) - ((eq? accumsym 'all) - #`(list (car res) - (cond - ((not (list? body)) - (list '#,s-syn body)) - ((null? body) '#,s-syn) - ((symbol? (car body)) - (list '#,s-syn body)) - (else (cons '#,s-syn body))))) - ((eq? accumsym 'none) #`(list (car res) '())) - (else #`(begin res)))) - ;; If we didn't match, just return false. - #f)))) diff --git a/gash/peg/simplify-tree.scm b/gash/peg/simplify-tree.scm deleted file mode 100644 index 264e29e..0000000 --- a/gash/peg/simplify-tree.scm +++ /dev/null @@ -1,97 +0,0 @@ -;;;; simplify-tree.scm --- utility functions for the PEG parser -;;;; -;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc. -;;;; -;;;; This library is free software; you can redistribute it and/or -;;;; modify it under the terms of the GNU Lesser General Public -;;;; License as published by the Free Software Foundation; either -;;;; version 3 of the License, or (at your option) any later version. -;;;; -;;;; This library is distributed in the hope that it will be useful, -;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -;;;; Lesser General Public License for more details. -;;;; -;;;; You should have received a copy of the GNU Lesser General Public -;;;; License along with this library; if not, write to the Free Software -;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -;;;; - -(define-module (gash peg simplify-tree) - #:export (keyword-flatten context-flatten string-collapse) - #:use-module (system base pmatch)) - -(define-syntax single? - (syntax-rules () - "Return #t if X is a list of one element." - ((_ x) - (pmatch x - ((_) #t) - (else #f))))) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;; POST-PROCESSING FUNCTIONS (TO CANONICALIZE MATCH TREES) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Is everything in LST true? -(define (andlst lst) - (or (null? lst) - (and (car lst) (andlst (cdr lst))))) - -;; Is LST a list of strings? -(define (string-list? lst) - (and (list? lst) (not (null? lst)) - (andlst (map string? lst)))) - -;; Groups all strings that are next to each other in LST. Used in -;; STRING-COLLAPSE. -(define (string-group lst) - (if (not (list? lst)) - lst - (if (null? lst) - '() - (let ((next (string-group (cdr lst)))) - (if (not (string? (car lst))) - (cons (car lst) next) - (if (and (not (null? next)) - (list? (car next)) - (string? (caar next))) - (cons (cons (car lst) (car next)) (cdr next)) - (cons (list (car lst)) next))))))) - - -;; Collapses all the string in LST. -;; ("a" "b" (c d) "e" "f") -> ("ab" (c d) "ef") -(define (string-collapse lst) - (if (list? lst) - (let ((res (map (lambda (x) (if (string-list? x) - (apply string-append x) - x)) - (string-group (map string-collapse lst))))) - (if (single? res) (car res) res)) - lst)) - -;; If LST is an atom, return (list LST), else return LST. -(define (mklst lst) - (if (not (list? lst)) (list lst) lst)) - -;; Takes a list and "flattens" it, using the predicate TST to know when to stop -;; instead of terminating on atoms (see tutorial). -(define (context-flatten tst lst) - (if (or (not (list? lst)) (null? lst)) - lst - (if (tst lst) - (list lst) - (apply append - (map (lambda (x) (mklst (context-flatten tst x))) - lst))))) - -;; Takes a list and "flattens" it, using the list of keywords KEYWORD-LST to -;; know when to stop at (see tutorial). -(define (keyword-flatten keyword-lst lst) - (context-flatten - (lambda (x) - (if (or (not (list? x)) (null? x)) - #t - (member (car x) keyword-lst))) - lst)) diff --git a/gash/peg/string-peg.scm b/gash/peg/string-peg.scm deleted file mode 100644 index 8797bec..0000000 --- a/gash/peg/string-peg.scm +++ /dev/null @@ -1,280 +0,0 @@ -;;;; string-peg.scm --- representing PEG grammars as strings -;;;; -;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc. -;;;; -;;;; This library is free software; you can redistribute it and/or -;;;; modify it under the terms of the GNU Lesser General Public -;;;; License as published by the Free Software Foundation; either -;;;; version 3 of the License, or (at your option) any later version. -;;;; -;;;; This library is distributed in the hope that it will be useful, -;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -;;;; Lesser General Public License for more details. -;;;; -;;;; You should have received a copy of the GNU Lesser General Public -;;;; License along with this library; if not, write to the Free Software -;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -;;;; - -(define-module (gash peg string-peg) - #:export (peg-as-peg - define-peg-string-patterns - peg-grammar) - #:use-module (gash peg using-parsers) - #:use-module (gash peg codegen) - #:use-module (gash peg simplify-tree)) - -;; Gets the left-hand depth of a list. -(define (depth lst) - (if (or (not (list? lst)) (null? lst)) - 0 - (+ 1 (depth (car lst))))) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;; Parse string PEGs using sexp PEGs. -;; See the variable PEG-AS-PEG for an easier-to-read syntax. -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Grammar for PEGs in PEG grammar. -(define peg-as-peg -"grammar <-- (nonterminal ('<--' / '<-' / '<') sp pattern)+ -pattern <-- alternative (SLASH sp alternative)* -alternative <-- ([!&]? sp suffix)+ -suffix <-- primary ([*+?] sp)* -primary <-- secondary ([#] sp)? -secondary <-- '(' sp pattern ')' sp / '.' sp / literal / charclass / nonterminal !'<' -literal <-- ['] (!['] .)* ['] sp -charclass <-- LB (!']' (CCrange / CCsingle))* RB sp -CCrange <-- . '-' . -CCsingle <-- . -nonterminal <-- [a-zA-Z0-9-]+ sp -sp < [ \t\n]* -SLASH < '/' -LB < '[' -RB < ']' -") - -(define-syntax define-sexp-parser - (lambda (x) - (syntax-case x () - ((_ sym accum pat) - (let* ((matchf (compile-peg-pattern #'pat (syntax->datum #'accum))) - (accumsym (syntax->datum #'accum)) - (syn (wrap-parser-for-users x matchf accumsym #'sym))) - #`(define sym #,syn)))))) - -(define-sexp-parser peg-grammar all - (+ (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern))) -(define-sexp-parser peg-pattern all - (and peg-alternative - (* (and (ignore "/") peg-sp peg-alternative)))) -(define-sexp-parser peg-alternative all - (+ (and (? (or "!" "&")) peg-sp peg-suffix))) -(define-sexp-parser peg-suffix all - (and peg-primary (* (and (or "*" "+" "?") peg-sp)))) -(define-sexp-parser peg-primary all - (and peg-secondary (? (and "#" peg-sp)))) -(define-sexp-parser peg-secondary all - (or (and "(" peg-sp peg-pattern ")" peg-sp) - (and "." peg-sp) - peg-literal - peg-charclass - (and peg-nonterminal (not-followed-by "<")))) -(define-sexp-parser peg-literal all - (and "'" (* (and (not-followed-by "'") peg-any)) "'" peg-sp)) -(define-sexp-parser peg-charclass all - (and (ignore "[") - (* (and (not-followed-by "]") - (or charclass-range charclass-single))) - (ignore "]") - peg-sp)) -(define-sexp-parser charclass-range all (and peg-any "-" peg-any)) -(define-sexp-parser charclass-single all peg-any) -(define-sexp-parser peg-nonterminal all - (and (+ (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-")) peg-sp)) -(define-sexp-parser peg-sp none - (* (or " " "\t" "\n"))) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;; PARSE STRING PEGS -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Takes a string representing a PEG grammar and returns syntax that -;; will define all of the nonterminals in the grammar with equivalent -;; PEG s-expressions. -(define (peg-parser str for-syntax) - (let ((parsed (match-pattern peg-grammar str))) - (if (not parsed) - (begin - ;; (display "Invalid PEG grammar!\n") - #f) - (let ((lst (peg:tree parsed))) - (cond - ((or (not (list? lst)) (null? lst)) - lst) - ((eq? (car lst) 'peg-grammar) - #`(begin - #,@(map (lambda (x) (peg-nonterm->defn x for-syntax)) - (context-flatten (lambda (lst) (<= (depth lst) 2)) - (cdr lst)))))))))) - -;; Macro wrapper for PEG-PARSER. Parses PEG grammars expressed as strings and -;; defines all the appropriate nonterminals. -(define-syntax define-peg-string-patterns - (lambda (x) - (syntax-case x () - ((_ str) - (peg-parser (syntax->datum #'str) x))))) - -;; lst has format (nonterm grabber pattern), where -;; nonterm is a symbol (the name of the nonterminal), -;; grabber is a string (either "<", "<-" or "<--"), and -;; pattern is the parse of a PEG pattern expressed as as string. -(define (peg-nonterm->defn lst for-syntax) - (let* ((nonterm (car lst)) - (grabber (cadr lst)) - (pattern (caddr lst)) - (nonterm-name (datum->syntax for-syntax - (string->symbol (cadr nonterm))))) - #`(define-peg-pattern #,nonterm-name - #,(cond - ((string=? grabber "<--") (datum->syntax for-syntax 'all)) - ((string=? grabber "<-") (datum->syntax for-syntax 'body)) - (else (datum->syntax for-syntax 'none))) - #,(compressor (peg-pattern->defn pattern for-syntax) for-syntax)))) - -;; lst has format ('peg-pattern ...). -;; After the context-flatten, (cdr lst) has format -;; (('peg-alternative ...) ...), where the outer list is a collection -;; of elements from a '/' alternative. -(define (peg-pattern->defn lst for-syntax) - #`(or #,@(map (lambda (x) (peg-alternative->defn x for-syntax)) - (context-flatten (lambda (x) (eq? (car x) 'peg-alternative)) - (cdr lst))))) - -;; lst has format ('peg-alternative ...). -;; After the context-flatten, (cdr lst) has the format -;; (item ...), where each item has format either ("!" ...), ("&" ...), -;; or ('peg-suffix ...). -(define (peg-alternative->defn lst for-syntax) - #`(and #,@(map (lambda (x) (peg-body->defn x for-syntax)) - (context-flatten (lambda (x) (or (string? (car x)) - (eq? (car x) 'peg-suffix))) - (cdr lst))))) - -;; lst has the format either -;; ("!" ('peg-suffix ...)), ("&" ('peg-suffix ...)), or -;; ('peg-suffix ...). -(define (peg-body->defn lst for-syntax) - (cond - ((equal? (car lst) "&") - #`(followed-by #,(peg-suffix->defn (cadr lst) for-syntax))) - ((equal? (car lst) "!") - #`(not-followed-by #,(peg-suffix->defn (cadr lst) for-syntax))) - ((eq? (car lst) 'peg-suffix) - (peg-suffix->defn lst for-syntax)) - (else `(peg-parse-body-fail ,lst)))) - -;; lst has format ('peg-suffix (? (/ "*" "?" "+"))) -(define (peg-suffix->defn lst for-syntax) - (let ((inner-defn (peg-primary->defn (cadr lst) for-syntax))) - (cond - ((null? (cddr lst)) - inner-defn) - ((equal? (caddr lst) "*") - #`(* #,inner-defn)) - ((equal? (caddr lst) "?") - #`(? #,inner-defn)) - ((equal? (caddr lst) "+") - #`(+ #,inner-defn))))) - -;; Parse a primary. -(define (peg-primary->defn lst for-syntax) - (let ((inner-defn (peg-secondary->defn (cadr lst) for-syntax))) - (if (and (pair? (cddr lst)) (equal? (caddr lst) "#")) #`(expect #,inner-defn) - inner-defn))) - -(define (peg-secondary->defn lst for-syntax) - (let ((el (cadr lst))) - (cond - ((list? el) - (cond - ((eq? (car el) 'peg-literal) - (peg-literal->defn el for-syntax)) - ((eq? (car el) 'peg-charclass) - (peg-charclass->defn el for-syntax)) - ((eq? (car el) 'peg-nonterminal) - (datum->syntax for-syntax (string->symbol (cadr el)))))) - ((string? el) - (cond - ((equal? el "(") - (peg-pattern->defn (caddr lst) for-syntax)) - ((equal? el ".") - (datum->syntax for-syntax 'peg-any)) - (else (datum->syntax for-syntax - `(peg-parse-any unknown-string ,lst))))) - (else (datum->syntax for-syntax - `(peg-parse-any unknown-el ,lst)))))) - -;; Trims characters off the front and end of STR. -;; (trim-1chars "'ab'") -> "ab" -(define (trim-1chars str) (substring str 1 (- (string-length str) 1))) - -;; Parses a literal. -(define (peg-literal->defn lst for-syntax) - (datum->syntax for-syntax (trim-1chars (cadr lst)))) - -;; Parses a charclass. -(define (peg-charclass->defn lst for-syntax) - #`(or - #,@(map - (lambda (cc) - (cond - ((eq? (car cc) 'charclass-range) - #`(range #,(datum->syntax - for-syntax - (string-ref (cadr cc) 0)) - #,(datum->syntax - for-syntax - (string-ref (cadr cc) 2)))) - ((eq? (car cc) 'charclass-single) - (datum->syntax for-syntax (cadr cc))))) - (context-flatten - (lambda (x) (or (eq? (car x) 'charclass-range) - (eq? (car x) 'charclass-single))) - (cdr lst))))) - -;; Compresses a list to save the optimizer work. -;; e.g. (or (and a)) -> a -(define (compressor-core lst) - (if (or (not (list? lst)) (null? lst)) - lst - (cond - ((and (or (eq? (car lst) 'or) (eq? (car lst) 'and)) - (null? (cddr lst))) - (compressor-core (cadr lst))) - ((and (eq? (car lst) 'body) - (eq? (cadr lst) 'lit) - (eq? (cadddr lst) 1)) - (compressor-core (caddr lst))) - (else (map compressor-core lst))))) - -(define (compressor syn for-syntax) - (datum->syntax for-syntax - (compressor-core (syntax->datum syn)))) - -;; Builds a lambda-expressions for the pattern STR using accum. -(define (peg-string-compile args accum) - (syntax-case args () - ((str-stx) (string? (syntax->datum #'str-stx)) - (let ((string (syntax->datum #'str-stx))) - (compile-peg-pattern - (compressor - (peg-pattern->defn - (peg:tree (match-pattern peg-pattern string)) #'str-stx) - #'str-stx) - (if (eq? accum 'all) 'body accum)))) - (else (error "Bad embedded PEG string" args)))) - -(add-peg-compiler! 'peg peg-string-compile) diff --git a/gash/peg/using-parsers.scm b/gash/peg/using-parsers.scm deleted file mode 100644 index fb8d736..0000000 --- a/gash/peg/using-parsers.scm +++ /dev/null @@ -1,116 +0,0 @@ -;;;; using-parsers.scm --- utilities to make using parsers easier -;;;; -;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc. -;;;; -;;;; This library is free software; you can redistribute it and/or -;;;; modify it under the terms of the GNU Lesser General Public -;;;; License as published by the Free Software Foundation; either -;;;; version 3 of the License, or (at your option) any later version. -;;;; -;;;; This library is distributed in the hope that it will be useful, -;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -;;;; Lesser General Public License for more details. -;;;; -;;;; You should have received a copy of the GNU Lesser General Public -;;;; License along with this library; if not, write to the Free Software -;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -;;;; - -(define-module (gash peg using-parsers) - #:use-module (gash peg simplify-tree) - #:use-module (gash peg codegen) - #:use-module (gash peg cache) - #:export (match-pattern define-peg-pattern search-for-pattern - prec make-prec peg:start peg:end peg:string - peg:tree peg:substring peg-record?)) - -;;; -;;; Helper Macros -;;; - -(define-syntax until - (syntax-rules () - "Evaluate TEST. If it is true, return its value. Otherwise, -execute the STMTs and try again." - ((_ test stmt stmt* ...) - (let lp () - (or test - (begin stmt stmt* ... (lp))))))) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;; FOR DEFINING AND USING NONTERMINALS -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Parses STRING using NONTERM -(define (match-pattern nonterm string) - ;; We copy the string before using it because it might have been modified - ;; in-place since the last time it was parsed, which would invalidate the - ;; cache. Guile uses copy-on-write for strings, so this is fast. - (let ((res (nonterm (string-copy string) (string-length string) 0))) - (if (not res) - #f - (make-prec 0 (car res) string (string-collapse (cadr res)))))) - -;; Defines a new nonterminal symbol accumulating with ACCUM. -(define-syntax define-peg-pattern - (lambda (x) - (syntax-case x () - ((_ sym accum pat) - (let ((matchf (compile-peg-pattern #'pat (syntax->datum #'accum))) - (accumsym (syntax->datum #'accum))) - ;; CODE is the code to parse the string if the result isn't cached. - (let ((syn (wrap-parser-for-users x matchf accumsym #'sym))) - #`(define sym #,(cg-cached-parser syn)))))))) - -(define (peg-like->peg pat) - (syntax-case pat () - (str (string? (syntax->datum #'str)) #'(peg str)) - (else pat))) - -;; Searches through STRING for something that parses to PEG-MATCHER. Think -;; regexp search. -(define-syntax search-for-pattern - (lambda (x) - (syntax-case x () - ((_ pattern string-uncopied) - (let ((pmsym (syntax->datum #'pattern))) - (let ((matcher (compile-peg-pattern (peg-like->peg #'pattern) 'body))) - ;; We copy the string before using it because it might have been - ;; modified in-place since the last time it was parsed, which would - ;; invalidate the cache. Guile uses copy-on-write for strings, so - ;; this is fast. - #`(let ((string (string-copy string-uncopied)) - (strlen (string-length string-uncopied)) - (at 0)) - (let ((ret (until (or (>= at strlen) - (#,matcher string strlen at)) - (set! at (+ at 1))))) - (if (eq? ret #t) ;; (>= at strlen) succeeded - #f - (let ((end (car ret)) - (match (cadr ret))) - (make-prec - at end string - (string-collapse match)))))))))))) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;; PMATCH STRUCTURE MUNGING -;; Pretty self-explanatory. -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define prec - (make-record-type "peg" '(start end string tree))) -(define make-prec - (record-constructor prec '(start end string tree))) -(define (peg:start pm) - (if pm ((record-accessor prec 'start) pm) #f)) -(define (peg:end pm) - (if pm ((record-accessor prec 'end) pm) #f)) -(define (peg:string pm) - (if pm ((record-accessor prec 'string) pm) #f)) -(define (peg:tree pm) - (if pm ((record-accessor prec 'tree) pm) #f)) -(define (peg:substring pm) - (if pm (substring (peg:string pm) (peg:start pm) (peg:end pm)) #f)) -(define peg-record? (record-predicate prec))