Remove PEG modules and the PEG-based parser.
* gash/grammar.scm, gash/peg.scm, gash/peg/cache.scm, gash/peg/codegen.scm, gash/peg/simplify-tree.scm, gash/peg/string-peg.scm, gash/peg/using-parsers.scm: Delete files. Makefile.am: Remove them.
This commit is contained in:
parent
0b73a0e70f
commit
10400ed2ad
|
@ -83,18 +83,11 @@ MODULES = \
|
|||
gash/environment.scm \
|
||||
gash/eval.scm \
|
||||
gash/gash.scm \
|
||||
gash/grammar.scm \
|
||||
gash/io.scm \
|
||||
gash/job.scm \
|
||||
gash/lexer.scm \
|
||||
gash/parser.scm \
|
||||
gash/pattern.scm \
|
||||
gash/peg.scm \
|
||||
gash/peg/cache.scm \
|
||||
gash/peg/codegen.scm \
|
||||
gash/peg/simplify-tree.scm \
|
||||
gash/peg/string-peg.scm \
|
||||
gash/peg/using-parsers.scm \
|
||||
gash/pipe.scm \
|
||||
gash/readline.scm \
|
||||
gash/repl.scm \
|
||||
|
|
320
gash/grammar.scm
320
gash/grammar.scm
|
@ -1,320 +0,0 @@
|
|||
;;; Gash --- Guile As SHell
|
||||
;;; Copyright © 2016,2017,2018 R.E.W. van Beusekom <rutger.van.beusekom@gmail.com>
|
||||
;;; Copyright © 2018 Jan (janneke) Nieuwenhuizen <janneke@gnu.org>
|
||||
;;;
|
||||
;;; This file is part of Gash.
|
||||
;;;
|
||||
;;; Gash is free software; you can redistribute it and/or modify it
|
||||
;;; under the terms of the GNU General Public License as published by
|
||||
;;; the Free Software Foundation; either version 3 of the License, or (at
|
||||
;;; your option) any later version.
|
||||
;;;
|
||||
;;; Gash is distributed in the hope that it will be useful, but
|
||||
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;;; GNU General Public License for more details.
|
||||
;;;
|
||||
;;; You should have received a copy of the GNU General Public License
|
||||
;;; along with Gash. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
(define-module (gash grammar)
|
||||
#:use-module (ice-9 match)
|
||||
#:use-module (ice-9 pretty-print)
|
||||
#:use-module (ice-9 rdelim)
|
||||
|
||||
#:use-module (srfi srfi-8)
|
||||
|
||||
#:use-module (gash gash)
|
||||
#:use-module (gash peg)
|
||||
#:use-module (gash peg codegen)
|
||||
|
||||
#:export (parse
|
||||
parse-string))
|
||||
|
||||
(define (parse port)
|
||||
(parse-string (read-string port)))
|
||||
|
||||
(define (parse-string input)
|
||||
|
||||
(define io-label "")
|
||||
|
||||
(define (io-label-name str len pos)
|
||||
(let ((at (string-skip str char-alphabetic? pos len)))
|
||||
(set! io-label (substring str pos at))
|
||||
(if (< at len) (list at '())
|
||||
#f)))
|
||||
|
||||
(define (io-label-match str len pos)
|
||||
(if (string-prefix? io-label (substring str pos))
|
||||
(list (+ pos (string-length io-label)) '())
|
||||
#f))
|
||||
|
||||
(define-peg-pattern io-here-label none io-label-name)
|
||||
(define-peg-pattern io-here-delim none io-label-match)
|
||||
(define-peg-pattern io-here-document all
|
||||
(and (+ (and (not-followed-by io-here-delim)
|
||||
peg-any))
|
||||
io-here-delim))
|
||||
|
||||
(define-peg-string-patterns
|
||||
"script <-- ws* compound
|
||||
ws < sp / eol
|
||||
sp < '\\\n'* (comment / [ \t\v])
|
||||
comment < [#] (!eol .)*
|
||||
eol < [\n\r\f]
|
||||
|
||||
compound <-- (term (&rpar / sep#))*
|
||||
|
||||
sep <- sp* (amp ws* / semi ws* / eof) / ws+
|
||||
amp <- '&'
|
||||
semi < ';'!';'
|
||||
eof < !.
|
||||
|
||||
term <- and / or / pipeline
|
||||
and <-- pipeline and-op ws* term
|
||||
or <-- pipeline or-op ws* term
|
||||
and-op < '&&'
|
||||
or-op < '||'
|
||||
|
||||
pipeline <-- '!'? sp* (command (&sep / &or-op / &and-op / &rpar / eof / pipe#))+
|
||||
|
||||
and-or <- '&&' / '||'
|
||||
|
||||
exclamation <- '!'
|
||||
pipe < sp* '|' !'|' ws*
|
||||
|
||||
command <-- function / compound-command / simple-command
|
||||
|
||||
compound-command <- (subshell / brace-group / for-clause / case-clause /
|
||||
if-clause / while-clause / until-clause) (sp* io-redirect)*
|
||||
|
||||
simple-command <- ((io-redirect / assignment) sp*)*
|
||||
((io-redirect / nonreserved) sp*)+ /
|
||||
((io-redirect / assignment) sp*)+
|
||||
((io-redirect / nonreserved) sp*)*
|
||||
|
||||
assignment <-- name assign word?
|
||||
assign < '='
|
||||
|
||||
io-redirect <-- [0-9]* (io-here / io-file)
|
||||
io-file <-- io-op ([0-9]+ / sp* word)
|
||||
io-op <- '<&' / '>&' / '>>' / '>' / '<>'/ '<' / '>|'
|
||||
io-here <-- io-here-op io-here-label sp* eol io-here-document
|
||||
io-here-op <- '<<-' / '<<'
|
||||
|
||||
reserved < ('case' / 'esac' / 'in' / 'if' / 'fi' / 'then' / 'else' /
|
||||
'elif' / 'for' / 'done' / 'do' / 'until' / 'while') &ws
|
||||
nonreserved <- !reserved word
|
||||
|
||||
word <-- test / substitution /
|
||||
(number / variable-subst / variable / delim / literal)+
|
||||
|
||||
function <-- name sp* lpar rpar# ws* function-body
|
||||
name <-- !reserved identifier
|
||||
function-body <- brace-group (sp* io-redirect)*
|
||||
|
||||
subshell <-- lpar compound rpar#
|
||||
brace-group <-- lbrace ws* compound rbrace#
|
||||
|
||||
case-clause <-- case-keyword sp* word sp* in-keyword# ws* case-item+ ws* esac-keyword#
|
||||
case-item <-- pattern sp* colon? ws* compound? case-sep?
|
||||
colon < ':'
|
||||
case-sep < ';;' ws*
|
||||
pattern <-- (word (!rpar '|'# / !'|' &rpar))+ rpar#
|
||||
|
||||
for-clause <-- for-keyword sp+ identifier ws+ (in-keyword sp+ expression)? sep# do-group
|
||||
expression <-- command
|
||||
do-group <-- do-keyword ws+ compound done-keyword#
|
||||
|
||||
if-clause <-- if-keyword sp+ compound then-keyword# ws+ compound else-part? fi-keyword#
|
||||
else-part <-- else-keyword ws+ compound / elif
|
||||
elif <-- elif-keyword ws+ compound then-keyword# ws+ compound else-part?
|
||||
|
||||
while-clause <-- while-keyword compound do-group
|
||||
|
||||
until-clause <-- until-keyword compound do-group
|
||||
|
||||
test <- ('[' / '\\[') sp+ test-args sp+ ']'#
|
||||
test-args <-- (sp* word)+
|
||||
|
||||
literal <- !reserved (escaped / !allowed .)+
|
||||
escaped <- escape [ \"$]
|
||||
escape < [\\]
|
||||
allowed <- ']' / [ \t\v\f\n`'\")};|&$] / '\\\n'
|
||||
|
||||
identifier <- [_a-zA-Z][_a-zA-Z0-9]*
|
||||
|
||||
dollar < '$'
|
||||
number <-- [0-9]+
|
||||
|
||||
substitution <-- dollar lpar compound rpar# / bt ([\\] bt / !bt .)+ bt#
|
||||
lpar < '('
|
||||
rpar < ')'
|
||||
bt < [`]
|
||||
|
||||
variable <-- dollar ('#' / '@' / '*' / [0-9] / name /
|
||||
lbrace name (variable-literal / &rbrace) rbrace)
|
||||
variable-subst <- dollar lbrace (variable-or / variable-and / variable-regex) rbrace
|
||||
variable-or <-- name min variable-word variable-word*
|
||||
variable-and <-- name plus variable-word variable-word*
|
||||
variable-word <- variable-regex / substitution / variable-subst / variable / variable-literal !slash / variable-string / sp*
|
||||
variable-regex <-- name &slash regex-sep variable-literal '/' variable-string &rbrace /
|
||||
name regex-sep variable-string
|
||||
slash < '/'
|
||||
variable-string <- (!rbrace ((!dq !sq .) / delim))+
|
||||
variable-literal <- (!rbrace !min !plus !slash ((!dq !sq .) / delim))+
|
||||
regex-sep <-- ('/' / '%%' / '%' / '##' / '#' / '^^' / '^' /',,' / ',' / '*' / '@' / '?')
|
||||
min < '-'
|
||||
plus < '+'
|
||||
lbrace < '{'
|
||||
rbrace < '}'
|
||||
|
||||
|
||||
delim <-- singlequotes / doublequotes / substitution
|
||||
sq < [']
|
||||
dq < [\"]
|
||||
singlequotes <- sq (!sq .)* sq#
|
||||
doublequotes <- dq (substitution / variable-subst / variable / (!dq (escape '\"' / .)))* dq#
|
||||
|
||||
case-keyword < 'case'
|
||||
do-keyword < 'do'
|
||||
done-keyword < 'done'
|
||||
elif-keyword < 'elif'
|
||||
else-keyword < 'else'
|
||||
esac-keyword < 'esac'
|
||||
fi-keyword < 'fi'
|
||||
for-keyword < 'for'
|
||||
if-keyword < 'if'
|
||||
in-keyword < 'in'
|
||||
then-keyword < 'then'
|
||||
until-keyword < 'until'
|
||||
while-keyword < 'while'
|
||||
")
|
||||
|
||||
(catch 'syntax-error
|
||||
(lambda ()
|
||||
(let* ((match (match-pattern script input))
|
||||
(end (peg:end match))
|
||||
(tree (peg:tree match)))
|
||||
(when (> %debug-level 0)
|
||||
(format #t "parse tree:\n")
|
||||
(pretty-print tree))
|
||||
(if (eq? (string-length input) end)
|
||||
(let ((script (transform tree)))
|
||||
(when (> %debug-level 0)
|
||||
(format #t "script:\n")
|
||||
(pretty-print script))
|
||||
script)
|
||||
(if match
|
||||
(begin
|
||||
(format (current-error-port) "parse error: at offset: ~a\n" end)
|
||||
(pretty-print tree)
|
||||
#f)
|
||||
(begin
|
||||
(format (current-error-port) "parse error: no match\n")
|
||||
#f)))))
|
||||
(lambda (key . args)
|
||||
(define (line-column input pos)
|
||||
(let ((length (string-length input)))
|
||||
(let loop ((lines (string-split input #\newline)) (ln 1) (p 0))
|
||||
(if (null? lines) (values #f #f input)
|
||||
(let* ((line (car lines))
|
||||
(length (string-length line))
|
||||
(end (+ p length 1))
|
||||
(last? (null? (cdr lines))))
|
||||
(if (<= pos end) (values ln (+ (if last? 0 1) (- pos p))
|
||||
(if last? line
|
||||
(string-append line "\\n" (cadr lines))))
|
||||
(loop (cdr lines) (1+ ln) end)))))))
|
||||
(define (format-peg o)
|
||||
(match o
|
||||
(('or l ...) (string-join (map format-peg l) ", or "))
|
||||
(('and l ...) (string-join (map format-peg l) " "))
|
||||
((? symbol?) (symbol->string o))
|
||||
((? string?) o)))
|
||||
|
||||
(receive (ln col line) (line-column input (caar args))
|
||||
(let* ((col (- col 1))
|
||||
(indent (make-string col #\space)))
|
||||
(format #t "~a:~a:~a: syntax-error:\n~a\n~a^\n~aexpected: ~a\n"
|
||||
""
|
||||
ln col line
|
||||
indent
|
||||
indent
|
||||
(format-peg (cadar args)))
|
||||
(exit 1))))))
|
||||
|
||||
(define (transform o)
|
||||
(match o
|
||||
|
||||
(('script command) (transform command))
|
||||
(('script command ...) `(begin ,@(map transform command)))
|
||||
|
||||
;; FIXME: cannot remove pipeline even if it's a single command
|
||||
;; `pipeline' is what executes commands and evaluates them
|
||||
;; (set -e)
|
||||
;; (('pipeline pipeline) (transform pipeline))
|
||||
;; or it results in ((if ...)); which won't work either
|
||||
;; (('pipeline pipeline) (let ((x (transform pipeline)))
|
||||
;; (match x
|
||||
;; (('command command ...) (list x))
|
||||
;; (_ x))))
|
||||
|
||||
(('compound compound) (transform compound))
|
||||
(('compound compound ...) `(begin ,@(map transform compound)))
|
||||
|
||||
(('command ('word (or "." "source")) file-name)
|
||||
`(source ,(transform file-name)))
|
||||
(('command word ... ('io-redirect ('io-here "<<" ('io-here-document string))))
|
||||
`(pipeline (cut display ,string) (command ,@word)))
|
||||
(('command word ... ('io-redirect filedes ... ('io-file ">" file-name)))
|
||||
(cond ((or (null? filedes) (equal? filedes '("1")))
|
||||
`(with-output-to-file ,file-name
|
||||
,(let ((command (transform `(command ,@word))))
|
||||
(match command
|
||||
(('with-input-from-file arg ...)
|
||||
`(cut with-input-from-file ,@arg))
|
||||
(_ command)))))
|
||||
((equal? filedes '("2"))
|
||||
`(with-error-to-file ,file-name
|
||||
,(let ((command (transform `(command ,@word))))
|
||||
(match command
|
||||
(('with-input-from-file arg ...)
|
||||
`(cut with-input-from-file ,@arg))
|
||||
(_ command)))))
|
||||
(else (error (format #f "TODO: output to filedes=~a\n" filedes)))))
|
||||
(('command word ... ('io-redirect ('io-file "<" file-name)))
|
||||
`(with-input-from-file ,file-name ,(transform `(command ,@word))))
|
||||
|
||||
(('command ('word (and (? string?) string)) ...)
|
||||
`(command ,@string))
|
||||
|
||||
(('command ('if-clause if-clause ...))
|
||||
(transform `(if-clause ,@if-clause)))
|
||||
(('if-clause expr then)
|
||||
`(if (true? ,(transform expr)) ,(transform then) 0))
|
||||
(('if-clause expr then ('else-part else))
|
||||
`(if (true? ,(transform expr)) ,(transform then) ,(transform else)))
|
||||
(('if-clause expr then ..1)
|
||||
`(if (true? ,(transform expr)) (begin ,@(map transform then)) 0))
|
||||
(('if-clause expr then ..1 ('else-part else))
|
||||
`(if (true? ,(transform expr)) (begin ,@(map transform then)) ,(transform else)))
|
||||
(('if-clause expr then ('else-part else ..1))
|
||||
`(if (true? ,(transform expr)) ,(transform then) ,@(map transform else)))
|
||||
(('if-clause expr then ..1 ('else-part else ..1))
|
||||
`(if (true? ,(transform expr)) (begin ,@(map transform then)) (begin ,@(map transform else))))
|
||||
|
||||
(('elif elif ...) (transform `(if-clause ,@elif)))
|
||||
|
||||
(('function name body)
|
||||
`(function ,name (lambda ( . args) ,(transform body))))
|
||||
|
||||
(('word 'delim) '(word ""))
|
||||
|
||||
(('pipeline ('command ('word "shift"))) '(shift))
|
||||
|
||||
(('command ('word (and (or "[" "\\[") bracket) ('test-args test-args ...) "]"))
|
||||
`(command (word ,bracket) ,@(map transform test-args) (word "]")))
|
||||
|
||||
((h t ...) (map transform o))
|
||||
(_ o)))
|
41
gash/peg.scm
41
gash/peg.scm
|
@ -1,41 +0,0 @@
|
|||
;;;; peg.scm --- Parsing Expression Grammar (PEG) parser generator
|
||||
;;;;
|
||||
;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
|
||||
;;;;
|
||||
;;;; This library is free software; you can redistribute it and/or
|
||||
;;;; modify it under the terms of the GNU Lesser General Public
|
||||
;;;; License as published by the Free Software Foundation; either
|
||||
;;;; version 3 of the License, or (at your option) any later version.
|
||||
;;;;
|
||||
;;;; This library is distributed in the hope that it will be useful,
|
||||
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;;;; Lesser General Public License for more details.
|
||||
;;;;
|
||||
;;;; You should have received a copy of the GNU Lesser General Public
|
||||
;;;; License along with this library; if not, write to the Free Software
|
||||
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;;;;
|
||||
|
||||
(define-module (gash peg)
|
||||
#:use-module (gash peg codegen)
|
||||
#:use-module (gash peg string-peg)
|
||||
;; Note: the most important effect of using string-peg is not whatever
|
||||
;; functions it exports, but the fact that it adds a new handler to
|
||||
;; peg-sexp-compile.
|
||||
#:use-module (gash peg simplify-tree)
|
||||
#:use-module (gash peg using-parsers)
|
||||
#:use-module (gash peg cache)
|
||||
#:re-export (define-peg-pattern
|
||||
define-peg-string-patterns
|
||||
match-pattern
|
||||
search-for-pattern
|
||||
compile-peg-pattern
|
||||
keyword-flatten
|
||||
context-flatten
|
||||
peg:start
|
||||
peg:end
|
||||
peg:string
|
||||
peg:tree
|
||||
peg:substring
|
||||
peg-record?))
|
|
@ -1,45 +0,0 @@
|
|||
;;;; cache.scm --- cache the results of parsing
|
||||
;;;;
|
||||
;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
|
||||
;;;;
|
||||
;;;; This library is free software; you can redistribute it and/or
|
||||
;;;; modify it under the terms of the GNU Lesser General Public
|
||||
;;;; License as published by the Free Software Foundation; either
|
||||
;;;; version 3 of the License, or (at your option) any later version.
|
||||
;;;;
|
||||
;;;; This library is distributed in the hope that it will be useful,
|
||||
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;;;; Lesser General Public License for more details.
|
||||
;;;;
|
||||
;;;; You should have received a copy of the GNU Lesser General Public
|
||||
;;;; License along with this library; if not, write to the Free Software
|
||||
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;;;;
|
||||
|
||||
(define-module (gash peg cache)
|
||||
#:export (cg-cached-parser))
|
||||
|
||||
;; The results of parsing using a nonterminal are cached. Think of it like a
|
||||
;; hash with no conflict resolution. Process for deciding on the cache size
|
||||
;; wasn't very scientific; just ran the benchmarks and stopped a little after
|
||||
;; the point of diminishing returns on my box.
|
||||
(define *cache-size* 512)
|
||||
|
||||
(define (make-cache)
|
||||
(make-vector *cache-size* #f))
|
||||
|
||||
;; given a syntax object which is a parser function, returns syntax
|
||||
;; which, if evaluated, will become a parser function that uses a cache.
|
||||
(define (cg-cached-parser parser)
|
||||
#`(let ((cache (make-cache)))
|
||||
(lambda (str strlen at)
|
||||
(let* ((vref (vector-ref cache (modulo at *cache-size*))))
|
||||
;; Check to see whether the value is cached.
|
||||
(if (and vref (eq? (car vref) str) (= (cadr vref) at))
|
||||
(caddr vref);; If it is return it.
|
||||
(let ((fres ;; Else calculate it and cache it.
|
||||
(#,parser str strlen at)))
|
||||
(vector-set! cache (modulo at *cache-size*)
|
||||
(list str at fres))
|
||||
fres))))))
|
|
@ -1,390 +0,0 @@
|
|||
;;;; codegen.scm --- code generation for composable parsers
|
||||
;;;;
|
||||
;;;; Copyright (C) 2011 Free Software Foundation, Inc.
|
||||
;;;;
|
||||
;;;; This library is free software; you can redistribute it and/or
|
||||
;;;; modify it under the terms of the GNU Lesser General Public
|
||||
;;;; License as published by the Free Software Foundation; either
|
||||
;;;; version 3 of the License, or (at your option) any later version.
|
||||
;;;;
|
||||
;;;; This library is distributed in the hope that it will be useful,
|
||||
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;;;; Lesser General Public License for more details.
|
||||
;;;;
|
||||
;;;; You should have received a copy of the GNU Lesser General Public
|
||||
;;;; License along with this library; if not, write to the Free Software
|
||||
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;;;;
|
||||
|
||||
(define-module (gash peg codegen)
|
||||
#:export (compile-peg-pattern wrap-parser-for-users add-peg-compiler!)
|
||||
#:use-module (ice-9 pretty-print)
|
||||
#:use-module (system base pmatch))
|
||||
|
||||
(define-syntax single?
|
||||
(syntax-rules ()
|
||||
"Return #t if X is a list of one element."
|
||||
((_ x)
|
||||
(pmatch x
|
||||
((_) #t)
|
||||
(else #f)))))
|
||||
|
||||
(define-syntax single-filter
|
||||
(syntax-rules ()
|
||||
"If EXP is a list of one element, return the element. Otherwise
|
||||
return EXP."
|
||||
((_ exp)
|
||||
(pmatch exp
|
||||
((,elt) elt)
|
||||
(,elts elts)))))
|
||||
|
||||
(define-syntax push-not-null!
|
||||
(syntax-rules ()
|
||||
"If OBJ is non-null, push it onto LST, otherwise do nothing."
|
||||
((_ lst obj)
|
||||
(if (not (null? obj))
|
||||
(push! lst obj)))))
|
||||
|
||||
(define-syntax push!
|
||||
(syntax-rules ()
|
||||
"Push an object onto a list."
|
||||
((_ lst obj)
|
||||
(set! lst (cons obj lst)))))
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;; CODE GENERATORS
|
||||
;; These functions generate scheme code for parsing PEGs.
|
||||
;; Conventions:
|
||||
;; accum: (all name body none)
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Code we generate will have a certain return structure depending on how we're
|
||||
;; accumulating (the ACCUM variable).
|
||||
(define (cg-generic-ret accum name body-uneval at)
|
||||
;; name, body-uneval and at are syntax
|
||||
#`(let ((body #,body-uneval))
|
||||
#,(cond
|
||||
((and (eq? accum 'all) name)
|
||||
#`(list #,at
|
||||
(cond
|
||||
((not (list? body)) (list '#,name body))
|
||||
((null? body) '#,name)
|
||||
((symbol? (car body)) (list '#,name body))
|
||||
(else (cons '#,name body)))))
|
||||
((eq? accum 'name)
|
||||
#`(list #,at '#,name))
|
||||
((eq? accum 'body)
|
||||
#`(list #,at
|
||||
(cond
|
||||
((single? body) (car body))
|
||||
(else body))))
|
||||
((eq? accum 'none)
|
||||
#`(list #,at '()))
|
||||
(else
|
||||
(begin
|
||||
(pretty-print `(cg-generic-ret-error ,accum ,name ,body-uneval ,at))
|
||||
(pretty-print "Defaulting to accum of none.\n")
|
||||
#`(list #,at '()))))))
|
||||
|
||||
;; The short name makes the formatting below much easier to read.
|
||||
(define cggr cg-generic-ret)
|
||||
|
||||
;; Generates code that matches a particular string.
|
||||
;; E.g.: (cg-string syntax "abc" 'body)
|
||||
(define (cg-string pat accum)
|
||||
(let ((plen (string-length pat)))
|
||||
#`(lambda (str len pos)
|
||||
(let ((end (+ pos #,plen)))
|
||||
(and (<= end len)
|
||||
(string= str #,pat pos end)
|
||||
#,(case accum
|
||||
((all) #`(list end (list 'cg-string #,pat)))
|
||||
((name) #`(list end 'cg-string))
|
||||
((body) #`(list end #,pat))
|
||||
((none) #`(list end '()))
|
||||
(else (error "bad accum" accum))))))))
|
||||
|
||||
;; Generates code for matching any character.
|
||||
;; E.g.: (cg-peg-any syntax 'body)
|
||||
(define (cg-peg-any accum)
|
||||
#`(lambda (str len pos)
|
||||
(and (< pos len)
|
||||
#,(case accum
|
||||
((all) #`(list (1+ pos)
|
||||
(list 'cg-peg-any (substring str pos (1+ pos)))))
|
||||
((name) #`(list (1+ pos) 'cg-peg-any))
|
||||
((body) #`(list (1+ pos) (substring str pos (1+ pos))))
|
||||
((none) #`(list (1+ pos) '()))
|
||||
(else (error "bad accum" accum))))))
|
||||
|
||||
;; Generates code for matching a range of characters between start and end.
|
||||
;; E.g.: (cg-range syntax #\a #\z 'body)
|
||||
(define (cg-range pat accum)
|
||||
(syntax-case pat ()
|
||||
((start end)
|
||||
(if (not (and (char? (syntax->datum #'start))
|
||||
(char? (syntax->datum #'end))))
|
||||
(error "range PEG should have characters after it; instead got"
|
||||
#'start #'end))
|
||||
#`(lambda (str len pos)
|
||||
(and (< pos len)
|
||||
(let ((c (string-ref str pos)))
|
||||
(and (char>=? c start)
|
||||
(char<=? c end)
|
||||
#,(case accum
|
||||
((all) #`(list (1+ pos) (list 'cg-range (string c))))
|
||||
((name) #`(list (1+ pos) 'cg-range))
|
||||
((body) #`(list (1+ pos) (string c)))
|
||||
((none) #`(list (1+ pos) '()))
|
||||
(else (error "bad accum" accum))))))))))
|
||||
|
||||
;; Generate code to match a pattern and do nothing with the result
|
||||
(define (cg-ignore pat accum)
|
||||
(syntax-case pat ()
|
||||
((inner)
|
||||
(compile-peg-pattern #'inner 'none))))
|
||||
|
||||
(define (cg-capture pat accum)
|
||||
(syntax-case pat ()
|
||||
((inner)
|
||||
(compile-peg-pattern #'inner 'body))))
|
||||
|
||||
;; Filters the accum argument to compile-peg-pattern for buildings like string
|
||||
;; literals (since we don't want to tag them with their name if we're doing an
|
||||
;; "all" accum).
|
||||
(define (builtin-accum-filter accum)
|
||||
(cond
|
||||
((eq? accum 'all) 'body)
|
||||
((eq? accum 'name) 'name)
|
||||
((eq? accum 'body) 'body)
|
||||
((eq? accum 'none) 'none)))
|
||||
(define baf builtin-accum-filter)
|
||||
|
||||
;; Top-level function builder for AND. Reduces to a call to CG-AND-INT.
|
||||
(define (cg-and clauses accum)
|
||||
#`(lambda (str len pos)
|
||||
(let ((body '()))
|
||||
#,(cg-and-int clauses (baf accum) #'str #'len #'pos #'body))))
|
||||
|
||||
;; Internal function builder for AND (calls itself).
|
||||
(define (cg-and-int clauses accum str strlen at body)
|
||||
(syntax-case clauses ()
|
||||
(()
|
||||
(cggr accum 'cg-and #`(reverse #,body) at))
|
||||
((first rest ...)
|
||||
#`(let ((res (#,(compile-peg-pattern #'first accum) #,str #,strlen #,at)))
|
||||
(and res
|
||||
;; update AT and BODY then recurse
|
||||
(let ((newat (car res))
|
||||
(newbody (cadr res)))
|
||||
(set! #,at newat)
|
||||
(push-not-null! #,body (single-filter newbody))
|
||||
#,(cg-and-int #'(rest ...) accum str strlen at body)))))))
|
||||
|
||||
;; Top-level function builder for OR. Reduces to a call to CG-OR-INT.
|
||||
(define (cg-or clauses accum)
|
||||
#`(lambda (str len pos)
|
||||
#,(cg-or-int clauses (baf accum) #'str #'len #'pos)))
|
||||
|
||||
;; Internal function builder for OR (calls itself).
|
||||
(define (cg-or-int clauses accum str strlen at)
|
||||
(syntax-case clauses ()
|
||||
(()
|
||||
#f)
|
||||
((first rest ...)
|
||||
#`(or (#,(compile-peg-pattern #'first accum) #,str #,strlen #,at)
|
||||
#,(cg-or-int #'(rest ...) accum str strlen at)))))
|
||||
|
||||
(define (cg-* args accum)
|
||||
(syntax-case args ()
|
||||
((pat)
|
||||
#`(lambda (str strlen at)
|
||||
(let ((body '()))
|
||||
(let lp ((end at) (count 0))
|
||||
(let* ((match (#,(compile-peg-pattern #'pat (baf accum))
|
||||
str strlen end))
|
||||
(new-end (if match (car match) end))
|
||||
(count (if (> new-end end) (1+ count) count)))
|
||||
(if (> new-end end)
|
||||
(push-not-null! body (single-filter (cadr match))))
|
||||
(if (and (> new-end end)
|
||||
#,#t)
|
||||
(lp new-end count)
|
||||
(let ((success #,#t))
|
||||
#,#`(and success
|
||||
#,(cggr (baf accum) 'cg-body
|
||||
#'(reverse body) #'new-end)))))))))))
|
||||
|
||||
(define (cg-+ args accum)
|
||||
(syntax-case args ()
|
||||
((pat)
|
||||
#`(lambda (str strlen at)
|
||||
(let ((body '()))
|
||||
(let lp ((end at) (count 0))
|
||||
(let* ((match (#,(compile-peg-pattern #'pat (baf accum))
|
||||
str strlen end))
|
||||
(new-end (if match (car match) end))
|
||||
(count (if (> new-end end) (1+ count) count)))
|
||||
(if (> new-end end)
|
||||
(push-not-null! body (single-filter (cadr match))))
|
||||
(if (and (> new-end end)
|
||||
#,#t)
|
||||
(lp new-end count)
|
||||
(let ((success #,#'(>= count 1)))
|
||||
#,#`(and success
|
||||
#,(cggr (baf accum) 'cg-body
|
||||
#'(reverse body) #'new-end)))))))))))
|
||||
|
||||
(define (cg-? args accum)
|
||||
(syntax-case args ()
|
||||
((pat)
|
||||
#`(lambda (str strlen at)
|
||||
(let ((body '()))
|
||||
(let lp ((end at) (count 0))
|
||||
(let* ((match (#,(compile-peg-pattern #'pat (baf accum))
|
||||
str strlen end))
|
||||
(new-end (if match (car match) end))
|
||||
(count (if (> new-end end) (1+ count) count)))
|
||||
(if (> new-end end)
|
||||
(push-not-null! body (single-filter (cadr match))))
|
||||
(if (and (> new-end end)
|
||||
#,#'(< count 1))
|
||||
(lp new-end count)
|
||||
(let ((success #,#t))
|
||||
#,#`(and success
|
||||
#,(cggr (baf accum) 'cg-body
|
||||
#'(reverse body) #'new-end)))))))))))
|
||||
|
||||
(define (cg-followed-by args accum)
|
||||
(syntax-case args ()
|
||||
((pat)
|
||||
#`(lambda (str strlen at)
|
||||
(let ((body '()))
|
||||
(let lp ((end at) (count 0))
|
||||
(let* ((match (#,(compile-peg-pattern #'pat (baf accum))
|
||||
str strlen end))
|
||||
(new-end (if match (car match) end))
|
||||
(count (if (> new-end end) (1+ count) count)))
|
||||
(if (> new-end end)
|
||||
(push-not-null! body (single-filter (cadr match))))
|
||||
(if (and (> new-end end)
|
||||
#,#'(< count 1))
|
||||
(lp new-end count)
|
||||
(let ((success #,#'(= count 1)))
|
||||
#,#`(and success
|
||||
#,(cggr (baf accum) 'cg-body #''() #'at)))))))))))
|
||||
|
||||
(define (cg-not-followed-by args accum)
|
||||
(syntax-case args ()
|
||||
((pat)
|
||||
#`(lambda (str strlen at)
|
||||
(let ((body '()))
|
||||
(let lp ((end at) (count 0))
|
||||
(let* ((match (#,(compile-peg-pattern #'pat (baf accum))
|
||||
str strlen end))
|
||||
(new-end (if match (car match) end))
|
||||
(count (if (> new-end end) (1+ count) count)))
|
||||
(if (> new-end end)
|
||||
(push-not-null! body (single-filter (cadr match))))
|
||||
(if (and (> new-end end)
|
||||
#,#'(< count 1))
|
||||
(lp new-end count)
|
||||
(let ((success #,#'(= count 1)))
|
||||
#,#`(if success
|
||||
#f
|
||||
#,(cggr (baf accum) 'cg-body #''() #'at)))))))))))
|
||||
|
||||
|
||||
(define (cg-expect-int clauses accum str strlen at)
|
||||
(syntax-case clauses ()
|
||||
((pat)
|
||||
#`(or (#,(compile-peg-pattern #'pat accum) #,str #,strlen #,at)
|
||||
(throw 'syntax-error (list #,at (syntax->datum #'pat))))))) ;;TODO throw partial match
|
||||
|
||||
(define (cg-expect clauses accum)
|
||||
#`(lambda (str len pos)
|
||||
#,(cg-expect-int clauses ((@@ (ice-9 peg codegen) baf) accum) #'str #'len #'pos)))
|
||||
|
||||
;; Association list of functions to handle different expressions as PEGs
|
||||
(define peg-compiler-alist '())
|
||||
|
||||
(define (add-peg-compiler! symbol function)
|
||||
(set! peg-compiler-alist
|
||||
(assq-set! peg-compiler-alist symbol function)))
|
||||
|
||||
(add-peg-compiler! 'range cg-range)
|
||||
(add-peg-compiler! 'ignore cg-ignore)
|
||||
(add-peg-compiler! 'capture cg-capture)
|
||||
(add-peg-compiler! 'and cg-and)
|
||||
(add-peg-compiler! 'or cg-or)
|
||||
(add-peg-compiler! '* cg-*)
|
||||
(add-peg-compiler! '+ cg-+)
|
||||
(add-peg-compiler! '? cg-?)
|
||||
(add-peg-compiler! 'followed-by cg-followed-by)
|
||||
(add-peg-compiler! 'not-followed-by cg-not-followed-by)
|
||||
(add-peg-compiler! 'expect cg-expect)
|
||||
|
||||
;; Takes an arbitrary expressions and accumulation variable, then parses it.
|
||||
;; E.g.: (compile-peg-pattern syntax '(and "abc" (or "-" (range #\a #\z))) 'all)
|
||||
(define (compile-peg-pattern pat accum)
|
||||
(syntax-case pat (peg-any)
|
||||
(peg-any
|
||||
(cg-peg-any (baf accum)))
|
||||
(sym (identifier? #'sym) ;; nonterminal
|
||||
#'sym)
|
||||
(str (string? (syntax->datum #'str)) ;; literal string
|
||||
(cg-string (syntax->datum #'str) (baf accum)))
|
||||
((name . args) (let* ((nm (syntax->datum #'name))
|
||||
(entry (assq-ref peg-compiler-alist nm)))
|
||||
(if entry
|
||||
(entry #'args accum)
|
||||
(error "Bad peg form" nm #'args
|
||||
"Not one of" (map car peg-compiler-alist)))))))
|
||||
|
||||
;; Packages the results of a parser
|
||||
|
||||
(define indent 0)
|
||||
|
||||
(define (trace? symbol)
|
||||
(and #f (not (memq symbol '()))))
|
||||
|
||||
(define (wrap-parser-for-users for-syntax parser accumsym s-syn)
|
||||
#`(lambda (str strlen at)
|
||||
(when (trace? '#,s-syn)
|
||||
(format (current-error-port) "~a~a\n"
|
||||
(make-string indent #\space)
|
||||
'#,s-syn))
|
||||
(set! indent (+ indent 4))
|
||||
(let ((res (#,parser str strlen at)))
|
||||
(set! indent (- indent 4))
|
||||
;; Try to match the nonterminal.
|
||||
(let ((pos (or (and res (car res)) 0)))
|
||||
(when (and (trace? '#,s-syn) (< at pos))
|
||||
(format (current-error-port) "~a~a := ~s\tnext: ~s\n"
|
||||
(make-string indent #\space)
|
||||
'#,s-syn
|
||||
(substring str at pos)
|
||||
(substring str pos (min strlen (+ pos 10))))))
|
||||
(if res
|
||||
;; If we matched, do some post-processing to figure out
|
||||
;; what data to propagate upward.
|
||||
(let ((at (car res))
|
||||
(body (cadr res)))
|
||||
#,(cond
|
||||
((eq? accumsym 'name)
|
||||
#`(list at '#,s-syn))
|
||||
((eq? accumsym 'all)
|
||||
#`(list (car res)
|
||||
(cond
|
||||
((not (list? body))
|
||||
(list '#,s-syn body))
|
||||
((null? body) '#,s-syn)
|
||||
((symbol? (car body))
|
||||
(list '#,s-syn body))
|
||||
(else (cons '#,s-syn body)))))
|
||||
((eq? accumsym 'none) #`(list (car res) '()))
|
||||
(else #`(begin res))))
|
||||
;; If we didn't match, just return false.
|
||||
#f))))
|
|
@ -1,97 +0,0 @@
|
|||
;;;; simplify-tree.scm --- utility functions for the PEG parser
|
||||
;;;;
|
||||
;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
|
||||
;;;;
|
||||
;;;; This library is free software; you can redistribute it and/or
|
||||
;;;; modify it under the terms of the GNU Lesser General Public
|
||||
;;;; License as published by the Free Software Foundation; either
|
||||
;;;; version 3 of the License, or (at your option) any later version.
|
||||
;;;;
|
||||
;;;; This library is distributed in the hope that it will be useful,
|
||||
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;;;; Lesser General Public License for more details.
|
||||
;;;;
|
||||
;;;; You should have received a copy of the GNU Lesser General Public
|
||||
;;;; License along with this library; if not, write to the Free Software
|
||||
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;;;;
|
||||
|
||||
(define-module (gash peg simplify-tree)
|
||||
#:export (keyword-flatten context-flatten string-collapse)
|
||||
#:use-module (system base pmatch))
|
||||
|
||||
(define-syntax single?
|
||||
(syntax-rules ()
|
||||
"Return #t if X is a list of one element."
|
||||
((_ x)
|
||||
(pmatch x
|
||||
((_) #t)
|
||||
(else #f)))))
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;; POST-PROCESSING FUNCTIONS (TO CANONICALIZE MATCH TREES)
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Is everything in LST true?
|
||||
(define (andlst lst)
|
||||
(or (null? lst)
|
||||
(and (car lst) (andlst (cdr lst)))))
|
||||
|
||||
;; Is LST a list of strings?
|
||||
(define (string-list? lst)
|
||||
(and (list? lst) (not (null? lst))
|
||||
(andlst (map string? lst))))
|
||||
|
||||
;; Groups all strings that are next to each other in LST. Used in
|
||||
;; STRING-COLLAPSE.
|
||||
(define (string-group lst)
|
||||
(if (not (list? lst))
|
||||
lst
|
||||
(if (null? lst)
|
||||
'()
|
||||
(let ((next (string-group (cdr lst))))
|
||||
(if (not (string? (car lst)))
|
||||
(cons (car lst) next)
|
||||
(if (and (not (null? next))
|
||||
(list? (car next))
|
||||
(string? (caar next)))
|
||||
(cons (cons (car lst) (car next)) (cdr next))
|
||||
(cons (list (car lst)) next)))))))
|
||||
|
||||
|
||||
;; Collapses all the string in LST.
|
||||
;; ("a" "b" (c d) "e" "f") -> ("ab" (c d) "ef")
|
||||
(define (string-collapse lst)
|
||||
(if (list? lst)
|
||||
(let ((res (map (lambda (x) (if (string-list? x)
|
||||
(apply string-append x)
|
||||
x))
|
||||
(string-group (map string-collapse lst)))))
|
||||
(if (single? res) (car res) res))
|
||||
lst))
|
||||
|
||||
;; If LST is an atom, return (list LST), else return LST.
|
||||
(define (mklst lst)
|
||||
(if (not (list? lst)) (list lst) lst))
|
||||
|
||||
;; Takes a list and "flattens" it, using the predicate TST to know when to stop
|
||||
;; instead of terminating on atoms (see tutorial).
|
||||
(define (context-flatten tst lst)
|
||||
(if (or (not (list? lst)) (null? lst))
|
||||
lst
|
||||
(if (tst lst)
|
||||
(list lst)
|
||||
(apply append
|
||||
(map (lambda (x) (mklst (context-flatten tst x)))
|
||||
lst)))))
|
||||
|
||||
;; Takes a list and "flattens" it, using the list of keywords KEYWORD-LST to
|
||||
;; know when to stop at (see tutorial).
|
||||
(define (keyword-flatten keyword-lst lst)
|
||||
(context-flatten
|
||||
(lambda (x)
|
||||
(if (or (not (list? x)) (null? x))
|
||||
#t
|
||||
(member (car x) keyword-lst)))
|
||||
lst))
|
|
@ -1,280 +0,0 @@
|
|||
;;;; string-peg.scm --- representing PEG grammars as strings
|
||||
;;;;
|
||||
;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
|
||||
;;;;
|
||||
;;;; This library is free software; you can redistribute it and/or
|
||||
;;;; modify it under the terms of the GNU Lesser General Public
|
||||
;;;; License as published by the Free Software Foundation; either
|
||||
;;;; version 3 of the License, or (at your option) any later version.
|
||||
;;;;
|
||||
;;;; This library is distributed in the hope that it will be useful,
|
||||
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;;;; Lesser General Public License for more details.
|
||||
;;;;
|
||||
;;;; You should have received a copy of the GNU Lesser General Public
|
||||
;;;; License along with this library; if not, write to the Free Software
|
||||
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;;;;
|
||||
|
||||
(define-module (gash peg string-peg)
|
||||
#:export (peg-as-peg
|
||||
define-peg-string-patterns
|
||||
peg-grammar)
|
||||
#:use-module (gash peg using-parsers)
|
||||
#:use-module (gash peg codegen)
|
||||
#:use-module (gash peg simplify-tree))
|
||||
|
||||
;; Gets the left-hand depth of a list.
|
||||
(define (depth lst)
|
||||
(if (or (not (list? lst)) (null? lst))
|
||||
0
|
||||
(+ 1 (depth (car lst)))))
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;; Parse string PEGs using sexp PEGs.
|
||||
;; See the variable PEG-AS-PEG for an easier-to-read syntax.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Grammar for PEGs in PEG grammar.
|
||||
(define peg-as-peg
|
||||
"grammar <-- (nonterminal ('<--' / '<-' / '<') sp pattern)+
|
||||
pattern <-- alternative (SLASH sp alternative)*
|
||||
alternative <-- ([!&]? sp suffix)+
|
||||
suffix <-- primary ([*+?] sp)*
|
||||
primary <-- secondary ([#] sp)?
|
||||
secondary <-- '(' sp pattern ')' sp / '.' sp / literal / charclass / nonterminal !'<'
|
||||
literal <-- ['] (!['] .)* ['] sp
|
||||
charclass <-- LB (!']' (CCrange / CCsingle))* RB sp
|
||||
CCrange <-- . '-' .
|
||||
CCsingle <-- .
|
||||
nonterminal <-- [a-zA-Z0-9-]+ sp
|
||||
sp < [ \t\n]*
|
||||
SLASH < '/'
|
||||
LB < '['
|
||||
RB < ']'
|
||||
")
|
||||
|
||||
(define-syntax define-sexp-parser
|
||||
(lambda (x)
|
||||
(syntax-case x ()
|
||||
((_ sym accum pat)
|
||||
(let* ((matchf (compile-peg-pattern #'pat (syntax->datum #'accum)))
|
||||
(accumsym (syntax->datum #'accum))
|
||||
(syn (wrap-parser-for-users x matchf accumsym #'sym)))
|
||||
#`(define sym #,syn))))))
|
||||
|
||||
(define-sexp-parser peg-grammar all
|
||||
(+ (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern)))
|
||||
(define-sexp-parser peg-pattern all
|
||||
(and peg-alternative
|
||||
(* (and (ignore "/") peg-sp peg-alternative))))
|
||||
(define-sexp-parser peg-alternative all
|
||||
(+ (and (? (or "!" "&")) peg-sp peg-suffix)))
|
||||
(define-sexp-parser peg-suffix all
|
||||
(and peg-primary (* (and (or "*" "+" "?") peg-sp))))
|
||||
(define-sexp-parser peg-primary all
|
||||
(and peg-secondary (? (and "#" peg-sp))))
|
||||
(define-sexp-parser peg-secondary all
|
||||
(or (and "(" peg-sp peg-pattern ")" peg-sp)
|
||||
(and "." peg-sp)
|
||||
peg-literal
|
||||
peg-charclass
|
||||
(and peg-nonterminal (not-followed-by "<"))))
|
||||
(define-sexp-parser peg-literal all
|
||||
(and "'" (* (and (not-followed-by "'") peg-any)) "'" peg-sp))
|
||||
(define-sexp-parser peg-charclass all
|
||||
(and (ignore "[")
|
||||
(* (and (not-followed-by "]")
|
||||
(or charclass-range charclass-single)))
|
||||
(ignore "]")
|
||||
peg-sp))
|
||||
(define-sexp-parser charclass-range all (and peg-any "-" peg-any))
|
||||
(define-sexp-parser charclass-single all peg-any)
|
||||
(define-sexp-parser peg-nonterminal all
|
||||
(and (+ (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-")) peg-sp))
|
||||
(define-sexp-parser peg-sp none
|
||||
(* (or " " "\t" "\n")))
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;; PARSE STRING PEGS
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Takes a string representing a PEG grammar and returns syntax that
|
||||
;; will define all of the nonterminals in the grammar with equivalent
|
||||
;; PEG s-expressions.
|
||||
(define (peg-parser str for-syntax)
|
||||
(let ((parsed (match-pattern peg-grammar str)))
|
||||
(if (not parsed)
|
||||
(begin
|
||||
;; (display "Invalid PEG grammar!\n")
|
||||
#f)
|
||||
(let ((lst (peg:tree parsed)))
|
||||
(cond
|
||||
((or (not (list? lst)) (null? lst))
|
||||
lst)
|
||||
((eq? (car lst) 'peg-grammar)
|
||||
#`(begin
|
||||
#,@(map (lambda (x) (peg-nonterm->defn x for-syntax))
|
||||
(context-flatten (lambda (lst) (<= (depth lst) 2))
|
||||
(cdr lst))))))))))
|
||||
|
||||
;; Macro wrapper for PEG-PARSER. Parses PEG grammars expressed as strings and
|
||||
;; defines all the appropriate nonterminals.
|
||||
(define-syntax define-peg-string-patterns
|
||||
(lambda (x)
|
||||
(syntax-case x ()
|
||||
((_ str)
|
||||
(peg-parser (syntax->datum #'str) x)))))
|
||||
|
||||
;; lst has format (nonterm grabber pattern), where
|
||||
;; nonterm is a symbol (the name of the nonterminal),
|
||||
;; grabber is a string (either "<", "<-" or "<--"), and
|
||||
;; pattern is the parse of a PEG pattern expressed as as string.
|
||||
(define (peg-nonterm->defn lst for-syntax)
|
||||
(let* ((nonterm (car lst))
|
||||
(grabber (cadr lst))
|
||||
(pattern (caddr lst))
|
||||
(nonterm-name (datum->syntax for-syntax
|
||||
(string->symbol (cadr nonterm)))))
|
||||
#`(define-peg-pattern #,nonterm-name
|
||||
#,(cond
|
||||
((string=? grabber "<--") (datum->syntax for-syntax 'all))
|
||||
((string=? grabber "<-") (datum->syntax for-syntax 'body))
|
||||
(else (datum->syntax for-syntax 'none)))
|
||||
#,(compressor (peg-pattern->defn pattern for-syntax) for-syntax))))
|
||||
|
||||
;; lst has format ('peg-pattern ...).
|
||||
;; After the context-flatten, (cdr lst) has format
|
||||
;; (('peg-alternative ...) ...), where the outer list is a collection
|
||||
;; of elements from a '/' alternative.
|
||||
(define (peg-pattern->defn lst for-syntax)
|
||||
#`(or #,@(map (lambda (x) (peg-alternative->defn x for-syntax))
|
||||
(context-flatten (lambda (x) (eq? (car x) 'peg-alternative))
|
||||
(cdr lst)))))
|
||||
|
||||
;; lst has format ('peg-alternative ...).
|
||||
;; After the context-flatten, (cdr lst) has the format
|
||||
;; (item ...), where each item has format either ("!" ...), ("&" ...),
|
||||
;; or ('peg-suffix ...).
|
||||
(define (peg-alternative->defn lst for-syntax)
|
||||
#`(and #,@(map (lambda (x) (peg-body->defn x for-syntax))
|
||||
(context-flatten (lambda (x) (or (string? (car x))
|
||||
(eq? (car x) 'peg-suffix)))
|
||||
(cdr lst)))))
|
||||
|
||||
;; lst has the format either
|
||||
;; ("!" ('peg-suffix ...)), ("&" ('peg-suffix ...)), or
|
||||
;; ('peg-suffix ...).
|
||||
(define (peg-body->defn lst for-syntax)
|
||||
(cond
|
||||
((equal? (car lst) "&")
|
||||
#`(followed-by #,(peg-suffix->defn (cadr lst) for-syntax)))
|
||||
((equal? (car lst) "!")
|
||||
#`(not-followed-by #,(peg-suffix->defn (cadr lst) for-syntax)))
|
||||
((eq? (car lst) 'peg-suffix)
|
||||
(peg-suffix->defn lst for-syntax))
|
||||
(else `(peg-parse-body-fail ,lst))))
|
||||
|
||||
;; lst has format ('peg-suffix <peg-primary> (? (/ "*" "?" "+")))
|
||||
(define (peg-suffix->defn lst for-syntax)
|
||||
(let ((inner-defn (peg-primary->defn (cadr lst) for-syntax)))
|
||||
(cond
|
||||
((null? (cddr lst))
|
||||
inner-defn)
|
||||
((equal? (caddr lst) "*")
|
||||
#`(* #,inner-defn))
|
||||
((equal? (caddr lst) "?")
|
||||
#`(? #,inner-defn))
|
||||
((equal? (caddr lst) "+")
|
||||
#`(+ #,inner-defn)))))
|
||||
|
||||
;; Parse a primary.
|
||||
(define (peg-primary->defn lst for-syntax)
|
||||
(let ((inner-defn (peg-secondary->defn (cadr lst) for-syntax)))
|
||||
(if (and (pair? (cddr lst)) (equal? (caddr lst) "#")) #`(expect #,inner-defn)
|
||||
inner-defn)))
|
||||
|
||||
(define (peg-secondary->defn lst for-syntax)
|
||||
(let ((el (cadr lst)))
|
||||
(cond
|
||||
((list? el)
|
||||
(cond
|
||||
((eq? (car el) 'peg-literal)
|
||||
(peg-literal->defn el for-syntax))
|
||||
((eq? (car el) 'peg-charclass)
|
||||
(peg-charclass->defn el for-syntax))
|
||||
((eq? (car el) 'peg-nonterminal)
|
||||
(datum->syntax for-syntax (string->symbol (cadr el))))))
|
||||
((string? el)
|
||||
(cond
|
||||
((equal? el "(")
|
||||
(peg-pattern->defn (caddr lst) for-syntax))
|
||||
((equal? el ".")
|
||||
(datum->syntax for-syntax 'peg-any))
|
||||
(else (datum->syntax for-syntax
|
||||
`(peg-parse-any unknown-string ,lst)))))
|
||||
(else (datum->syntax for-syntax
|
||||
`(peg-parse-any unknown-el ,lst))))))
|
||||
|
||||
;; Trims characters off the front and end of STR.
|
||||
;; (trim-1chars "'ab'") -> "ab"
|
||||
(define (trim-1chars str) (substring str 1 (- (string-length str) 1)))
|
||||
|
||||
;; Parses a literal.
|
||||
(define (peg-literal->defn lst for-syntax)
|
||||
(datum->syntax for-syntax (trim-1chars (cadr lst))))
|
||||
|
||||
;; Parses a charclass.
|
||||
(define (peg-charclass->defn lst for-syntax)
|
||||
#`(or
|
||||
#,@(map
|
||||
(lambda (cc)
|
||||
(cond
|
||||
((eq? (car cc) 'charclass-range)
|
||||
#`(range #,(datum->syntax
|
||||
for-syntax
|
||||
(string-ref (cadr cc) 0))
|
||||
#,(datum->syntax
|
||||
for-syntax
|
||||
(string-ref (cadr cc) 2))))
|
||||
((eq? (car cc) 'charclass-single)
|
||||
(datum->syntax for-syntax (cadr cc)))))
|
||||
(context-flatten
|
||||
(lambda (x) (or (eq? (car x) 'charclass-range)
|
||||
(eq? (car x) 'charclass-single)))
|
||||
(cdr lst)))))
|
||||
|
||||
;; Compresses a list to save the optimizer work.
|
||||
;; e.g. (or (and a)) -> a
|
||||
(define (compressor-core lst)
|
||||
(if (or (not (list? lst)) (null? lst))
|
||||
lst
|
||||
(cond
|
||||
((and (or (eq? (car lst) 'or) (eq? (car lst) 'and))
|
||||
(null? (cddr lst)))
|
||||
(compressor-core (cadr lst)))
|
||||
((and (eq? (car lst) 'body)
|
||||
(eq? (cadr lst) 'lit)
|
||||
(eq? (cadddr lst) 1))
|
||||
(compressor-core (caddr lst)))
|
||||
(else (map compressor-core lst)))))
|
||||
|
||||
(define (compressor syn for-syntax)
|
||||
(datum->syntax for-syntax
|
||||
(compressor-core (syntax->datum syn))))
|
||||
|
||||
;; Builds a lambda-expressions for the pattern STR using accum.
|
||||
(define (peg-string-compile args accum)
|
||||
(syntax-case args ()
|
||||
((str-stx) (string? (syntax->datum #'str-stx))
|
||||
(let ((string (syntax->datum #'str-stx)))
|
||||
(compile-peg-pattern
|
||||
(compressor
|
||||
(peg-pattern->defn
|
||||
(peg:tree (match-pattern peg-pattern string)) #'str-stx)
|
||||
#'str-stx)
|
||||
(if (eq? accum 'all) 'body accum))))
|
||||
(else (error "Bad embedded PEG string" args))))
|
||||
|
||||
(add-peg-compiler! 'peg peg-string-compile)
|
|
@ -1,116 +0,0 @@
|
|||
;;;; using-parsers.scm --- utilities to make using parsers easier
|
||||
;;;;
|
||||
;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
|
||||
;;;;
|
||||
;;;; This library is free software; you can redistribute it and/or
|
||||
;;;; modify it under the terms of the GNU Lesser General Public
|
||||
;;;; License as published by the Free Software Foundation; either
|
||||
;;;; version 3 of the License, or (at your option) any later version.
|
||||
;;;;
|
||||
;;;; This library is distributed in the hope that it will be useful,
|
||||
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;;;; Lesser General Public License for more details.
|
||||
;;;;
|
||||
;;;; You should have received a copy of the GNU Lesser General Public
|
||||
;;;; License along with this library; if not, write to the Free Software
|
||||
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;;;;
|
||||
|
||||
(define-module (gash peg using-parsers)
|
||||
#:use-module (gash peg simplify-tree)
|
||||
#:use-module (gash peg codegen)
|
||||
#:use-module (gash peg cache)
|
||||
#:export (match-pattern define-peg-pattern search-for-pattern
|
||||
prec make-prec peg:start peg:end peg:string
|
||||
peg:tree peg:substring peg-record?))
|
||||
|
||||
;;;
|
||||
;;; Helper Macros
|
||||
;;;
|
||||
|
||||
(define-syntax until
|
||||
(syntax-rules ()
|
||||
"Evaluate TEST. If it is true, return its value. Otherwise,
|
||||
execute the STMTs and try again."
|
||||
((_ test stmt stmt* ...)
|
||||
(let lp ()
|
||||
(or test
|
||||
(begin stmt stmt* ... (lp)))))))
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;; FOR DEFINING AND USING NONTERMINALS
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Parses STRING using NONTERM
|
||||
(define (match-pattern nonterm string)
|
||||
;; We copy the string before using it because it might have been modified
|
||||
;; in-place since the last time it was parsed, which would invalidate the
|
||||
;; cache. Guile uses copy-on-write for strings, so this is fast.
|
||||
(let ((res (nonterm (string-copy string) (string-length string) 0)))
|
||||
(if (not res)
|
||||
#f
|
||||
(make-prec 0 (car res) string (string-collapse (cadr res))))))
|
||||
|
||||
;; Defines a new nonterminal symbol accumulating with ACCUM.
|
||||
(define-syntax define-peg-pattern
|
||||
(lambda (x)
|
||||
(syntax-case x ()
|
||||
((_ sym accum pat)
|
||||
(let ((matchf (compile-peg-pattern #'pat (syntax->datum #'accum)))
|
||||
(accumsym (syntax->datum #'accum)))
|
||||
;; CODE is the code to parse the string if the result isn't cached.
|
||||
(let ((syn (wrap-parser-for-users x matchf accumsym #'sym)))
|
||||
#`(define sym #,(cg-cached-parser syn))))))))
|
||||
|
||||
(define (peg-like->peg pat)
|
||||
(syntax-case pat ()
|
||||
(str (string? (syntax->datum #'str)) #'(peg str))
|
||||
(else pat)))
|
||||
|
||||
;; Searches through STRING for something that parses to PEG-MATCHER. Think
|
||||
;; regexp search.
|
||||
(define-syntax search-for-pattern
|
||||
(lambda (x)
|
||||
(syntax-case x ()
|
||||
((_ pattern string-uncopied)
|
||||
(let ((pmsym (syntax->datum #'pattern)))
|
||||
(let ((matcher (compile-peg-pattern (peg-like->peg #'pattern) 'body)))
|
||||
;; We copy the string before using it because it might have been
|
||||
;; modified in-place since the last time it was parsed, which would
|
||||
;; invalidate the cache. Guile uses copy-on-write for strings, so
|
||||
;; this is fast.
|
||||
#`(let ((string (string-copy string-uncopied))
|
||||
(strlen (string-length string-uncopied))
|
||||
(at 0))
|
||||
(let ((ret (until (or (>= at strlen)
|
||||
(#,matcher string strlen at))
|
||||
(set! at (+ at 1)))))
|
||||
(if (eq? ret #t) ;; (>= at strlen) succeeded
|
||||
#f
|
||||
(let ((end (car ret))
|
||||
(match (cadr ret)))
|
||||
(make-prec
|
||||
at end string
|
||||
(string-collapse match))))))))))))
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;; PMATCH STRUCTURE MUNGING
|
||||
;; Pretty self-explanatory.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define prec
|
||||
(make-record-type "peg" '(start end string tree)))
|
||||
(define make-prec
|
||||
(record-constructor prec '(start end string tree)))
|
||||
(define (peg:start pm)
|
||||
(if pm ((record-accessor prec 'start) pm) #f))
|
||||
(define (peg:end pm)
|
||||
(if pm ((record-accessor prec 'end) pm) #f))
|
||||
(define (peg:string pm)
|
||||
(if pm ((record-accessor prec 'string) pm) #f))
|
||||
(define (peg:tree pm)
|
||||
(if pm ((record-accessor prec 'tree) pm) #f))
|
||||
(define (peg:substring pm)
|
||||
(if pm (substring (peg:string pm) (peg:start pm) (peg:end pm)) #f))
|
||||
(define peg-record? (record-predicate prec))
|
Loading…
Reference in New Issue