revamp PEG grammar
This commit is contained in:
parent
1e81a66926
commit
2334e6ebde
|
@ -8,7 +8,6 @@
|
|||
#:use-module (ice-9 getopt-long)
|
||||
#:use-module (ice-9 local-eval)
|
||||
#:use-module (ice-9 match)
|
||||
#:use-module (ice-9 rdelim)
|
||||
#:use-module (ice-9 pretty-print)
|
||||
#:use-module (ice-9 receive)
|
||||
#:use-module (ice-9 regex)
|
||||
|
@ -40,12 +39,12 @@
|
|||
|
||||
(define (parse-string string)
|
||||
(let ((parser (cond (%geesh-parser? (@ (gash geesh) parse-string))
|
||||
(else (@ (gash peg) parse-string)))))
|
||||
(else (@ (gash grammar) parse-string)))))
|
||||
(parser string)))
|
||||
|
||||
(define (parse port)
|
||||
(let ((parser (cond (%geesh-parser? (@ (gash geesh) parse))
|
||||
(else (@ (gash peg) parse)))))
|
||||
(else (@ (gash grammar) parse)))))
|
||||
(parser port)))
|
||||
|
||||
(define (file-to-ast file-name)
|
||||
|
|
|
@ -0,0 +1,249 @@
|
|||
(define-module (gash grammar)
|
||||
#:use-module (ice-9 match)
|
||||
#:use-module (ice-9 pretty-print)
|
||||
#:use-module (ice-9 rdelim)
|
||||
|
||||
#:use-module (srfi srfi-8)
|
||||
|
||||
#:use-module (peg)
|
||||
#:use-module (peg codegen)
|
||||
|
||||
#:export (parse
|
||||
parse-string))
|
||||
|
||||
;; (define-syntax define-unwrapped-sexp-parser
|
||||
;; (lambda (x)
|
||||
;; (syntax-case x ()
|
||||
;; ((_ sym accum pat)
|
||||
;; (let* ((matchf (compile-peg-pattern #'pat (syntax->datum #'accum))))
|
||||
;; #`(define sym #,matchf))))))
|
||||
|
||||
;; (define-unwrapped-sexp-parser eol none (or "\f" "\n" "\r"))
|
||||
;; (add-peg-compiler! 'eol eol)
|
||||
|
||||
;; (define-unwrapped-sexp-parser ws none (or " " "\t" "\v"))
|
||||
;; (add-peg-compiler! 'ws ws)
|
||||
|
||||
;; (define-unwrapped-sexp-parser line none (and "#" (* (and (not-followed-by eol) peg-any))))
|
||||
;; (add-peg-compiler! 'line line)
|
||||
|
||||
;; (define-unwrapped-sexp-parser skip none (* (or ws eol line)))
|
||||
;; (add-peg-compiler! 'skip skip)
|
||||
|
||||
;; (define (wrap-skip-parser-for-users for-syntax parser accumsym s-syn)
|
||||
;; (display "wrap\n")
|
||||
;; #`(lambda (str strlen pos)
|
||||
;; (when #t
|
||||
;; (format (current-error-port) "~a ~a : ~s\n"
|
||||
;; (make-string (- pos (or (string-rindex str #\newline 0 pos) 0)) #\space)
|
||||
;; '#,s-syn
|
||||
;; (substring str pos (min (+ pos 40) strlen))))
|
||||
|
||||
;; (let* ((res (skip str strlen pos))
|
||||
;; (pos (or (and res (car res)) pos))
|
||||
;; (res (#,parser str strlen pos)))
|
||||
;; ;; Try to match the nonterminal.
|
||||
;; (if res
|
||||
;; ;; If we matched, do some post-processing to figure out
|
||||
;; ;; what data to propagate upward.
|
||||
;; (let* ((at (car res))
|
||||
;; (body (cadr res)))
|
||||
;; #,(cond
|
||||
;; ((eq? accumsym 'name)
|
||||
;; #``(,at ,'#,s-syn))
|
||||
;; ((eq? accumsym 'all)
|
||||
;; #`(list at
|
||||
;; (cond
|
||||
;; ((not (list? body))
|
||||
;; `(,'#,s-syn ,body))
|
||||
;; ((null? body) `(,'#,s-syn))
|
||||
;; ((symbol? (car body))
|
||||
;; `(,'#,s-syn ,body))
|
||||
;; (else (cons '#,s-syn body)))))
|
||||
;; ((eq? accumsym 'none) #``(,at ()))
|
||||
;; (else #``(,at ,body))))
|
||||
;; ;; If we didn't match, just return false.
|
||||
;; #f))))
|
||||
|
||||
;; (module-set! (resolve-module '(peg codegen)) 'wrap-parser-for-users wrap-skip-parser-for-users)
|
||||
|
||||
(define (parse port)
|
||||
(parse-string (read-string port)))
|
||||
|
||||
(define (parse-string input)
|
||||
|
||||
(define io-label "")
|
||||
|
||||
(define (io-label-name str len pos)
|
||||
(let ((at (string-skip str char-alphabetic? pos len)))
|
||||
(set! io-label (substring str pos at))
|
||||
(if (< at len) (list at '())
|
||||
#f)))
|
||||
|
||||
(define (io-label-match str len pos)
|
||||
(if (string-prefix? io-label (substring str pos))
|
||||
(list (+ pos (string-length io-label)) '())
|
||||
#f))
|
||||
|
||||
(define-peg-pattern io-here-label none io-label-name)
|
||||
(define-peg-pattern io-here-delim none io-label-match)
|
||||
(define-peg-pattern io-here-document all
|
||||
(and (+ (and (not-followed-by io-here-delim)
|
||||
peg-any))
|
||||
io-here-delim))
|
||||
|
||||
(define-peg-string-patterns
|
||||
"script <-- ws* compound
|
||||
ws < sp / eol
|
||||
sp < '\\\n'? (comment / [ \t\v])
|
||||
comment < [#] (!eol .)*
|
||||
eol < [\n\r\f]
|
||||
|
||||
compound <-- (term (&rpar / sep#))*
|
||||
|
||||
sep <- sp* (amp ws* / semi ws* / eof) / ws+
|
||||
amp <- '&'
|
||||
semi < ';'!';'
|
||||
eof < !.
|
||||
|
||||
term <- and / or / pipeline
|
||||
and <-- pipeline and-op ws* term
|
||||
or <-- pipeline or-op ws* term
|
||||
and-op < '&&'
|
||||
or-op < '||'
|
||||
|
||||
pipeline <-- '!'? sp* (command (&sep / &or-op / &and-op / &rpar / eof / pipe#))+
|
||||
|
||||
and-or <- '&&' / '||'
|
||||
|
||||
exclamation <- '!'
|
||||
pipe < sp* '|' !'|' ws*
|
||||
|
||||
command <-- function-def / compound-command / simple-command
|
||||
|
||||
compound-command <- (subshell / brace-group / for-clause / case-clause /
|
||||
if-clause / while-clause / until-clause) (sp* io-redirect)*
|
||||
|
||||
simple-command <- ((io-redirect / nonreserved) sp*)+
|
||||
io-redirect <-- [0-9]* (io-here / io-file)
|
||||
io-file <-- io-op ([0-9]+ / word)
|
||||
io-op <- '<&' / '>&' / '>>' / '>' / '<>'/ '<' / '>|'
|
||||
io-here <-- io-here-op io-here-label sp* eol io-here-document
|
||||
io-here-op <- '<<-' / '<<'
|
||||
|
||||
function-def <-- name sp* lpar rpar# ws* function-body
|
||||
name <-- !reserved identifier
|
||||
function-body <-- brace-group (sp* io-redirect)*
|
||||
|
||||
subshell <-- lpar compound rpar#
|
||||
brace-group <-- lbrace ws* compound rbrace#
|
||||
|
||||
case-clause <-- 'case' sp* word sp* 'in'# ws* case-item+ ws* 'esac'#
|
||||
case-item <-- pattern sp* colon? ws* compound? case-sep?
|
||||
colon < ':'
|
||||
case-sep < ';;' ws*
|
||||
pattern <-- (word (!rpar '|'# / !'|' &rpar))+ rpar#
|
||||
|
||||
for-clause <-- 'for' sp+ identifier ws+ ('in' sp+ expression)? sep# do-group
|
||||
expression <-- command
|
||||
do-group <-- 'do' ws+ compound 'done'#
|
||||
|
||||
if-clause <-- 'if' sp+ compound 'then'# ws+ compound else-part? 'fi'#
|
||||
else-part <-- 'else' ws+ compound /
|
||||
'elif' ws+ compound 'then'# ws+ compound else-part?
|
||||
|
||||
while-clause <-- 'while' compound do-group
|
||||
|
||||
until-clause <-- 'until' compound do-group
|
||||
|
||||
reserved < ('case' / 'esac' / 'in' / 'if' / 'fi' / 'then' / 'else' /
|
||||
'elif' / 'for' / 'done' / 'do' / 'until' / 'while') &ws
|
||||
|
||||
nonreserved <- !reserved word
|
||||
|
||||
word <-- test / substitution / assignment / number / variable /
|
||||
delim / literal
|
||||
|
||||
test <-- ltest sp+ (word sp+)+ rtest#
|
||||
ltest < '['
|
||||
rtest < ']'
|
||||
|
||||
literal <- !reserved (!']' ![ \t\v\f\n`'\")};|&\\] .)+
|
||||
|
||||
identifier <- [_a-zA-Z][_a-zA-Z0-9]*
|
||||
|
||||
dollar < '$'
|
||||
number <-- [0-9]+
|
||||
|
||||
substitution <-- dollar lpar compound rpar# / bt ([\\] bt / !bt .)+ bt#
|
||||
lpar < '('
|
||||
rpar < ')'
|
||||
bt < [`]
|
||||
|
||||
assignment <-- name assign word?
|
||||
assign < '='
|
||||
|
||||
variable <-- dollar ('*' / '@' / [0-9] / name /
|
||||
lbrace name (variable-or / variable-and / variable-word / variable-literal / &rbrace) rbrace)
|
||||
variable-or <-- min variable-word
|
||||
variable-and <-- plus variable-word
|
||||
variable-word <- (variable-regex / substitution / variable / variable-literal)+
|
||||
variable-regex <-- ('%%' / '%' / '##' / '#' / '^^' / '^' /',,' / ',' / '*' / '@' / '?')+ variable-word
|
||||
variable-literal <- (!rbrace .)+
|
||||
min < '-'
|
||||
plus < '+'
|
||||
lbrace < '{'
|
||||
rbrace < '}'
|
||||
|
||||
|
||||
delim <-- singlequotes / doublequotes / substitution
|
||||
sq < [']
|
||||
dq < [\"]
|
||||
singlequotes <- sq (!['] .)* sq#
|
||||
doublequotes <- dq (substitution / variable / (![\"] .))* dq#")
|
||||
|
||||
(catch 'syntax-error
|
||||
(lambda ()
|
||||
(let* ((match (match-pattern script input))
|
||||
(end (peg:end match))
|
||||
(tree (peg:tree match)))
|
||||
(if (eq? (string-length input) end)
|
||||
tree
|
||||
(if match
|
||||
(begin
|
||||
(format (current-error-port) "parse error: at offset: ~a\n" end)
|
||||
(pretty-print tree)
|
||||
#f)
|
||||
(begin
|
||||
(format (current-error-port) "parse error: no match\n")
|
||||
#f)))))
|
||||
(lambda (key . args)
|
||||
(define (line-column input pos)
|
||||
(let ((length (string-length input)))
|
||||
(let loop ((lines (string-split input #\newline)) (ln 1) (p 0))
|
||||
(if (null? lines) (values #f #f input)
|
||||
(let* ((line (car lines))
|
||||
(length (string-length line))
|
||||
(end (+ p length 1))
|
||||
(last? (null? (cdr lines))))
|
||||
(if (<= pos end) (values ln (+ (if last? 0 1) (- pos p))
|
||||
(if last? line
|
||||
(string-append line "\\n" (cadr lines))))
|
||||
(loop (cdr lines) (1+ ln) end)))))))
|
||||
(define (format-peg o)
|
||||
(match o
|
||||
(('or l ...) (string-join (map format-peg l) ", or "))
|
||||
(('and l ...) (string-join (map format-peg l) " "))
|
||||
((? symbol?) (symbol->string o))
|
||||
((? string?) o)))
|
||||
|
||||
(receive (ln col line) (line-column input (caar args))
|
||||
(let* ((col (- col 1))
|
||||
(indent (make-string col #\space)))
|
||||
(format #t "~a:~a:~a: syntax-error:\n~a\n~a^\n~aexpected: ~a\n"
|
||||
""
|
||||
ln col line
|
||||
indent
|
||||
indent
|
||||
(format-peg (cadar args)))
|
||||
(exit 1))))))
|
395
gash/peg.scm
395
gash/peg.scm
|
@ -1,395 +0,0 @@
|
|||
(define-module (gash peg)
|
||||
#:use-module (ice-9 local-eval)
|
||||
#:use-module (ice-9 match)
|
||||
#:use-module (ice-9 pretty-print)
|
||||
#:use-module (ice-9 peg)
|
||||
#:use-module (ice-9 peg codegen)
|
||||
#:use-module (ice-9 rdelim)
|
||||
#:use-module (ice-9 regex)
|
||||
|
||||
#:use-module (srfi srfi-1)
|
||||
#:use-module (srfi srfi-26)
|
||||
|
||||
#:use-module (gash builtins)
|
||||
#:use-module (gash environment)
|
||||
#:use-module (gash gash)
|
||||
#:use-module (gash io)
|
||||
#:use-module (gash script)
|
||||
|
||||
#:export (
|
||||
parse
|
||||
parse-string
|
||||
peg-trace?
|
||||
))
|
||||
|
||||
(define (wrap-parser-for-users for-syntax parser accumsym s-syn)
|
||||
#`(lambda (str strlen pos)
|
||||
(when (> (@ (gash gash) %debug-level) 2)
|
||||
(format (current-error-port) "~a ~a : ~s\n"
|
||||
(make-string (- pos (or (string-rindex str #\newline 0 pos) 0)) #\space)
|
||||
'#,s-syn
|
||||
(substring str pos (min (+ pos 40) strlen))))
|
||||
|
||||
(let* ((res (#,parser str strlen pos)))
|
||||
;; Try to match the nonterminal.
|
||||
(if res
|
||||
;; If we matched, do some post-processing to figure out
|
||||
;; what data to propagate upward.
|
||||
(let ((at (car res))
|
||||
(body (cadr res)))
|
||||
#,(cond
|
||||
((eq? accumsym 'name)
|
||||
#`(list at '#,s-syn))
|
||||
((eq? accumsym 'all)
|
||||
#`(list (car res)
|
||||
(cond
|
||||
((not (list? body))
|
||||
(list '#,s-syn body))
|
||||
((null? body) '#,s-syn)
|
||||
((symbol? (car body))
|
||||
(list '#,s-syn body))
|
||||
(else (cons '#,s-syn body)))))
|
||||
((eq? accumsym 'none) #`(list (car res) '()))
|
||||
(else #`(begin res))))
|
||||
;; If we didn't match, just return false.
|
||||
#f))))
|
||||
|
||||
(module-define! (resolve-module '(ice-9 peg codegen))
|
||||
'wrap-parser-for-users
|
||||
wrap-parser-for-users)
|
||||
|
||||
(define (error? x)
|
||||
(let loop ((x x))
|
||||
(if (null? x) #f
|
||||
(if (not (pair? x))
|
||||
(eq? 'error x)
|
||||
(or (loop (car x))
|
||||
(loop (cdr x)))))))
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;; WIP
|
||||
(define (expand identifier o) ;;identifier-string -> symbol
|
||||
(define (expand- o)
|
||||
(let ((dollar-identifier (string-append "$" identifier)))
|
||||
(match o
|
||||
((? symbol?) o)
|
||||
((? string?) (if (string=? o dollar-identifier) (string->symbol identifier) o))
|
||||
((? list?) (map expand- o))
|
||||
(_ o))))
|
||||
(map expand- o))
|
||||
|
||||
(define (tostring . args)
|
||||
(with-output-to-string (cut map display args)))
|
||||
|
||||
;; transform ast -> list of expr
|
||||
;; such that (map eval expr)
|
||||
(define (DEAD-transform ast)
|
||||
(format (current-error-port) "transform=~s\n" ast)
|
||||
(match ast
|
||||
(('script term "&") (list (background (transform term))))
|
||||
(('script term) `(,(transform term)))
|
||||
(('script terms ...) (transform terms))
|
||||
(('substitution "$(" script ")") (local-eval (cons 'substitute (cddr (car (transform script)))) (the-environment)))
|
||||
(('substitution "`" script "`") (local-eval (cons 'substitute (cddr (car (transform script)))) (the-environment)))
|
||||
((('term command)) `(,(transform command)))
|
||||
((('term command) ...) (map transform command))
|
||||
((('term command) (('term commands) ...)) (map transform (cons command commands)))
|
||||
(('compound-list terms ...) (transform terms))
|
||||
(('if-clause "if" (expression "then" consequent "fi"))
|
||||
`(if (equal? 0 (status:exit-val ,@(transform expression)))
|
||||
(begin ,@(transform consequent))))
|
||||
(('if-clause "if" (expression "then" consequent ('else-part "else" alternative) "fi"))
|
||||
`(if (equal? 0 (status:exit-val ,@(transform expression)))
|
||||
(begin ,@(transform consequent))
|
||||
(begin ,@(transform alternative))))
|
||||
(('for-clause ("for" identifier sep do-group)) #t)
|
||||
(('for-clause "for" ((identifier "in" lst sep) do-group))
|
||||
`(for-each (lambda (,(string->symbol identifier))
|
||||
(begin ,@(expand identifier (transform do-group))))
|
||||
(glob ,(transform lst))))
|
||||
(('do-group "do" (command "done")) (transform command))
|
||||
(('pipeline command) (pk 1) (let* ((command (transform command))) (or (builtin command) `(pipeline #t ,@command))))
|
||||
(('pipeline command piped-commands) (pk 2) `(pipeline #t ,@(transform command) ,@(transform piped-commands)))
|
||||
(('simple-command ('word (assignment name value))) `((lambda _ (let ((name ,(tostring (transform name)))
|
||||
(value ,(tostring (transform value))))
|
||||
(stderr "assignment: " name "=" value)
|
||||
(set! global-variables (assoc-set! global-variables name (glob value)))))))
|
||||
(('simple-command ('word s)) `((glob ,(transform s))))
|
||||
(('simple-command ('word s1) ('io-redirect "<<" ('here-document s2))) `((append (glob "echo") (cons "-n" (glob ,s2))) (glob ,(transform s1))))
|
||||
(('simple-command ('word s1) ('word s2)) `((append (glob ,(transform s1)) (glob ,(transform s2)))))
|
||||
(('simple-command ('word s1) (('word s2) ...)) `((append (glob ,(transform s1)) (append-map glob (list ,@(map transform s2))))))
|
||||
(('variable s) s)
|
||||
(('literal s) (transform s))
|
||||
(('singlequotes s) (string-concatenate `("'" ,s "'")))
|
||||
(('doublequotes s) (string-concatenate `("\"" ,s "\"")))
|
||||
(('backticks s) (string-concatenate `("`" ,s "`")))
|
||||
(('delim ('singlequotes s ...)) (string-concatenate (map transform s)))
|
||||
(('delim ('doublequotes s ...)) (string-concatenate (map transform s)))
|
||||
(('delim ('backticks s ...)) (string-concatenate (map transform s)))
|
||||
((('pipe _) command) (transform command))
|
||||
(((('pipe _) command) ...) (map (compose car transform) command))
|
||||
((_ o) (transform o)) ;; peel the onion: (symbol (...)) -> (...)
|
||||
(_ ast))) ;; done
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
|
||||
(define (parse- input)
|
||||
(define label "")
|
||||
(define (label-name str len pos)
|
||||
(let ((at (string-skip str char-alphabetic? pos len)))
|
||||
(set! label (substring str pos at))
|
||||
(if (< at len) (list at '())
|
||||
#f)))
|
||||
|
||||
(define (label-match str len pos)
|
||||
(if (string-prefix? label (substring str pos)) (list (+ pos (string-length label)) '())
|
||||
#f))
|
||||
|
||||
(define-peg-pattern here-label none label-name)
|
||||
(define-peg-pattern here-delim none label-match)
|
||||
(define-peg-pattern here-document all (and (+ (and (not-followed-by here-delim) peg-any)) here-delim))
|
||||
|
||||
(define-peg-string-patterns
|
||||
"script <-- ws* (term (separator term)* separator?)?
|
||||
term <- (and / or / pipeline) (sp* (and / or /pipeline))*
|
||||
and <-- pipeline sp* amp-amp ws* pipeline
|
||||
or <-- pipeline sp* pipe-pipe ws* pipeline
|
||||
pipe < '|'
|
||||
pipeline <-- negate? pipeline-head pipeline-tail*
|
||||
pipeline-head <- sp* command
|
||||
pipeline-tail <- sp* pipe ws* command
|
||||
negate <-- '!'
|
||||
command <-- function / (compound-command (sp+ io-redirect)*) / simple-command (sp+ io-redirect)*
|
||||
compound-command <- brace-group / subshell / for-clause / case-clause / if-clause / while-clause / until-clause
|
||||
simple-command <- (sp* (io-redirect sp+)* nonreserved)+
|
||||
nonreserved <- &(reserved word) word / !reserved word
|
||||
reserved < 'case' / 'esac' / 'if' / 'fi' / 'then' / 'else' / 'elif' / 'for' / 'done' / 'do' / 'until' / 'while'
|
||||
|
||||
function <-- identifier sp* lpar sp* rpar ws* (function-body / error)
|
||||
function-body <- compound-command io-redirect*
|
||||
|
||||
io-redirect <-- [0-9]* sp* (io-here / io-file)
|
||||
io-file <-- ('<&' / '>&' / '>>' / '>' / '<>'/ '<' / '>|') sp* ([0-9]+ / file-name)
|
||||
io-here <- ('<<' / '<<-') io-suffix here-document
|
||||
io-op < '<<-' / '<<' / '<&' / '>&' / '>>' / '>' / '<>'/ '<' / '>|'
|
||||
io-suffix <- sp* here-label sp* nl
|
||||
|
||||
brace-group <-- lbrace (ws* (compound-list / error) ws* rbrace / error)
|
||||
subshell <-- lpar compound-list separator rpar
|
||||
compound-list <- term (separator term)*
|
||||
|
||||
case-keyword < 'case'
|
||||
case-clause <-- case-keyword sp+ word ws+ 'in' ws+ case-item* 'esac'
|
||||
case-item <-- pattern ((compound-list separator)? case-sep ws* / error)
|
||||
case-sep < ';;'
|
||||
pattern <-- sp* word (sp* '|' sp* word)* sp* ')' sp*
|
||||
|
||||
for-keyword < 'for'
|
||||
in-keyword < 'in'
|
||||
for-clause <-- for-keyword sp+ name (ws+ in-keyword sequence)? sp* sequential-sep do-group
|
||||
sequence <-- (sp+ word)+
|
||||
do-keyword < 'do'
|
||||
done-keyword < 'done'
|
||||
do-group <- do-keyword ws* compound-list separator done-keyword
|
||||
|
||||
if-keyword < 'if'
|
||||
fi-keyword < 'fi'
|
||||
if-clause <-- if-keyword pipeline separator then-part elif-part* else-part? fi-keyword
|
||||
then-keyword < 'then'
|
||||
then-part <-- then-keyword ws* compound-list separator
|
||||
elif-keyword < 'elif'
|
||||
elif-part <-- elif-keyword ws* compound-list separator then-keyword ws* compound-list separator else-part?
|
||||
else-keyword < 'else'
|
||||
else-part <-- else-keyword ws* compound-list separator
|
||||
|
||||
while-keyword < 'while'
|
||||
while-clause <-- while-keyword ws* compound-list separator do-group
|
||||
|
||||
until-keyword < 'until'
|
||||
until-clause <-- until-keyword ws* compound-list separator do-group
|
||||
|
||||
file-name <-- word
|
||||
name <-- identifier
|
||||
identifier <- [_a-zA-Z][_a-zA-Z0-9]*
|
||||
word <-- assignment / (delim / number / variable / variable-and-or / literal)+
|
||||
|
||||
number <-- [0-9]+
|
||||
lsubst < '$('
|
||||
rsubst < ')'
|
||||
tick < '`'
|
||||
substitution <-- lsubst script rsubst / tick script tick
|
||||
assignment <-- name assign rhs
|
||||
rhs <- (substitution / word)*
|
||||
assign < '='
|
||||
dollar < '$'
|
||||
literal <-- backslash? (!ws !amp !tick !dollar !pipe !semi !par !nl !sp !rbrace !io-op !dq !sq .)+
|
||||
variable <-- dollar ('$' / '#' / '*' / '?' / '@' / [0-9] / identifier / lbrace identifier rbrace)
|
||||
variable-and-or <- dollar lbrace (variable-or / variable-and / variable-hash-hash / variable-hash / variable-percent-percent / variable-percent / variable-slash ) rbrace
|
||||
variable-and <-- identifier plus (pat / rhs / str)
|
||||
variable-or <-- identifier minus (pat / rhs / str)
|
||||
variable-hash <-- identifier hash (pat / rhs / str)
|
||||
variable-hash-hash <-- identifier hash hash (pat / rhs / str)
|
||||
variable-percent <-- identifier percent (pat / rhs / str)
|
||||
variable-percent-percent <-- identifier percent percent (pat / rhs /str)
|
||||
variable-slash <-- (identifier slash pat slash str) / (identifier slash pat slash) / (identifier slash pat)
|
||||
pat <-- (!dollar !rbrace !slash .)+
|
||||
str <-- (!rbrace .)+
|
||||
delim <- singlequotes / doublequotes / substitution
|
||||
sq < [']
|
||||
dq < [\"]
|
||||
bt < [`]
|
||||
singlequotes <-- sq (doublequotes / (!sq .))* sq
|
||||
doublequotes <-- dq (singlequotes / substitution / number / variable / variable-and-or / literal / (!dq .))* dq
|
||||
break <- amp / semi !semi
|
||||
separator <- (sp* break ws*) / ws+
|
||||
sequential-sep <- (semi !semi ws*) / ws+
|
||||
amp <- '&'
|
||||
amp-amp < '&&'
|
||||
pipe-pipe < '||'
|
||||
backslash <- '\\'
|
||||
semi < ';'
|
||||
lpar < '('
|
||||
rpar < ')'
|
||||
lbrace < [{]
|
||||
rbrace < [}]
|
||||
plus < [+]
|
||||
minus < '-'
|
||||
hash < '#'
|
||||
percent < '%'
|
||||
slash < '/'
|
||||
par < lpar / rpar
|
||||
nl < '\n'
|
||||
sp < '\t' / ' ' / (escaped-nl sp*)
|
||||
ws < sp / nl
|
||||
escaped-nl < (backslash nl)
|
||||
error <-- .*")
|
||||
|
||||
(when (> %debug-level 1)
|
||||
(format (current-error-port) "input:~s\n" input))
|
||||
|
||||
(let* ((match (match-pattern script input))
|
||||
(end (peg:end match))
|
||||
(pt (peg:tree match)))
|
||||
(if (eq? (string-length input) end)
|
||||
pt
|
||||
(if match
|
||||
(begin
|
||||
(format (current-error-port) "parse error: at offset: ~a\n" end)
|
||||
(pretty-print pt (current-error-port))
|
||||
#f)
|
||||
(begin
|
||||
(format (current-error-port) "parse error: no match\n")
|
||||
#f)))))
|
||||
|
||||
(define (flatten o)
|
||||
(keyword-flatten '(and assignent command doublequotes for-clause literal name or pipeline singlequotes substitution word) o))
|
||||
|
||||
(define (unspecified? o)
|
||||
(eq? o *unspecified*))
|
||||
|
||||
(define (transform ast)
|
||||
(when (> %debug-level 1)
|
||||
(pretty-print ast (current-error-port)))
|
||||
(match ast
|
||||
;; FIXME: flatten?
|
||||
|
||||
((('assignent _ ...) _ ...) (map transform (flatten ast)))
|
||||
((('command _ ...) _ ...) (map transform (flatten ast)))
|
||||
((('doublequotes _ ...) _ ...) (map transform (flatten ast)))
|
||||
((('for-clause _ ...) _ ...) (map transform (flatten ast)))
|
||||
((('literal _ ...) _ ...) (map transform (flatten ast)))
|
||||
((('pipeline _ ...) _ ...) (map transform (flatten ast)))
|
||||
((('singlequotes _ ...) _ ...) (map transform (flatten ast)))
|
||||
|
||||
((('word _ ...) ('word _ ...)) (transform (cons 'word ast)))
|
||||
|
||||
((('word _ ...) _ ...) (map transform (flatten ast)))
|
||||
|
||||
(('script ('pipeline ('command command ... (word (literal "&")))))
|
||||
(background `(pipeline ',(map transform command))))
|
||||
|
||||
(('script terms ...) `(script ,@(map transform terms)))
|
||||
|
||||
(('pipeline ('command command ('io-redirect ('io-file ">" file-name))))
|
||||
(transform `(pipeline (command ,@(transform command)) (lambda _ (with-output-to-file ,(transform file-name) (lambda _ (display (read-string))))))))
|
||||
|
||||
(('pipeline ('command command ('io-redirect "<<" ('here-document here-document))))
|
||||
(transform `(pipeline (lambda _ (display ,here-document)) (command ,(transform command)))))
|
||||
|
||||
(('pipeline o ...)
|
||||
(let ((commands (map transform o)))
|
||||
`(pipeline ,@(cons (trace commands) commands))))
|
||||
|
||||
(('command o ...) `(command ,@(map transform o)))
|
||||
(('literal o) (transform o))
|
||||
(('name o) o)
|
||||
(('number o) o)
|
||||
|
||||
;;(('assignment a b) `(assignment ,(transform a) ',(transform b)))
|
||||
;; FIXME: to quote or not?
|
||||
(('assignment a) `(substitution (variable ,(transform a))))
|
||||
(('assignment a b) `(assignment ,(transform a) ,(transform b)))
|
||||
|
||||
;; (('assignment a (and b ('literal _ ...))) `(assignment ,(transform a) ,(transform b)))
|
||||
;; (('assignment a b)
|
||||
;; `(assignment ,(transform a) ,(map transform b)))
|
||||
|
||||
(('for-clause name sequence (and body ('pipeline _ ...)))
|
||||
`(for ,(transform name) (lambda _ ,(transform sequence)) (lambda _ ,(transform body))))
|
||||
(('for-clause name expr body)
|
||||
`(for ,(transform name) (lambda _ ,(transform expr)) (lambda _ ,@(map transform body))))
|
||||
(('sequence o)
|
||||
`(sequence (string-split ,(transform o) #\space)))
|
||||
(('sequence o ...)
|
||||
`(sequence (quote ,(map transform o))))
|
||||
|
||||
(('and l r) `(and-terms ,(transform l) ,(transform r)))
|
||||
(('or l r) `(or-terms ,(transform l) ,(transform r)))
|
||||
|
||||
(('substitution o) `(substitution ,(transform o)))
|
||||
(('if-clause expr then) `(if-clause ,(transform expr) ,(transform then)))
|
||||
(('if-clause expr then else) `(if-clause ,(transform expr) ,(transform then) ,(transform else)))
|
||||
(('elif-part expr then) `(if-clause ,(transform expr) ,(transform then)))
|
||||
(('elif-part expr then else) `(if-clause ,(transform expr) ,(transform then) ,(transform else)))
|
||||
(('then-part o ...) `(begin ,@(map transform o)))
|
||||
(('else-part o ...) `(begin ,@(map transform o)))
|
||||
(('word 'singlequotes) "")
|
||||
(('word o) (transform o))
|
||||
(('word o ...) `(string-append ,@(map transform o)))
|
||||
|
||||
(('function name body)
|
||||
`(function ,name (lambda ( . args) ,(transform body))))
|
||||
|
||||
(('brace-group o) `(brace-group ,(transform o)))
|
||||
(('file-name o) `(file-name ,(transform o)))
|
||||
|
||||
('doublequotes "")
|
||||
|
||||
(_ ast)))
|
||||
|
||||
|
||||
(define (remove-line-comments s)
|
||||
(string-join (map
|
||||
(lambda (s)
|
||||
(let ((n (string-index s #\#)))
|
||||
(if (and n (zero? n)) (string-pad-right s (string-length s) #\space 0 n)
|
||||
s)))
|
||||
(string-split s #\newline)) "\n"))
|
||||
|
||||
(define (parse-string string)
|
||||
(let* ((pt ((compose parse- remove-line-comments) string))
|
||||
(foo (when (> %debug-level 1) (display "tree:\n") (pretty-print pt)))
|
||||
(flat (flatten pt))
|
||||
(foo (when (> %debug-level 0) (display "flat:\n") (pretty-print flat)))
|
||||
(ast (transform flat))
|
||||
(foo (when (> %debug-level 0) (display "ast:\n") (pretty-print ast))))
|
||||
(cond ((error? ast)
|
||||
(stderr "error:") (pretty-print ast (current-error-port)) #f)
|
||||
((eq? ast 'script)
|
||||
#t)
|
||||
(else ast))))
|
||||
|
||||
(define (parse port)
|
||||
(parse-string (read-string port)))
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
;;;; peg.scm --- Parsing Expression Grammar (PEG) parser generator
|
||||
;;;;
|
||||
;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
|
||||
;;;;
|
||||
;;;; This library is free software; you can redistribute it and/or
|
||||
;;;; modify it under the terms of the GNU Lesser General Public
|
||||
;;;; License as published by the Free Software Foundation; either
|
||||
;;;; version 3 of the License, or (at your option) any later version.
|
||||
;;;;
|
||||
;;;; This library is distributed in the hope that it will be useful,
|
||||
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;;;; Lesser General Public License for more details.
|
||||
;;;;
|
||||
;;;; You should have received a copy of the GNU Lesser General Public
|
||||
;;;; License along with this library; if not, write to the Free Software
|
||||
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;;;;
|
||||
|
||||
(define-module (peg)
|
||||
#:use-module (peg codegen)
|
||||
#:use-module (peg string-peg)
|
||||
;; Note: the most important effect of using string-peg is not whatever
|
||||
;; functions it exports, but the fact that it adds a new handler to
|
||||
;; peg-sexp-compile.
|
||||
#:use-module (peg simplify-tree)
|
||||
#:use-module (peg using-parsers)
|
||||
#:use-module (peg cache)
|
||||
#:re-export (define-peg-pattern
|
||||
define-peg-string-patterns
|
||||
match-pattern
|
||||
search-for-pattern
|
||||
compile-peg-pattern
|
||||
keyword-flatten
|
||||
context-flatten
|
||||
peg:start
|
||||
peg:end
|
||||
peg:string
|
||||
peg:tree
|
||||
peg:substring
|
||||
peg-record?))
|
|
@ -0,0 +1,45 @@
|
|||
;;;; cache.scm --- cache the results of parsing
|
||||
;;;;
|
||||
;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
|
||||
;;;;
|
||||
;;;; This library is free software; you can redistribute it and/or
|
||||
;;;; modify it under the terms of the GNU Lesser General Public
|
||||
;;;; License as published by the Free Software Foundation; either
|
||||
;;;; version 3 of the License, or (at your option) any later version.
|
||||
;;;;
|
||||
;;;; This library is distributed in the hope that it will be useful,
|
||||
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;;;; Lesser General Public License for more details.
|
||||
;;;;
|
||||
;;;; You should have received a copy of the GNU Lesser General Public
|
||||
;;;; License along with this library; if not, write to the Free Software
|
||||
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;;;;
|
||||
|
||||
(define-module (peg cache)
|
||||
#:export (cg-cached-parser))
|
||||
|
||||
;; The results of parsing using a nonterminal are cached. Think of it like a
|
||||
;; hash with no conflict resolution. Process for deciding on the cache size
|
||||
;; wasn't very scientific; just ran the benchmarks and stopped a little after
|
||||
;; the point of diminishing returns on my box.
|
||||
(define *cache-size* 512)
|
||||
|
||||
(define (make-cache)
|
||||
(make-vector *cache-size* #f))
|
||||
|
||||
;; given a syntax object which is a parser function, returns syntax
|
||||
;; which, if evaluated, will become a parser function that uses a cache.
|
||||
(define (cg-cached-parser parser)
|
||||
#`(let ((cache (make-cache)))
|
||||
(lambda (str strlen at)
|
||||
(let* ((vref (vector-ref cache (modulo at *cache-size*))))
|
||||
;; Check to see whether the value is cached.
|
||||
(if (and vref (eq? (car vref) str) (= (cadr vref) at))
|
||||
(caddr vref);; If it is return it.
|
||||
(let ((fres ;; Else calculate it and cache it.
|
||||
(#,parser str strlen at)))
|
||||
(vector-set! cache (modulo at *cache-size*)
|
||||
(list str at fres))
|
||||
fres))))))
|
|
@ -0,0 +1,390 @@
|
|||
;;;; codegen.scm --- code generation for composable parsers
|
||||
;;;;
|
||||
;;;; Copyright (C) 2011 Free Software Foundation, Inc.
|
||||
;;;;
|
||||
;;;; This library is free software; you can redistribute it and/or
|
||||
;;;; modify it under the terms of the GNU Lesser General Public
|
||||
;;;; License as published by the Free Software Foundation; either
|
||||
;;;; version 3 of the License, or (at your option) any later version.
|
||||
;;;;
|
||||
;;;; This library is distributed in the hope that it will be useful,
|
||||
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;;;; Lesser General Public License for more details.
|
||||
;;;;
|
||||
;;;; You should have received a copy of the GNU Lesser General Public
|
||||
;;;; License along with this library; if not, write to the Free Software
|
||||
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;;;;
|
||||
|
||||
(define-module (peg codegen)
|
||||
#:export (compile-peg-pattern wrap-parser-for-users add-peg-compiler!)
|
||||
#:use-module (ice-9 pretty-print)
|
||||
#:use-module (system base pmatch))
|
||||
|
||||
(define-syntax single?
|
||||
(syntax-rules ()
|
||||
"Return #t if X is a list of one element."
|
||||
((_ x)
|
||||
(pmatch x
|
||||
((_) #t)
|
||||
(else #f)))))
|
||||
|
||||
(define-syntax single-filter
|
||||
(syntax-rules ()
|
||||
"If EXP is a list of one element, return the element. Otherwise
|
||||
return EXP."
|
||||
((_ exp)
|
||||
(pmatch exp
|
||||
((,elt) elt)
|
||||
(,elts elts)))))
|
||||
|
||||
(define-syntax push-not-null!
|
||||
(syntax-rules ()
|
||||
"If OBJ is non-null, push it onto LST, otherwise do nothing."
|
||||
((_ lst obj)
|
||||
(if (not (null? obj))
|
||||
(push! lst obj)))))
|
||||
|
||||
(define-syntax push!
|
||||
(syntax-rules ()
|
||||
"Push an object onto a list."
|
||||
((_ lst obj)
|
||||
(set! lst (cons obj lst)))))
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;; CODE GENERATORS
|
||||
;; These functions generate scheme code for parsing PEGs.
|
||||
;; Conventions:
|
||||
;; accum: (all name body none)
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Code we generate will have a certain return structure depending on how we're
|
||||
;; accumulating (the ACCUM variable).
|
||||
(define (cg-generic-ret accum name body-uneval at)
|
||||
;; name, body-uneval and at are syntax
|
||||
#`(let ((body #,body-uneval))
|
||||
#,(cond
|
||||
((and (eq? accum 'all) name)
|
||||
#`(list #,at
|
||||
(cond
|
||||
((not (list? body)) (list '#,name body))
|
||||
((null? body) '#,name)
|
||||
((symbol? (car body)) (list '#,name body))
|
||||
(else (cons '#,name body)))))
|
||||
((eq? accum 'name)
|
||||
#`(list #,at '#,name))
|
||||
((eq? accum 'body)
|
||||
#`(list #,at
|
||||
(cond
|
||||
((single? body) (car body))
|
||||
(else body))))
|
||||
((eq? accum 'none)
|
||||
#`(list #,at '()))
|
||||
(else
|
||||
(begin
|
||||
(pretty-print `(cg-generic-ret-error ,accum ,name ,body-uneval ,at))
|
||||
(pretty-print "Defaulting to accum of none.\n")
|
||||
#`(list #,at '()))))))
|
||||
|
||||
;; The short name makes the formatting below much easier to read.
|
||||
(define cggr cg-generic-ret)
|
||||
|
||||
;; Generates code that matches a particular string.
|
||||
;; E.g.: (cg-string syntax "abc" 'body)
|
||||
(define (cg-string pat accum)
|
||||
(let ((plen (string-length pat)))
|
||||
#`(lambda (str len pos)
|
||||
(let ((end (+ pos #,plen)))
|
||||
(and (<= end len)
|
||||
(string= str #,pat pos end)
|
||||
#,(case accum
|
||||
((all) #`(list end (list 'cg-string #,pat)))
|
||||
((name) #`(list end 'cg-string))
|
||||
((body) #`(list end #,pat))
|
||||
((none) #`(list end '()))
|
||||
(else (error "bad accum" accum))))))))
|
||||
|
||||
;; Generates code for matching any character.
|
||||
;; E.g.: (cg-peg-any syntax 'body)
|
||||
(define (cg-peg-any accum)
|
||||
#`(lambda (str len pos)
|
||||
(and (< pos len)
|
||||
#,(case accum
|
||||
((all) #`(list (1+ pos)
|
||||
(list 'cg-peg-any (substring str pos (1+ pos)))))
|
||||
((name) #`(list (1+ pos) 'cg-peg-any))
|
||||
((body) #`(list (1+ pos) (substring str pos (1+ pos))))
|
||||
((none) #`(list (1+ pos) '()))
|
||||
(else (error "bad accum" accum))))))
|
||||
|
||||
;; Generates code for matching a range of characters between start and end.
|
||||
;; E.g.: (cg-range syntax #\a #\z 'body)
|
||||
(define (cg-range pat accum)
|
||||
(syntax-case pat ()
|
||||
((start end)
|
||||
(if (not (and (char? (syntax->datum #'start))
|
||||
(char? (syntax->datum #'end))))
|
||||
(error "range PEG should have characters after it; instead got"
|
||||
#'start #'end))
|
||||
#`(lambda (str len pos)
|
||||
(and (< pos len)
|
||||
(let ((c (string-ref str pos)))
|
||||
(and (char>=? c start)
|
||||
(char<=? c end)
|
||||
#,(case accum
|
||||
((all) #`(list (1+ pos) (list 'cg-range (string c))))
|
||||
((name) #`(list (1+ pos) 'cg-range))
|
||||
((body) #`(list (1+ pos) (string c)))
|
||||
((none) #`(list (1+ pos) '()))
|
||||
(else (error "bad accum" accum))))))))))
|
||||
|
||||
;; Generate code to match a pattern and do nothing with the result
|
||||
(define (cg-ignore pat accum)
|
||||
(syntax-case pat ()
|
||||
((inner)
|
||||
(compile-peg-pattern #'inner 'none))))
|
||||
|
||||
(define (cg-capture pat accum)
|
||||
(syntax-case pat ()
|
||||
((inner)
|
||||
(compile-peg-pattern #'inner 'body))))
|
||||
|
||||
;; Filters the accum argument to compile-peg-pattern for buildings like string
|
||||
;; literals (since we don't want to tag them with their name if we're doing an
|
||||
;; "all" accum).
|
||||
(define (builtin-accum-filter accum)
|
||||
(cond
|
||||
((eq? accum 'all) 'body)
|
||||
((eq? accum 'name) 'name)
|
||||
((eq? accum 'body) 'body)
|
||||
((eq? accum 'none) 'none)))
|
||||
(define baf builtin-accum-filter)
|
||||
|
||||
;; Top-level function builder for AND. Reduces to a call to CG-AND-INT.
|
||||
(define (cg-and clauses accum)
|
||||
#`(lambda (str len pos)
|
||||
(let ((body '()))
|
||||
#,(cg-and-int clauses (baf accum) #'str #'len #'pos #'body))))
|
||||
|
||||
;; Internal function builder for AND (calls itself).
|
||||
(define (cg-and-int clauses accum str strlen at body)
|
||||
(syntax-case clauses ()
|
||||
(()
|
||||
(cggr accum 'cg-and #`(reverse #,body) at))
|
||||
((first rest ...)
|
||||
#`(let ((res (#,(compile-peg-pattern #'first accum) #,str #,strlen #,at)))
|
||||
(and res
|
||||
;; update AT and BODY then recurse
|
||||
(let ((newat (car res))
|
||||
(newbody (cadr res)))
|
||||
(set! #,at newat)
|
||||
(push-not-null! #,body (single-filter newbody))
|
||||
#,(cg-and-int #'(rest ...) accum str strlen at body)))))))
|
||||
|
||||
;; Top-level function builder for OR. Reduces to a call to CG-OR-INT.
|
||||
(define (cg-or clauses accum)
|
||||
#`(lambda (str len pos)
|
||||
#,(cg-or-int clauses (baf accum) #'str #'len #'pos)))
|
||||
|
||||
;; Internal function builder for OR (calls itself).
|
||||
(define (cg-or-int clauses accum str strlen at)
|
||||
(syntax-case clauses ()
|
||||
(()
|
||||
#f)
|
||||
((first rest ...)
|
||||
#`(or (#,(compile-peg-pattern #'first accum) #,str #,strlen #,at)
|
||||
#,(cg-or-int #'(rest ...) accum str strlen at)))))
|
||||
|
||||
(define (cg-* args accum)
|
||||
(syntax-case args ()
|
||||
((pat)
|
||||
#`(lambda (str strlen at)
|
||||
(let ((body '()))
|
||||
(let lp ((end at) (count 0))
|
||||
(let* ((match (#,(compile-peg-pattern #'pat (baf accum))
|
||||
str strlen end))
|
||||
(new-end (if match (car match) end))
|
||||
(count (if (> new-end end) (1+ count) count)))
|
||||
(if (> new-end end)
|
||||
(push-not-null! body (single-filter (cadr match))))
|
||||
(if (and (> new-end end)
|
||||
#,#t)
|
||||
(lp new-end count)
|
||||
(let ((success #,#t))
|
||||
#,#`(and success
|
||||
#,(cggr (baf accum) 'cg-body
|
||||
#'(reverse body) #'new-end)))))))))))
|
||||
|
||||
(define (cg-+ args accum)
|
||||
(syntax-case args ()
|
||||
((pat)
|
||||
#`(lambda (str strlen at)
|
||||
(let ((body '()))
|
||||
(let lp ((end at) (count 0))
|
||||
(let* ((match (#,(compile-peg-pattern #'pat (baf accum))
|
||||
str strlen end))
|
||||
(new-end (if match (car match) end))
|
||||
(count (if (> new-end end) (1+ count) count)))
|
||||
(if (> new-end end)
|
||||
(push-not-null! body (single-filter (cadr match))))
|
||||
(if (and (> new-end end)
|
||||
#,#t)
|
||||
(lp new-end count)
|
||||
(let ((success #,#'(>= count 1)))
|
||||
#,#`(and success
|
||||
#,(cggr (baf accum) 'cg-body
|
||||
#'(reverse body) #'new-end)))))))))))
|
||||
|
||||
(define (cg-? args accum)
|
||||
(syntax-case args ()
|
||||
((pat)
|
||||
#`(lambda (str strlen at)
|
||||
(let ((body '()))
|
||||
(let lp ((end at) (count 0))
|
||||
(let* ((match (#,(compile-peg-pattern #'pat (baf accum))
|
||||
str strlen end))
|
||||
(new-end (if match (car match) end))
|
||||
(count (if (> new-end end) (1+ count) count)))
|
||||
(if (> new-end end)
|
||||
(push-not-null! body (single-filter (cadr match))))
|
||||
(if (and (> new-end end)
|
||||
#,#'(< count 1))
|
||||
(lp new-end count)
|
||||
(let ((success #,#t))
|
||||
#,#`(and success
|
||||
#,(cggr (baf accum) 'cg-body
|
||||
#'(reverse body) #'new-end)))))))))))
|
||||
|
||||
(define (cg-followed-by args accum)
|
||||
(syntax-case args ()
|
||||
((pat)
|
||||
#`(lambda (str strlen at)
|
||||
(let ((body '()))
|
||||
(let lp ((end at) (count 0))
|
||||
(let* ((match (#,(compile-peg-pattern #'pat (baf accum))
|
||||
str strlen end))
|
||||
(new-end (if match (car match) end))
|
||||
(count (if (> new-end end) (1+ count) count)))
|
||||
(if (> new-end end)
|
||||
(push-not-null! body (single-filter (cadr match))))
|
||||
(if (and (> new-end end)
|
||||
#,#'(< count 1))
|
||||
(lp new-end count)
|
||||
(let ((success #,#'(= count 1)))
|
||||
#,#`(and success
|
||||
#,(cggr (baf accum) 'cg-body #''() #'at)))))))))))
|
||||
|
||||
(define (cg-not-followed-by args accum)
|
||||
(syntax-case args ()
|
||||
((pat)
|
||||
#`(lambda (str strlen at)
|
||||
(let ((body '()))
|
||||
(let lp ((end at) (count 0))
|
||||
(let* ((match (#,(compile-peg-pattern #'pat (baf accum))
|
||||
str strlen end))
|
||||
(new-end (if match (car match) end))
|
||||
(count (if (> new-end end) (1+ count) count)))
|
||||
(if (> new-end end)
|
||||
(push-not-null! body (single-filter (cadr match))))
|
||||
(if (and (> new-end end)
|
||||
#,#'(< count 1))
|
||||
(lp new-end count)
|
||||
(let ((success #,#'(= count 1)))
|
||||
#,#`(if success
|
||||
#f
|
||||
#,(cggr (baf accum) 'cg-body #''() #'at)))))))))))
|
||||
|
||||
|
||||
(define (cg-expect-int clauses accum str strlen at)
|
||||
(syntax-case clauses ()
|
||||
((pat)
|
||||
#`(or (#,(compile-peg-pattern #'pat accum) #,str #,strlen #,at)
|
||||
(throw 'syntax-error (list #,at (syntax->datum #'pat))))))) ;;TODO throw partial match
|
||||
|
||||
(define (cg-expect clauses accum)
|
||||
#`(lambda (str len pos)
|
||||
#,(cg-expect-int clauses ((@@ (ice-9 peg codegen) baf) accum) #'str #'len #'pos)))
|
||||
|
||||
;; Association list of functions to handle different expressions as PEGs
|
||||
(define peg-compiler-alist '())
|
||||
|
||||
(define (add-peg-compiler! symbol function)
|
||||
(set! peg-compiler-alist
|
||||
(assq-set! peg-compiler-alist symbol function)))
|
||||
|
||||
(add-peg-compiler! 'range cg-range)
|
||||
(add-peg-compiler! 'ignore cg-ignore)
|
||||
(add-peg-compiler! 'capture cg-capture)
|
||||
(add-peg-compiler! 'and cg-and)
|
||||
(add-peg-compiler! 'or cg-or)
|
||||
(add-peg-compiler! '* cg-*)
|
||||
(add-peg-compiler! '+ cg-+)
|
||||
(add-peg-compiler! '? cg-?)
|
||||
(add-peg-compiler! 'followed-by cg-followed-by)
|
||||
(add-peg-compiler! 'not-followed-by cg-not-followed-by)
|
||||
(add-peg-compiler! 'expect cg-expect)
|
||||
|
||||
;; Takes an arbitrary expressions and accumulation variable, then parses it.
|
||||
;; E.g.: (compile-peg-pattern syntax '(and "abc" (or "-" (range #\a #\z))) 'all)
|
||||
(define (compile-peg-pattern pat accum)
|
||||
(syntax-case pat (peg-any)
|
||||
(peg-any
|
||||
(cg-peg-any (baf accum)))
|
||||
(sym (identifier? #'sym) ;; nonterminal
|
||||
#'sym)
|
||||
(str (string? (syntax->datum #'str)) ;; literal string
|
||||
(cg-string (syntax->datum #'str) (baf accum)))
|
||||
((name . args) (let* ((nm (syntax->datum #'name))
|
||||
(entry (assq-ref peg-compiler-alist nm)))
|
||||
(if entry
|
||||
(entry #'args accum)
|
||||
(error "Bad peg form" nm #'args
|
||||
"Not one of" (map car peg-compiler-alist)))))))
|
||||
|
||||
;; Packages the results of a parser
|
||||
|
||||
(define indent 0)
|
||||
|
||||
(define (trace? symbol)
|
||||
(and #f (not (memq symbol '()))))
|
||||
|
||||
(define (wrap-parser-for-users for-syntax parser accumsym s-syn)
|
||||
#`(lambda (str strlen at)
|
||||
(when (trace? '#,s-syn)
|
||||
(format (current-error-port) "~a~a\n"
|
||||
(make-string indent #\space)
|
||||
'#,s-syn))
|
||||
(set! indent (+ indent 4))
|
||||
(let ((res (#,parser str strlen at)))
|
||||
(set! indent (- indent 4))
|
||||
;; Try to match the nonterminal.
|
||||
(let ((pos (or (and res (car res)) 0)))
|
||||
(when (and (trace? '#,s-syn) (< at pos))
|
||||
(format (current-error-port) "~a~a := ~s\tnext: ~s\n"
|
||||
(make-string indent #\space)
|
||||
'#,s-syn
|
||||
(substring str at pos)
|
||||
(substring str pos (min strlen (+ pos 10))))))
|
||||
(if res
|
||||
;; If we matched, do some post-processing to figure out
|
||||
;; what data to propagate upward.
|
||||
(let ((at (car res))
|
||||
(body (cadr res)))
|
||||
#,(cond
|
||||
((eq? accumsym 'name)
|
||||
#`(list at '#,s-syn))
|
||||
((eq? accumsym 'all)
|
||||
#`(list (car res)
|
||||
(cond
|
||||
((not (list? body))
|
||||
(list '#,s-syn body))
|
||||
((null? body) '#,s-syn)
|
||||
((symbol? (car body))
|
||||
(list '#,s-syn body))
|
||||
(else (cons '#,s-syn body)))))
|
||||
((eq? accumsym 'none) #`(list (car res) '()))
|
||||
(else #`(begin res))))
|
||||
;; If we didn't match, just return false.
|
||||
#f))))
|
|
@ -0,0 +1,97 @@
|
|||
;;;; simplify-tree.scm --- utility functions for the PEG parser
|
||||
;;;;
|
||||
;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
|
||||
;;;;
|
||||
;;;; This library is free software; you can redistribute it and/or
|
||||
;;;; modify it under the terms of the GNU Lesser General Public
|
||||
;;;; License as published by the Free Software Foundation; either
|
||||
;;;; version 3 of the License, or (at your option) any later version.
|
||||
;;;;
|
||||
;;;; This library is distributed in the hope that it will be useful,
|
||||
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;;;; Lesser General Public License for more details.
|
||||
;;;;
|
||||
;;;; You should have received a copy of the GNU Lesser General Public
|
||||
;;;; License along with this library; if not, write to the Free Software
|
||||
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;;;;
|
||||
|
||||
(define-module (peg simplify-tree)
|
||||
#:export (keyword-flatten context-flatten string-collapse)
|
||||
#:use-module (system base pmatch))
|
||||
|
||||
(define-syntax single?
|
||||
(syntax-rules ()
|
||||
"Return #t if X is a list of one element."
|
||||
((_ x)
|
||||
(pmatch x
|
||||
((_) #t)
|
||||
(else #f)))))
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;; POST-PROCESSING FUNCTIONS (TO CANONICALIZE MATCH TREES)
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Is everything in LST true?
|
||||
(define (andlst lst)
|
||||
(or (null? lst)
|
||||
(and (car lst) (andlst (cdr lst)))))
|
||||
|
||||
;; Is LST a list of strings?
|
||||
(define (string-list? lst)
|
||||
(and (list? lst) (not (null? lst))
|
||||
(andlst (map string? lst))))
|
||||
|
||||
;; Groups all strings that are next to each other in LST. Used in
|
||||
;; STRING-COLLAPSE.
|
||||
(define (string-group lst)
|
||||
(if (not (list? lst))
|
||||
lst
|
||||
(if (null? lst)
|
||||
'()
|
||||
(let ((next (string-group (cdr lst))))
|
||||
(if (not (string? (car lst)))
|
||||
(cons (car lst) next)
|
||||
(if (and (not (null? next))
|
||||
(list? (car next))
|
||||
(string? (caar next)))
|
||||
(cons (cons (car lst) (car next)) (cdr next))
|
||||
(cons (list (car lst)) next)))))))
|
||||
|
||||
|
||||
;; Collapses all the string in LST.
|
||||
;; ("a" "b" (c d) "e" "f") -> ("ab" (c d) "ef")
|
||||
(define (string-collapse lst)
|
||||
(if (list? lst)
|
||||
(let ((res (map (lambda (x) (if (string-list? x)
|
||||
(apply string-append x)
|
||||
x))
|
||||
(string-group (map string-collapse lst)))))
|
||||
(if (single? res) (car res) res))
|
||||
lst))
|
||||
|
||||
;; If LST is an atom, return (list LST), else return LST.
|
||||
(define (mklst lst)
|
||||
(if (not (list? lst)) (list lst) lst))
|
||||
|
||||
;; Takes a list and "flattens" it, using the predicate TST to know when to stop
|
||||
;; instead of terminating on atoms (see tutorial).
|
||||
(define (context-flatten tst lst)
|
||||
(if (or (not (list? lst)) (null? lst))
|
||||
lst
|
||||
(if (tst lst)
|
||||
(list lst)
|
||||
(apply append
|
||||
(map (lambda (x) (mklst (context-flatten tst x)))
|
||||
lst)))))
|
||||
|
||||
;; Takes a list and "flattens" it, using the list of keywords KEYWORD-LST to
|
||||
;; know when to stop at (see tutorial).
|
||||
(define (keyword-flatten keyword-lst lst)
|
||||
(context-flatten
|
||||
(lambda (x)
|
||||
(if (or (not (list? x)) (null? x))
|
||||
#t
|
||||
(member (car x) keyword-lst)))
|
||||
lst))
|
|
@ -0,0 +1,280 @@
|
|||
;;;; string-peg.scm --- representing PEG grammars as strings
|
||||
;;;;
|
||||
;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
|
||||
;;;;
|
||||
;;;; This library is free software; you can redistribute it and/or
|
||||
;;;; modify it under the terms of the GNU Lesser General Public
|
||||
;;;; License as published by the Free Software Foundation; either
|
||||
;;;; version 3 of the License, or (at your option) any later version.
|
||||
;;;;
|
||||
;;;; This library is distributed in the hope that it will be useful,
|
||||
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;;;; Lesser General Public License for more details.
|
||||
;;;;
|
||||
;;;; You should have received a copy of the GNU Lesser General Public
|
||||
;;;; License along with this library; if not, write to the Free Software
|
||||
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;;;;
|
||||
|
||||
(define-module (peg string-peg)
|
||||
#:export (peg-as-peg
|
||||
define-peg-string-patterns
|
||||
peg-grammar)
|
||||
#:use-module (peg using-parsers)
|
||||
#:use-module (peg codegen)
|
||||
#:use-module (peg simplify-tree))
|
||||
|
||||
;; Gets the left-hand depth of a list.
|
||||
(define (depth lst)
|
||||
(if (or (not (list? lst)) (null? lst))
|
||||
0
|
||||
(+ 1 (depth (car lst)))))
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;; Parse string PEGs using sexp PEGs.
|
||||
;; See the variable PEG-AS-PEG for an easier-to-read syntax.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Grammar for PEGs in PEG grammar.
|
||||
(define peg-as-peg
|
||||
"grammar <-- (nonterminal ('<--' / '<-' / '<') sp pattern)+
|
||||
pattern <-- alternative (SLASH sp alternative)*
|
||||
alternative <-- ([!&]? sp suffix)+
|
||||
suffix <-- primary ([*+?] sp)*
|
||||
primary <-- secondary ([#] sp)?
|
||||
secondary <-- '(' sp pattern ')' sp / '.' sp / literal / charclass / nonterminal !'<'
|
||||
literal <-- ['] (!['] .)* ['] sp
|
||||
charclass <-- LB (!']' (CCrange / CCsingle))* RB sp
|
||||
CCrange <-- . '-' .
|
||||
CCsingle <-- .
|
||||
nonterminal <-- [a-zA-Z0-9-]+ sp
|
||||
sp < [ \t\n]*
|
||||
SLASH < '/'
|
||||
LB < '['
|
||||
RB < ']'
|
||||
")
|
||||
|
||||
(define-syntax define-sexp-parser
|
||||
(lambda (x)
|
||||
(syntax-case x ()
|
||||
((_ sym accum pat)
|
||||
(let* ((matchf (compile-peg-pattern #'pat (syntax->datum #'accum)))
|
||||
(accumsym (syntax->datum #'accum))
|
||||
(syn (wrap-parser-for-users x matchf accumsym #'sym)))
|
||||
#`(define sym #,syn))))))
|
||||
|
||||
(define-sexp-parser peg-grammar all
|
||||
(+ (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern)))
|
||||
(define-sexp-parser peg-pattern all
|
||||
(and peg-alternative
|
||||
(* (and (ignore "/") peg-sp peg-alternative))))
|
||||
(define-sexp-parser peg-alternative all
|
||||
(+ (and (? (or "!" "&")) peg-sp peg-suffix)))
|
||||
(define-sexp-parser peg-suffix all
|
||||
(and peg-primary (* (and (or "*" "+" "?") peg-sp))))
|
||||
(define-sexp-parser peg-primary all
|
||||
(and peg-secondary (? (and "#" peg-sp))))
|
||||
(define-sexp-parser peg-secondary all
|
||||
(or (and "(" peg-sp peg-pattern ")" peg-sp)
|
||||
(and "." peg-sp)
|
||||
peg-literal
|
||||
peg-charclass
|
||||
(and peg-nonterminal (not-followed-by "<"))))
|
||||
(define-sexp-parser peg-literal all
|
||||
(and "'" (* (and (not-followed-by "'") peg-any)) "'" peg-sp))
|
||||
(define-sexp-parser peg-charclass all
|
||||
(and (ignore "[")
|
||||
(* (and (not-followed-by "]")
|
||||
(or charclass-range charclass-single)))
|
||||
(ignore "]")
|
||||
peg-sp))
|
||||
(define-sexp-parser charclass-range all (and peg-any "-" peg-any))
|
||||
(define-sexp-parser charclass-single all peg-any)
|
||||
(define-sexp-parser peg-nonterminal all
|
||||
(and (+ (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-")) peg-sp))
|
||||
(define-sexp-parser peg-sp none
|
||||
(* (or " " "\t" "\n")))
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;; PARSE STRING PEGS
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Takes a string representing a PEG grammar and returns syntax that
|
||||
;; will define all of the nonterminals in the grammar with equivalent
|
||||
;; PEG s-expressions.
|
||||
(define (peg-parser str for-syntax)
|
||||
(let ((parsed (match-pattern peg-grammar str)))
|
||||
(if (not parsed)
|
||||
(begin
|
||||
;; (display "Invalid PEG grammar!\n")
|
||||
#f)
|
||||
(let ((lst (peg:tree parsed)))
|
||||
(cond
|
||||
((or (not (list? lst)) (null? lst))
|
||||
lst)
|
||||
((eq? (car lst) 'peg-grammar)
|
||||
#`(begin
|
||||
#,@(map (lambda (x) (peg-nonterm->defn x for-syntax))
|
||||
(context-flatten (lambda (lst) (<= (depth lst) 2))
|
||||
(cdr lst))))))))))
|
||||
|
||||
;; Macro wrapper for PEG-PARSER. Parses PEG grammars expressed as strings and
|
||||
;; defines all the appropriate nonterminals.
|
||||
(define-syntax define-peg-string-patterns
|
||||
(lambda (x)
|
||||
(syntax-case x ()
|
||||
((_ str)
|
||||
(peg-parser (syntax->datum #'str) x)))))
|
||||
|
||||
;; lst has format (nonterm grabber pattern), where
|
||||
;; nonterm is a symbol (the name of the nonterminal),
|
||||
;; grabber is a string (either "<", "<-" or "<--"), and
|
||||
;; pattern is the parse of a PEG pattern expressed as as string.
|
||||
(define (peg-nonterm->defn lst for-syntax)
|
||||
(let* ((nonterm (car lst))
|
||||
(grabber (cadr lst))
|
||||
(pattern (caddr lst))
|
||||
(nonterm-name (datum->syntax for-syntax
|
||||
(string->symbol (cadr nonterm)))))
|
||||
#`(define-peg-pattern #,nonterm-name
|
||||
#,(cond
|
||||
((string=? grabber "<--") (datum->syntax for-syntax 'all))
|
||||
((string=? grabber "<-") (datum->syntax for-syntax 'body))
|
||||
(else (datum->syntax for-syntax 'none)))
|
||||
#,(compressor (peg-pattern->defn pattern for-syntax) for-syntax))))
|
||||
|
||||
;; lst has format ('peg-pattern ...).
|
||||
;; After the context-flatten, (cdr lst) has format
|
||||
;; (('peg-alternative ...) ...), where the outer list is a collection
|
||||
;; of elements from a '/' alternative.
|
||||
(define (peg-pattern->defn lst for-syntax)
|
||||
#`(or #,@(map (lambda (x) (peg-alternative->defn x for-syntax))
|
||||
(context-flatten (lambda (x) (eq? (car x) 'peg-alternative))
|
||||
(cdr lst)))))
|
||||
|
||||
;; lst has format ('peg-alternative ...).
|
||||
;; After the context-flatten, (cdr lst) has the format
|
||||
;; (item ...), where each item has format either ("!" ...), ("&" ...),
|
||||
;; or ('peg-suffix ...).
|
||||
(define (peg-alternative->defn lst for-syntax)
|
||||
#`(and #,@(map (lambda (x) (peg-body->defn x for-syntax))
|
||||
(context-flatten (lambda (x) (or (string? (car x))
|
||||
(eq? (car x) 'peg-suffix)))
|
||||
(cdr lst)))))
|
||||
|
||||
;; lst has the format either
|
||||
;; ("!" ('peg-suffix ...)), ("&" ('peg-suffix ...)), or
|
||||
;; ('peg-suffix ...).
|
||||
(define (peg-body->defn lst for-syntax)
|
||||
(cond
|
||||
((equal? (car lst) "&")
|
||||
#`(followed-by #,(peg-suffix->defn (cadr lst) for-syntax)))
|
||||
((equal? (car lst) "!")
|
||||
#`(not-followed-by #,(peg-suffix->defn (cadr lst) for-syntax)))
|
||||
((eq? (car lst) 'peg-suffix)
|
||||
(peg-suffix->defn lst for-syntax))
|
||||
(else `(peg-parse-body-fail ,lst))))
|
||||
|
||||
;; lst has format ('peg-suffix <peg-primary> (? (/ "*" "?" "+")))
|
||||
(define (peg-suffix->defn lst for-syntax)
|
||||
(let ((inner-defn (peg-primary->defn (cadr lst) for-syntax)))
|
||||
(cond
|
||||
((null? (cddr lst))
|
||||
inner-defn)
|
||||
((equal? (caddr lst) "*")
|
||||
#`(* #,inner-defn))
|
||||
((equal? (caddr lst) "?")
|
||||
#`(? #,inner-defn))
|
||||
((equal? (caddr lst) "+")
|
||||
#`(+ #,inner-defn)))))
|
||||
|
||||
;; Parse a primary.
|
||||
(define (peg-primary->defn lst for-syntax)
|
||||
(let ((inner-defn (peg-secondary->defn (cadr lst) for-syntax)))
|
||||
(if (and (pair? (cddr lst)) (equal? (caddr lst) "#")) #`(expect #,inner-defn)
|
||||
inner-defn)))
|
||||
|
||||
(define (peg-secondary->defn lst for-syntax)
|
||||
(let ((el (cadr lst)))
|
||||
(cond
|
||||
((list? el)
|
||||
(cond
|
||||
((eq? (car el) 'peg-literal)
|
||||
(peg-literal->defn el for-syntax))
|
||||
((eq? (car el) 'peg-charclass)
|
||||
(peg-charclass->defn el for-syntax))
|
||||
((eq? (car el) 'peg-nonterminal)
|
||||
(datum->syntax for-syntax (string->symbol (cadr el))))))
|
||||
((string? el)
|
||||
(cond
|
||||
((equal? el "(")
|
||||
(peg-pattern->defn (caddr lst) for-syntax))
|
||||
((equal? el ".")
|
||||
(datum->syntax for-syntax 'peg-any))
|
||||
(else (datum->syntax for-syntax
|
||||
`(peg-parse-any unknown-string ,lst)))))
|
||||
(else (datum->syntax for-syntax
|
||||
`(peg-parse-any unknown-el ,lst))))))
|
||||
|
||||
;; Trims characters off the front and end of STR.
|
||||
;; (trim-1chars "'ab'") -> "ab"
|
||||
(define (trim-1chars str) (substring str 1 (- (string-length str) 1)))
|
||||
|
||||
;; Parses a literal.
|
||||
(define (peg-literal->defn lst for-syntax)
|
||||
(datum->syntax for-syntax (trim-1chars (cadr lst))))
|
||||
|
||||
;; Parses a charclass.
|
||||
(define (peg-charclass->defn lst for-syntax)
|
||||
#`(or
|
||||
#,@(map
|
||||
(lambda (cc)
|
||||
(cond
|
||||
((eq? (car cc) 'charclass-range)
|
||||
#`(range #,(datum->syntax
|
||||
for-syntax
|
||||
(string-ref (cadr cc) 0))
|
||||
#,(datum->syntax
|
||||
for-syntax
|
||||
(string-ref (cadr cc) 2))))
|
||||
((eq? (car cc) 'charclass-single)
|
||||
(datum->syntax for-syntax (cadr cc)))))
|
||||
(context-flatten
|
||||
(lambda (x) (or (eq? (car x) 'charclass-range)
|
||||
(eq? (car x) 'charclass-single)))
|
||||
(cdr lst)))))
|
||||
|
||||
;; Compresses a list to save the optimizer work.
|
||||
;; e.g. (or (and a)) -> a
|
||||
(define (compressor-core lst)
|
||||
(if (or (not (list? lst)) (null? lst))
|
||||
lst
|
||||
(cond
|
||||
((and (or (eq? (car lst) 'or) (eq? (car lst) 'and))
|
||||
(null? (cddr lst)))
|
||||
(compressor-core (cadr lst)))
|
||||
((and (eq? (car lst) 'body)
|
||||
(eq? (cadr lst) 'lit)
|
||||
(eq? (cadddr lst) 1))
|
||||
(compressor-core (caddr lst)))
|
||||
(else (map compressor-core lst)))))
|
||||
|
||||
(define (compressor syn for-syntax)
|
||||
(datum->syntax for-syntax
|
||||
(compressor-core (syntax->datum syn))))
|
||||
|
||||
;; Builds a lambda-expressions for the pattern STR using accum.
|
||||
(define (peg-string-compile args accum)
|
||||
(syntax-case args ()
|
||||
((str-stx) (string? (syntax->datum #'str-stx))
|
||||
(let ((string (syntax->datum #'str-stx)))
|
||||
(compile-peg-pattern
|
||||
(compressor
|
||||
(peg-pattern->defn
|
||||
(peg:tree (match-pattern peg-pattern string)) #'str-stx)
|
||||
#'str-stx)
|
||||
(if (eq? accum 'all) 'body accum))))
|
||||
(else (error "Bad embedded PEG string" args))))
|
||||
|
||||
(add-peg-compiler! 'peg peg-string-compile)
|
|
@ -0,0 +1,116 @@
|
|||
;;;; using-parsers.scm --- utilities to make using parsers easier
|
||||
;;;;
|
||||
;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
|
||||
;;;;
|
||||
;;;; This library is free software; you can redistribute it and/or
|
||||
;;;; modify it under the terms of the GNU Lesser General Public
|
||||
;;;; License as published by the Free Software Foundation; either
|
||||
;;;; version 3 of the License, or (at your option) any later version.
|
||||
;;;;
|
||||
;;;; This library is distributed in the hope that it will be useful,
|
||||
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;;;; Lesser General Public License for more details.
|
||||
;;;;
|
||||
;;;; You should have received a copy of the GNU Lesser General Public
|
||||
;;;; License along with this library; if not, write to the Free Software
|
||||
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;;;;
|
||||
|
||||
(define-module (peg using-parsers)
|
||||
#:use-module (peg simplify-tree)
|
||||
#:use-module (peg codegen)
|
||||
#:use-module (peg cache)
|
||||
#:export (match-pattern define-peg-pattern search-for-pattern
|
||||
prec make-prec peg:start peg:end peg:string
|
||||
peg:tree peg:substring peg-record?))
|
||||
|
||||
;;;
|
||||
;;; Helper Macros
|
||||
;;;
|
||||
|
||||
(define-syntax until
|
||||
(syntax-rules ()
|
||||
"Evaluate TEST. If it is true, return its value. Otherwise,
|
||||
execute the STMTs and try again."
|
||||
((_ test stmt stmt* ...)
|
||||
(let lp ()
|
||||
(or test
|
||||
(begin stmt stmt* ... (lp)))))))
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;; FOR DEFINING AND USING NONTERMINALS
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Parses STRING using NONTERM
|
||||
(define (match-pattern nonterm string)
|
||||
;; We copy the string before using it because it might have been modified
|
||||
;; in-place since the last time it was parsed, which would invalidate the
|
||||
;; cache. Guile uses copy-on-write for strings, so this is fast.
|
||||
(let ((res (nonterm (string-copy string) (string-length string) 0)))
|
||||
(if (not res)
|
||||
#f
|
||||
(make-prec 0 (car res) string (string-collapse (cadr res))))))
|
||||
|
||||
;; Defines a new nonterminal symbol accumulating with ACCUM.
|
||||
(define-syntax define-peg-pattern
|
||||
(lambda (x)
|
||||
(syntax-case x ()
|
||||
((_ sym accum pat)
|
||||
(let ((matchf (compile-peg-pattern #'pat (syntax->datum #'accum)))
|
||||
(accumsym (syntax->datum #'accum)))
|
||||
;; CODE is the code to parse the string if the result isn't cached.
|
||||
(let ((syn (wrap-parser-for-users x matchf accumsym #'sym)))
|
||||
#`(define sym #,(cg-cached-parser syn))))))))
|
||||
|
||||
(define (peg-like->peg pat)
|
||||
(syntax-case pat ()
|
||||
(str (string? (syntax->datum #'str)) #'(peg str))
|
||||
(else pat)))
|
||||
|
||||
;; Searches through STRING for something that parses to PEG-MATCHER. Think
|
||||
;; regexp search.
|
||||
(define-syntax search-for-pattern
|
||||
(lambda (x)
|
||||
(syntax-case x ()
|
||||
((_ pattern string-uncopied)
|
||||
(let ((pmsym (syntax->datum #'pattern)))
|
||||
(let ((matcher (compile-peg-pattern (peg-like->peg #'pattern) 'body)))
|
||||
;; We copy the string before using it because it might have been
|
||||
;; modified in-place since the last time it was parsed, which would
|
||||
;; invalidate the cache. Guile uses copy-on-write for strings, so
|
||||
;; this is fast.
|
||||
#`(let ((string (string-copy string-uncopied))
|
||||
(strlen (string-length string-uncopied))
|
||||
(at 0))
|
||||
(let ((ret (until (or (>= at strlen)
|
||||
(#,matcher string strlen at))
|
||||
(set! at (+ at 1)))))
|
||||
(if (eq? ret #t) ;; (>= at strlen) succeeded
|
||||
#f
|
||||
(let ((end (car ret))
|
||||
(match (cadr ret)))
|
||||
(make-prec
|
||||
at end string
|
||||
(string-collapse match))))))))))))
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;; PMATCH STRUCTURE MUNGING
|
||||
;; Pretty self-explanatory.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define prec
|
||||
(make-record-type "peg" '(start end string tree)))
|
||||
(define make-prec
|
||||
(record-constructor prec '(start end string tree)))
|
||||
(define (peg:start pm)
|
||||
(if pm ((record-accessor prec 'start) pm) #f))
|
||||
(define (peg:end pm)
|
||||
(if pm ((record-accessor prec 'end) pm) #f))
|
||||
(define (peg:string pm)
|
||||
(if pm ((record-accessor prec 'string) pm) #f))
|
||||
(define (peg:tree pm)
|
||||
(if pm ((record-accessor prec 'tree) pm) #f))
|
||||
(define (peg:substring pm)
|
||||
(if pm (substring (peg:string pm) (peg:start pm) (peg:end pm)) #f))
|
||||
(define peg-record? (record-predicate prec))
|
Loading…
Reference in New Issue