gash/gash/grammar.scm

253 lines
9.2 KiB
Scheme

(define-module (gash grammar)
#:use-module (ice-9 match)
#:use-module (ice-9 pretty-print)
#:use-module (ice-9 rdelim)
#:use-module (srfi srfi-8)
#:use-module (gash gash)
#:use-module (gash peg)
#:use-module (gash peg codegen)
#:export (parse
parse-string))
;; (define-syntax define-unwrapped-sexp-parser
;; (lambda (x)
;; (syntax-case x ()
;; ((_ sym accum pat)
;; (let* ((matchf (compile-peg-pattern #'pat (syntax->datum #'accum))))
;; #`(define sym #,matchf))))))
;; (define-unwrapped-sexp-parser eol none (or "\f" "\n" "\r"))
;; (add-peg-compiler! 'eol eol)
;; (define-unwrapped-sexp-parser ws none (or " " "\t" "\v"))
;; (add-peg-compiler! 'ws ws)
;; (define-unwrapped-sexp-parser line none (and "#" (* (and (not-followed-by eol) peg-any))))
;; (add-peg-compiler! 'line line)
;; (define-unwrapped-sexp-parser skip none (* (or ws eol line)))
;; (add-peg-compiler! 'skip skip)
;; (define (wrap-skip-parser-for-users for-syntax parser accumsym s-syn)
;; (display "wrap\n")
;; #`(lambda (str strlen pos)
;; (when #t
;; (format (current-error-port) "~a ~a : ~s\n"
;; (make-string (- pos (or (string-rindex str #\newline 0 pos) 0)) #\space)
;; '#,s-syn
;; (substring str pos (min (+ pos 40) strlen))))
;; (let* ((res (skip str strlen pos))
;; (pos (or (and res (car res)) pos))
;; (res (#,parser str strlen pos)))
;; ;; Try to match the nonterminal.
;; (if res
;; ;; If we matched, do some post-processing to figure out
;; ;; what data to propagate upward.
;; (let* ((at (car res))
;; (body (cadr res)))
;; #,(cond
;; ((eq? accumsym 'name)
;; #``(,at ,'#,s-syn))
;; ((eq? accumsym 'all)
;; #`(list at
;; (cond
;; ((not (list? body))
;; `(,'#,s-syn ,body))
;; ((null? body) `(,'#,s-syn))
;; ((symbol? (car body))
;; `(,'#,s-syn ,body))
;; (else (cons '#,s-syn body)))))
;; ((eq? accumsym 'none) #``(,at ()))
;; (else #``(,at ,body))))
;; ;; If we didn't match, just return false.
;; #f))))
;; (module-set! (resolve-module '(peg codegen)) 'wrap-parser-for-users wrap-skip-parser-for-users)
(define (parse port)
(parse-string (read-string port)))
(define (parse-string input)
(define io-label "")
(define (io-label-name str len pos)
(let ((at (string-skip str char-alphabetic? pos len)))
(set! io-label (substring str pos at))
(if (< at len) (list at '())
#f)))
(define (io-label-match str len pos)
(if (string-prefix? io-label (substring str pos))
(list (+ pos (string-length io-label)) '())
#f))
(define-peg-pattern io-here-label none io-label-name)
(define-peg-pattern io-here-delim none io-label-match)
(define-peg-pattern io-here-document all
(and (+ (and (not-followed-by io-here-delim)
peg-any))
io-here-delim))
(define-peg-string-patterns
"script <-- ws* compound
ws < sp / eol
sp < '\\\n'? (comment / [ \t\v])
comment < [#] (!eol .)*
eol < [\n\r\f]
compound <-- (term (&rpar / sep#))*
sep <- sp* (amp ws* / semi ws* / eof) / ws+
amp <- '&'
semi < ';'!';'
eof < !.
term <- and / or / pipeline
and <-- pipeline and-op ws* term
or <-- pipeline or-op ws* term
and-op < '&&'
or-op < '||'
pipeline <-- '!'? sp* (command (&sep / &or-op / &and-op / &rpar / eof / pipe#))+
and-or <- '&&' / '||'
exclamation <- '!'
pipe < sp* '|' !'|' ws*
command <-- function-def / compound-command / simple-command
compound-command <- (subshell / brace-group / for-clause / case-clause /
if-clause / while-clause / until-clause) (sp* io-redirect)*
simple-command <- ((io-redirect / nonreserved) sp*)+
io-redirect <-- [0-9]* (io-here / io-file)
io-file <-- io-op ([0-9]+ / word)
io-op <- '<&' / '>&' / '>>' / '>' / '<>'/ '<' / '>|'
io-here <-- io-here-op io-here-label sp* eol io-here-document
io-here-op <- '<<-' / '<<'
function-def <-- name sp* lpar rpar# ws* function-body
name <-- !reserved identifier
function-body <-- brace-group (sp* io-redirect)*
subshell <-- lpar compound rpar#
brace-group <-- lbrace ws* compound rbrace#
case-clause <-- 'case' sp* word sp* 'in'# ws* case-item+ ws* 'esac'#
case-item <-- pattern sp* colon? ws* compound? case-sep?
colon < ':'
case-sep < ';;' ws*
pattern <-- (word (!rpar '|'# / !'|' &rpar))+ rpar#
for-clause <-- 'for' sp+ identifier ws+ ('in' sp+ expression)? sep# do-group
expression <-- command
do-group <-- 'do' ws+ compound 'done'#
if-clause <-- 'if' sp+ compound 'then'# ws+ compound else-part? 'fi'#
else-part <-- 'else' ws+ compound /
'elif' ws+ compound 'then'# ws+ compound else-part?
while-clause <-- 'while' compound do-group
until-clause <-- 'until' compound do-group
reserved < ('case' / 'esac' / 'in' / 'if' / 'fi' / 'then' / 'else' /
'elif' / 'for' / 'done' / 'do' / 'until' / 'while') &ws
nonreserved <- !reserved word
word <-- test / substitution / assignment / number / variable /
delim / literal
test <-- ltest sp+ (word sp+)+ rtest#
ltest < '['
rtest < ']'
literal <- !reserved (!']' ![ \t\v\f\n`'\")};|&\\] .)+
identifier <- [_a-zA-Z][_a-zA-Z0-9]*
dollar < '$'
number <-- [0-9]+
substitution <-- dollar lpar compound rpar# / bt ([\\] bt / !bt .)+ bt#
lpar < '('
rpar < ')'
bt < [`]
assignment <-- name assign word?
assign < '='
variable <-- dollar ('*' / '@' / [0-9] / name /
lbrace name (variable-or / variable-and / variable-word / variable-literal / &rbrace) rbrace)
variable-or <-- min variable-word
variable-and <-- plus variable-word
variable-word <- (variable-regex / substitution / variable / variable-literal)+
variable-regex <-- ('%%' / '%' / '##' / '#' / '^^' / '^' /',,' / ',' / '*' / '@' / '?')+ variable-word
variable-literal <- (!rbrace .)+
min < '-'
plus < '+'
lbrace < '{'
rbrace < '}'
delim <-- singlequotes / doublequotes / substitution
sq < [']
dq < [\"]
singlequotes <- sq (!['] .)* sq#
doublequotes <- dq (substitution / variable / (![\"] .))* dq#")
(catch 'syntax-error
(lambda ()
(let* ((match (match-pattern script input))
(end (peg:end match))
(tree (peg:tree match)))
(when (> %debug-level 0)
(pretty-print tree))
(if (eq? (string-length input) end)
tree
(if match
(begin
(format (current-error-port) "parse error: at offset: ~a\n" end)
(pretty-print tree)
#f)
(begin
(format (current-error-port) "parse error: no match\n")
#f)))))
(lambda (key . args)
(define (line-column input pos)
(let ((length (string-length input)))
(let loop ((lines (string-split input #\newline)) (ln 1) (p 0))
(if (null? lines) (values #f #f input)
(let* ((line (car lines))
(length (string-length line))
(end (+ p length 1))
(last? (null? (cdr lines))))
(if (<= pos end) (values ln (+ (if last? 0 1) (- pos p))
(if last? line
(string-append line "\\n" (cadr lines))))
(loop (cdr lines) (1+ ln) end)))))))
(define (format-peg o)
(match o
(('or l ...) (string-join (map format-peg l) ", or "))
(('and l ...) (string-join (map format-peg l) " "))
((? symbol?) (symbol->string o))
((? string?) o)))
(receive (ln col line) (line-column input (caar args))
(let* ((col (- col 1))
(indent (make-string col #\space)))
(format #t "~a:~a:~a: syntax-error:\n~a\n~a^\n~aexpected: ~a\n"
""
ln col line
indent
indent
(format-peg (cadar args)))
(exit 1))))))