828 lines
24 KiB
Scheme
828 lines
24 KiB
Scheme
;;; Gash -- Guile As SHell
|
|
;;; Copyright © 2018, 2019 Timothy Sample <samplet@ngyro.com>
|
|
;;;
|
|
;;; This file is part of Gash.
|
|
;;;
|
|
;;; Gash is free software: you can redistribute it and/or modify
|
|
;;; it under the terms of the GNU General Public License as published by
|
|
;;; the Free Software Foundation, either version 3 of the License, or
|
|
;;; (at your option) any later version.
|
|
;;;
|
|
;;; Gash is distributed in the hope that it will be useful,
|
|
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
;;; GNU General Public License for more details.
|
|
;;;
|
|
;;; You should have received a copy of the GNU General Public License
|
|
;;; along with Gash. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
(define-module (gash parser)
|
|
#:use-module (gash compat)
|
|
#:use-module (gash compat textual-ports)
|
|
#:use-module (gash lexer)
|
|
#:use-module (ice-9 match)
|
|
#:use-module (srfi srfi-1)
|
|
#:use-module (srfi srfi-11)
|
|
#:use-module (system base lalr)
|
|
#:export (read-sh
|
|
read-sh-all))
|
|
|
|
;;; Commentary:
|
|
;;;
|
|
;;; This module contains the parser for the Shell language.
|
|
;;;
|
|
;;; Code:
|
|
|
|
(define io-redirect-defaults
|
|
'((< . 0) (<& . 0) (> . 1)
|
|
(>& . 1) (>> . 1) (<> . 0)
|
|
(>! . 1) (<< . 0) (<<- . 0)))
|
|
|
|
(define (io-redirect? sexp)
|
|
"Determine if @var{sexp} is an I/O redirect form."
|
|
(match sexp
|
|
((or ('< x y) ('<& x y) ('> x y)
|
|
('>& x y) ('>> x y) ('<> x y)
|
|
('>! x y) ('<< x y) ('<<- x y)) #t)
|
|
(_ #f)))
|
|
|
|
(define (split-assignment word)
|
|
"Split assignment @var{word} into a list where the first element is
|
|
the variable name and the second element is the value expression."
|
|
|
|
(define (assignment-name-and-value str)
|
|
(let* ((index (string-index str #\=))
|
|
(name (substring str 0 index))
|
|
(value (substring str (1+ index))))
|
|
`(,name . ,value)))
|
|
|
|
(match word
|
|
(((? string?) . tail)
|
|
(match (assignment-name-and-value (car word))
|
|
((name . value)
|
|
(cond
|
|
((null? tail) `(,name ,value))
|
|
((string-null? value) (if (null? (cdr tail))
|
|
`(,name ,(car tail))
|
|
`(,name ,tail)))
|
|
(else `(,name ,(cons value tail)))))))
|
|
((? string?)
|
|
(match (assignment-name-and-value word)
|
|
((name . value) `(,name ,value))))))
|
|
|
|
(define (map+fold proc init xs)
|
|
"Apply @var{proc} to each element of @var{xs}, mapping and folding
|
|
at the same time. The procedure @var{proc} must return two values:
|
|
the first is the result for mapping, and the second is the result for
|
|
folding."
|
|
(let loop ((xs xs) (map-acc '()) (fold-acc init))
|
|
(match xs
|
|
((x . rest) (let-values (((map-value fold-value) (proc x fold-acc)))
|
|
(loop rest (cons map-value map-acc) fold-value)))
|
|
(() (values (reverse! map-acc) fold-acc)))))
|
|
|
|
(define (merge-here-docs cmd here-docs)
|
|
"Replace @code{'(<sh-here-end> ...)} forms in @var{cmd} with words
|
|
from the list @var{here-docs}. Returns two values: the modified
|
|
@var{cmd} and the unused strings from @var{here-docs}."
|
|
(match cmd
|
|
(('<sh-with-redirects> redirs . rest)
|
|
(let*-values (((redirs here-docs)
|
|
(let loop ((redirs redirs) (here-docs here-docs) (acc '()))
|
|
(match redirs
|
|
((((or '<< '<<-) fdes ('<sh-here-end> _)) . rest)
|
|
(loop rest (cdr here-docs)
|
|
(cons `(<< ,fdes ,(car here-docs)) acc)))
|
|
((redir . rest)
|
|
(loop rest here-docs
|
|
(cons redir acc)))
|
|
(() (values (reverse! acc) here-docs)))))
|
|
((rest here-docs)
|
|
(map+fold merge-here-docs here-docs rest)))
|
|
(values `(<sh-with-redirects> ,redirs ,@rest) here-docs)))
|
|
((xs ...) (map+fold merge-here-docs here-docs xs))
|
|
(x (values x here-docs))))
|
|
|
|
(define (remove-quotes here-end)
|
|
"Remove quote forms from @var{here-end} and concatenate the result
|
|
into a single field (string). If there are no quote forms in
|
|
@var{here-end}, it is returned as-is. This means that when @code{(eq?
|
|
here-end (remove-quotes here-end))}, then @var{here-end} does not
|
|
contain any quote forms."
|
|
(let loop ((word here-end) (quotes? #f) (acc '()))
|
|
(match word
|
|
(() (if quotes? (string-concatenate-reverse acc) here-end))
|
|
(('<sh-quote> word*) (loop '() #t (cons (remove-quotes word*) acc)))
|
|
((('<sh-quote> word*) . t) (loop t #t (cons (remove-quotes word*) acc)))
|
|
((? string?) (loop '() quotes? (cons word acc)))
|
|
(((? string? h) . t) (loop t quotes? (cons h acc))))))
|
|
|
|
(define (read-here-docs op+ends port)
|
|
"Read a here-document from @var{port} for each operator and here-end
|
|
pair in @var{op+ends}."
|
|
(map (match-lambda
|
|
((op . end)
|
|
(let ((end* (remove-quotes end)))
|
|
(match (get-here-doc (remove-quotes end) port
|
|
#:trim-tabs? (eq? op '<<-)
|
|
#:quoted? (not (eq? end end*)))
|
|
((? lexical-token? token)
|
|
(lexical-token-value token))))))
|
|
op+ends))
|
|
|
|
(define* (make-lexer port #:key (here-docs-hook noop))
|
|
"Make a lexer thunk that reads tokens from @var{port}. If
|
|
@var{here-docs-hook} is set, it will be called for each list of
|
|
here-documents the lexer encounters."
|
|
(define next-tokens '())
|
|
(define here-ends '())
|
|
(lambda ()
|
|
(parameterize ((read-bracketed-command read-sh/bracketed)
|
|
(read-backquoted-command read-sh/backquoted))
|
|
(match next-tokens
|
|
(()
|
|
(let* ((token (get-token port))
|
|
(category (and (lexical-token? token)
|
|
(lexical-token-category token))))
|
|
(match category
|
|
((or 'DLESS 'DLESSDASH)
|
|
(let ((here-end (get-here-end port)))
|
|
(unless (lexical-token? here-end)
|
|
(error "Unexpected EOF."))
|
|
(let ((op (if (eq? category 'DLESS) '<< '<<-))
|
|
(end (lexical-token-value here-end)))
|
|
(set! here-ends (cons `(,op . ,end) here-ends))
|
|
(set! next-tokens `(,here-end)))
|
|
token))
|
|
('NEWLINE
|
|
(match here-ends
|
|
(() token)
|
|
(_ (here-docs-hook (read-here-docs (reverse here-ends) port))
|
|
(set! here-ends '())
|
|
token)))
|
|
(_ token))))
|
|
((next-token . rest)
|
|
(set! next-tokens rest)
|
|
next-token)))))
|
|
|
|
(define* (make-parser #:key (command-hook noop)
|
|
(open-bracket-hook noop)
|
|
(close-bracket-hook noop))
|
|
"Make an LALR parser for the Shell language. The optional hooks are
|
|
all thunks. The @var{command-hook} thunk is called to transform each
|
|
complete command. The @var{open-bracket-hook} thunk is called after
|
|
reducing an opening bracket. The @var{close-bracket-hook} is called
|
|
after reducing a closing bracket. (Note that a @var{open-bracket-hook}
|
|
is also called when reducing case patterns that end with an unbalanced
|
|
closing bracket. This ensures that when parsing valid Shell code,
|
|
@var{open-bracket-hook} and @var{close-bracket-hook} should be called
|
|
the same number of times.)"
|
|
|
|
(define command-list->block
|
|
(match-lambda
|
|
((cmd) cmd)
|
|
((cmds ...) `(<sh-begin> ,@cmds))))
|
|
|
|
(lalr-parser
|
|
|
|
(AND ; '&'
|
|
SEMI ; ';'
|
|
LESS ; '<'
|
|
GREAT ; '>'
|
|
PIPE ; '|'
|
|
LPAREN ; '('
|
|
RPAREN ; ')'
|
|
|
|
AND-IF ; '&&'
|
|
OR-IF ; '||'
|
|
DSEMI ; ';;'
|
|
DLESS ; '<<'
|
|
DGREAT ; '>>'
|
|
LESSAND ; '<&'
|
|
GREATAND ; '>&'
|
|
LESSGREAT ; '<>'
|
|
DLESSDASH ; '<<-'
|
|
CLOBBER ; '>|'
|
|
|
|
If ; 'if'
|
|
Then ; 'then'
|
|
Else ; 'else'
|
|
Elif ; 'elif'
|
|
Fi ; 'fi'
|
|
Do ; 'do'
|
|
Done ; 'done'
|
|
Case ; 'case'
|
|
Esac ; 'esac'
|
|
While ; 'while'
|
|
Until ; 'until'
|
|
For ; 'for'
|
|
Lbrace ; '{'
|
|
Rbrace ; '}'
|
|
Bang ; '!'
|
|
In ; 'in'
|
|
|
|
WORD
|
|
ASSIGNMENT-WORD
|
|
NAME
|
|
NEWLINE
|
|
IO-NUMBER
|
|
HERE-DOC
|
|
HERE-DOC-SEP)
|
|
|
|
(program
|
|
(linebreak complete-commands linebreak)
|
|
: (if (null? (cdr $2)) (car $2) (reverse! $2))
|
|
(linebreak)
|
|
: the-eof-object)
|
|
|
|
(complete-commands
|
|
(complete-commands newline-list complete-command)
|
|
: (begin
|
|
(cons (command-hook $3) $1))
|
|
(complete-command)
|
|
: (begin
|
|
`(,(command-hook $1))))
|
|
|
|
(complete-command
|
|
(list separator-op)
|
|
: (command-list->block
|
|
(match $2
|
|
('AND (reverse! (cons `(<sh-async> ,(car $1)) (cdr $1))))
|
|
('SEMI (reverse! $1))))
|
|
(list)
|
|
: (command-list->block (reverse! $1)))
|
|
|
|
(list
|
|
(list separator-op and-or)
|
|
: (match $2
|
|
('AND (cons* $3 `(<sh-async> ,(car $1)) (cdr $1)))
|
|
('SEMI (cons $3 $1)))
|
|
(and-or)
|
|
: `(,$1))
|
|
|
|
(and-or
|
|
(pipeline)
|
|
: $1
|
|
(and-or AND-IF linebreak pipeline)
|
|
: `(<sh-and> ,$1 ,$4)
|
|
(and-or OR-IF linebreak pipeline)
|
|
: `(<sh-or> ,$1 ,$4))
|
|
|
|
(pipeline
|
|
(pipe-sequence)
|
|
: (if (null? (cdr $1))
|
|
(car $1)
|
|
`(<sh-pipeline> ,@(reverse! $1)))
|
|
(Bang pipe-sequence)
|
|
: `(<sh-not> ,(if (null? (cdr $2))
|
|
(car $2)
|
|
`(<sh-pipeline> ,@(reverse! $2)))))
|
|
|
|
(pipe-sequence
|
|
(command)
|
|
: `(,$1)
|
|
(pipe-sequence PIPE linebreak command)
|
|
: (cons $4 $1))
|
|
|
|
(command
|
|
(simple-command)
|
|
: $1
|
|
(compound-command)
|
|
: $1
|
|
(compound-command redirect-list)
|
|
: `(<sh-with-redirects> ,$2 ,$1)
|
|
(function-definition)
|
|
: $1)
|
|
|
|
(compound-command
|
|
(brace-group)
|
|
: (command-list->block $1)
|
|
(subshell)
|
|
: $1
|
|
(for-clause)
|
|
: $1
|
|
(case-clause)
|
|
: $1
|
|
(if-clause)
|
|
: $1
|
|
(while-clause)
|
|
: $1
|
|
(until-clause)
|
|
: $1)
|
|
|
|
(subshell
|
|
(LPAREN! compound-list RPAREN!)
|
|
: `(<sh-subshell> ,@$2))
|
|
|
|
(compound-list
|
|
(linebreak term)
|
|
: (reverse! $2)
|
|
(linebreak term separator)
|
|
: (reverse! (match $3
|
|
('AND (cons `(<sh-async> ,(car $2)) (cdr $2)))
|
|
((or 'SEMI 'NEWLINE) $2))))
|
|
|
|
(term
|
|
(term separator and-or)
|
|
: (match $2
|
|
('AND (cons* $3 `(<sh-async> ,(car $1)) (cdr $1)))
|
|
((or 'SEMI 'NEWLINE) (cons $3 $1)))
|
|
(and-or)
|
|
: `(,$1))
|
|
|
|
(for-clause
|
|
(For name do-group)
|
|
: `(<sh-for> (,$2 ((<sh-quote> (<sh-ref> "@")))) ,@$3)
|
|
(For name sequential-sep do-group)
|
|
: `(<sh-for> (,$2 ((<sh-quote> (<sh-ref> "@")))) ,@$4)
|
|
(For name linebreak in sequential-sep do-group)
|
|
: `(<sh-for> (,$2 ()) ,@$6)
|
|
(For name linebreak in wordlist sequential-sep do-group)
|
|
: `(<sh-for> (,$2 ,$5) ,@$7))
|
|
|
|
(name
|
|
(NAME-with-keywords)
|
|
: $1)
|
|
|
|
(in
|
|
(In)
|
|
: #f)
|
|
|
|
(wordlist
|
|
(wordlist WORD*)
|
|
: (append $1 `(,$2))
|
|
(WORD*)
|
|
: `(,$1))
|
|
|
|
(case-clause
|
|
(Case WORD* linebreak in linebreak case-list Esac)
|
|
: `(<sh-case> ,$2 ,@$6)
|
|
(Case WORD* linebreak in linebreak case-list-ns Esac)
|
|
: `(<sh-case> ,$2 ,@$6)
|
|
(Case WORD* linebreak in linebreak Esac)
|
|
: `(<sh-case> ,$2))
|
|
|
|
(case-list-ns
|
|
(case-list case-item-ns)
|
|
: (append $1 `(,$2))
|
|
(case-item-ns)
|
|
: `(,$1))
|
|
|
|
(case-list
|
|
(case-list case-item)
|
|
: (append $1 `(,$2))
|
|
(case-item)
|
|
: `(,$1))
|
|
|
|
(case-item-ns
|
|
(pattern! RPAREN! linebreak)
|
|
: `(,$1 #f)
|
|
(pattern! RPAREN! compound-list)
|
|
: `(,$1 ,@$3)
|
|
(LPAREN! pattern RPAREN! linebreak)
|
|
: `(,$2 #f)
|
|
(LPAREN! pattern RPAREN! compound-list)
|
|
: `(,$2 ,@$4))
|
|
|
|
(case-item
|
|
(pattern! RPAREN! linebreak DSEMI linebreak)
|
|
: `(,$1 #f)
|
|
(pattern! RPAREN! compound-list DSEMI linebreak)
|
|
: `(,$1 ,@$3)
|
|
(LPAREN! pattern RPAREN! linebreak DSEMI linebreak)
|
|
: `(,$2 #f)
|
|
(LPAREN! pattern RPAREN! compound-list DSEMI linebreak)
|
|
: `(,$2 ,@$4))
|
|
|
|
;; If this rule is updated, the hooked version given below must be
|
|
;; updated as well.
|
|
(pattern
|
|
(WORD*-without-Esac)
|
|
: `(,$1)
|
|
(pattern PIPE WORD*)
|
|
: (append $1 `(,$3)))
|
|
|
|
(if-clause
|
|
(If compound-list Then compound-list else-part Fi)
|
|
: `(<sh-cond> (,(command-list->block $2) ,@$4) ,@$5)
|
|
(If compound-list Then compound-list Fi)
|
|
: `(<sh-cond> (,(command-list->block $2) ,@$4)))
|
|
|
|
(else-part
|
|
(Elif compound-list Then compound-list)
|
|
: `((,(command-list->block $2) ,@$4))
|
|
(Elif compound-list Then compound-list else-part)
|
|
: (cons `(,(command-list->block $2) ,@$4) $5)
|
|
(Else compound-list)
|
|
: `((<sh-else> ,@$2)))
|
|
|
|
(while-clause
|
|
(While compound-list do-group)
|
|
: `(<sh-while> ,(command-list->block $2) ,@$3))
|
|
|
|
(until-clause
|
|
(Until compound-list do-group)
|
|
: `(<sh-until> ,(command-list->block $2) ,@$3))
|
|
|
|
(function-definition
|
|
(fname LPAREN! RPAREN! linebreak function-body)
|
|
: `(<sh-defun> ,$1 ,$5))
|
|
|
|
(function-body
|
|
(compound-command)
|
|
: $1
|
|
(compound-command redirect-list)
|
|
: `(<sh-with-redirects> ,$2 ,$1))
|
|
|
|
(fname
|
|
(NAME)
|
|
: $1)
|
|
|
|
(brace-group
|
|
(Lbrace compound-list Rbrace)
|
|
: $2)
|
|
|
|
(do-group
|
|
(Do compound-list Done)
|
|
: $2)
|
|
|
|
(simple-command
|
|
(cmd-prefix cmd-word cmd-suffix)
|
|
: (let*-values (((redirects-1 assignments*) (partition io-redirect? $1))
|
|
((redirects-2 args) (partition io-redirect? $3))
|
|
((assignments) (map split-assignment assignments*)))
|
|
(match (append redirects-1 redirects-2)
|
|
(() `(<sh-exec-let> ,assignments ,$2 ,@args))
|
|
(redirects `(<sh-with-redirects> ,redirects
|
|
(<sh-exec-let> ,assignments ,$2 ,@args)))))
|
|
(cmd-prefix cmd-word)
|
|
: (let*-values (((redirects assignments*) (partition io-redirect? $1))
|
|
((assignments) (map split-assignment assignments*)))
|
|
(match redirects
|
|
(() `(<sh-exec-let> ,assignments ,$2))
|
|
(_ `(<sh-with-redirects> ,redirects
|
|
,(if (null? assignments)
|
|
`(<sh-exec> ,$2)
|
|
`(<sh-exec-let> ,assignments ,$2))))))
|
|
(cmd-prefix)
|
|
: (let*-values (((redirects assignments*) (partition io-redirect? $1))
|
|
((assignments) (map split-assignment assignments*)))
|
|
(match redirects
|
|
(() `(<sh-set!> ,@assignments))
|
|
(_ `(<sh-with-redirects> ,redirects
|
|
,(if (null? assignments)
|
|
#f
|
|
`(<sh-set!> ,@assignments))))))
|
|
(cmd-name cmd-suffix)
|
|
: (let-values (((redirects args) (partition io-redirect? $2)))
|
|
(match redirects
|
|
(() `(<sh-exec> ,$1 ,@args))
|
|
(_ `(<sh-with-redirects> ,redirects
|
|
(<sh-exec> ,$1 ,@args)))))
|
|
(cmd-name)
|
|
: `(<sh-exec> ,$1))
|
|
|
|
(cmd-name
|
|
(WORD*-without-keywords-or-ASSIGNMENT-WORD)
|
|
: $1)
|
|
|
|
(cmd-word
|
|
(WORD*-without-keywords-or-ASSIGNMENT-WORD)
|
|
: $1)
|
|
|
|
(cmd-prefix
|
|
(io-redirect)
|
|
: `(,$1)
|
|
(cmd-prefix io-redirect)
|
|
: (append $1 `(,$2))
|
|
(ASSIGNMENT-WORD)
|
|
: `(,$1)
|
|
(cmd-prefix ASSIGNMENT-WORD)
|
|
: (append $1 `(,$2)))
|
|
|
|
(cmd-suffix
|
|
(io-redirect)
|
|
: `(,$1)
|
|
(cmd-suffix io-redirect)
|
|
: (append $1 `(,$2))
|
|
(WORD*)
|
|
: `(,$1)
|
|
(cmd-suffix WORD*)
|
|
: (append $1 `(,$2)))
|
|
|
|
(redirect-list
|
|
(io-redirect)
|
|
: `(,$1)
|
|
(redirect-list io-redirect)
|
|
: (append $1 `(,$2)))
|
|
|
|
(io-redirect
|
|
(io-file)
|
|
: `(,(car $1) ,(assoc-ref io-redirect-defaults (car $1)) ,(cdr $1))
|
|
(IO-NUMBER io-file)
|
|
: `(,(car $2) ,(string->number $1) ,(cdr $2))
|
|
(io-here)
|
|
: `(,(car $1) ,(assoc-ref io-redirect-defaults (car $1)) ,(cdr $1))
|
|
(IO-NUMBER io-here)
|
|
: `(,(car $2) ,(string->number $1) ,(cdr $2)))
|
|
|
|
(io-file
|
|
(LESS filename)
|
|
: `(< . ,$2)
|
|
(LESSAND filename)
|
|
: `(<& . ,$2)
|
|
(GREAT filename)
|
|
: `(> . ,$2)
|
|
(GREATAND filename)
|
|
: `(>& . ,$2)
|
|
(DGREAT filename)
|
|
: `(>> . ,$2)
|
|
(LESSGREAT filename)
|
|
: `(<> . ,$2)
|
|
(CLOBBER filename)
|
|
: `(>! . ,$2))
|
|
|
|
(filename
|
|
(WORD*)
|
|
: $1)
|
|
|
|
(io-here
|
|
(DLESS here-end)
|
|
: `(<< . (<sh-here-end> ,$2))
|
|
(DLESSDASH here-end)
|
|
: `(<<- . (<sh-here-end> ,$2)))
|
|
|
|
(here-end
|
|
(WORD*)
|
|
: $1)
|
|
|
|
(newline-list
|
|
(NEWLINE)
|
|
: #f
|
|
(newline-list NEWLINE)
|
|
: #f)
|
|
|
|
(linebreak
|
|
(newline-list)
|
|
: #f
|
|
()
|
|
: #f)
|
|
|
|
(separator-op
|
|
(AND)
|
|
: 'AND
|
|
(SEMI)
|
|
: 'SEMI)
|
|
|
|
(separator
|
|
(separator-op linebreak)
|
|
: $1
|
|
(newline-list)
|
|
: 'NEWLINE)
|
|
|
|
(sequential-sep
|
|
(SEMI linebreak)
|
|
: #f
|
|
(newline-list)
|
|
: #f)
|
|
|
|
;; Rules added to emulate the POSIX context-sensitive lexer
|
|
;; approach.
|
|
|
|
;; Accept all the specializations of a normal word and all
|
|
;; keywords. This is the default case.
|
|
(WORD*
|
|
(WORD) : $1
|
|
(NAME) : $1
|
|
(ASSIGNMENT-WORD) : $1
|
|
(If) : $1
|
|
(Then) : $1
|
|
(Else) : $1
|
|
(Elif) : $1
|
|
(Fi) : $1
|
|
(Do) : $1
|
|
(Done) : $1
|
|
(Case) : $1
|
|
(Esac) : $1
|
|
(While) : $1
|
|
(Until) : $1
|
|
(For) : $1
|
|
(Lbrace) : $1
|
|
(Rbrace) : $1
|
|
(Bang) : $1
|
|
(In) : $1)
|
|
|
|
;; Just like 'WORD*', but no keywords. This corresponds to "rule
|
|
;; 1" in the POSIX specification.
|
|
(WORD*-without-keywords
|
|
(WORD) : $1
|
|
(NAME) : $1
|
|
(ASSIGNMENT-WORD) : $1)
|
|
|
|
;; Just like 'WORD*', but without the "esac" keyword. This
|
|
;; corresponds to "rule 4" in the POSIX specification.
|
|
(WORD*-without-Esac
|
|
(WORD) : $1
|
|
(NAME) : $1
|
|
(ASSIGNMENT-WORD) : $1
|
|
(If) : $1
|
|
(Then) : $1
|
|
(Else) : $1
|
|
(Elif) : $1
|
|
(Fi) : $1
|
|
(Do) : $1
|
|
(Done) : $1
|
|
(Case) : $1
|
|
;; (Esac) : $1
|
|
(While) : $1
|
|
(Until) : $1
|
|
(For) : $1
|
|
(Lbrace) : $1
|
|
(Rbrace) : $1
|
|
(Bang) : $1
|
|
(In) : $1)
|
|
|
|
;; Accept a "NAME" or any keyword. This corresponds to "rule 5" in
|
|
;; the POSIX specification.
|
|
(NAME-with-keywords
|
|
(NAME) : $1
|
|
(If) : $1
|
|
(Then) : $1
|
|
(Else) : $1
|
|
(Elif) : $1
|
|
(Fi) : $1
|
|
(Do) : $1
|
|
(Done) : $1
|
|
(Case) : $1
|
|
(Esac) : $1
|
|
(While) : $1
|
|
(Until) : $1
|
|
(For) : $1
|
|
(Lbrace) : $1
|
|
(Rbrace) : $1
|
|
(Bang) : $1
|
|
(In) : $1)
|
|
|
|
;; Accept any "WORD*" token except for "ASSIGNMENT-WORD". This
|
|
;; corresponds to "rule 7" in the POSIX specification.
|
|
(WORD*-without-keywords-or-ASSIGNMENT-WORD
|
|
(WORD) : $1
|
|
(NAME) : $1)
|
|
|
|
;; Rules for updating bracket balance.
|
|
|
|
(LPAREN!
|
|
(LPAREN)
|
|
: (begin (open-bracket-hook) $1))
|
|
|
|
(RPAREN!
|
|
(RPAREN)
|
|
: (begin (close-bracket-hook) $1))
|
|
|
|
;; Sometimes a "pattern" non-terminal comes before an unbalanced
|
|
;; "RPAREN". This reduction hook can be used to pretend that we
|
|
;; encountered an "LPAREN". It should match the unhooked one given
|
|
;; above.
|
|
|
|
(pattern!
|
|
(WORD*-without-Esac)
|
|
: (begin (open-bracket-hook) `(,$1))
|
|
(pattern! PIPE WORD*)
|
|
: (append $1 `(,$3)))))
|
|
|
|
(define* (syntax-error message #:optional token)
|
|
"Handle a parser error"
|
|
(if (lexical-token? token)
|
|
(throw 'syntax-error #f message
|
|
(and=> (lexical-token-source token)
|
|
source-location->source-properties)
|
|
(or (lexical-token-value token)
|
|
(lexical-token-category token))
|
|
#f)
|
|
(throw 'syntax-error #f message #f token #f)))
|
|
|
|
(define* (parse port #:key (lex-hook (lambda (lex) (lex)))
|
|
(command-hook noop)
|
|
(open-bracket-hook noop)
|
|
(close-bracket-hook noop))
|
|
"Parse a Shell script from @var{port}. There are several hooks that
|
|
can be installed while parsing. The procedure @var{lex-hook} is
|
|
called before reading each token. It must take a thunk (which it can
|
|
use to invoke the normal lexer) and return a token. The
|
|
@var{command-hook} thunk is called after parsing each complete
|
|
command. Its return value is ignored. The @var{open-bracket-hook}
|
|
and @var{close-bracket-hook} thunks are called for each opening
|
|
bracket and closing bracket respectively. Their return values are
|
|
ignored."
|
|
|
|
(define here-docs '())
|
|
|
|
(define (add-here-docs docs)
|
|
(set! here-docs (append-reverse docs here-docs)))
|
|
|
|
(define (insert-here-docs exp)
|
|
(let-values (((exp here-docs*)
|
|
(merge-here-docs exp (reverse here-docs))))
|
|
(unless (null? here-docs*)
|
|
(error "Unused here-documents"))
|
|
(set! here-docs '())
|
|
exp))
|
|
|
|
(define lex
|
|
(let ((pre-lex (make-lexer port #:here-docs-hook add-here-docs)))
|
|
(lambda ()
|
|
(lex-hook pre-lex))))
|
|
|
|
(define (command-hook* command)
|
|
(command-hook)
|
|
(insert-here-docs command))
|
|
|
|
(define %parse
|
|
(make-parser #:command-hook command-hook*
|
|
#:open-bracket-hook open-bracket-hook
|
|
#:close-bracket-hook close-bracket-hook))
|
|
|
|
(%parse lex syntax-error))
|
|
|
|
(define (->command-list code)
|
|
"Make the Shell syntax tree @var{code} a list of commands."
|
|
(match code
|
|
((? eof-object?) '())
|
|
(((? symbol? tag) . rest) `((,tag . ,rest)))
|
|
(code code)))
|
|
|
|
(define* (call-with-backquoted-input-port port proc #:key quoted?)
|
|
"Call @var{proc} with a wrapped version of @var{port} that will
|
|
return the end-of-file object upon encountering an unescaped backquote
|
|
\"`\" (without consuming the backquote). If @var{quoted?} is set,
|
|
treat the double quote character as escapable."
|
|
(define (escape-char? chr)
|
|
(or (char=? chr #\$)
|
|
(char=? chr #\`)
|
|
(char=? chr #\\)
|
|
(and quoted? (char=? chr #\"))))
|
|
|
|
(define wrapped-port
|
|
(make-soft-port
|
|
(vector
|
|
;; put-char, put-string, and flush-output-port
|
|
#f #f #f
|
|
;; get-char
|
|
(lambda ()
|
|
(match (lookahead-char port)
|
|
(#\` the-eof-object)
|
|
(#\\ (begin
|
|
(get-char port)
|
|
(match (lookahead-char port)
|
|
((? escape-char?) (get-char port))
|
|
(_ #\\))))
|
|
(_ (get-char port))))
|
|
;; close-port
|
|
#f)
|
|
"r"))
|
|
(proc wrapped-port))
|
|
|
|
(define (read-sh/bracketed port)
|
|
"Read Shell code from @var{port} until the first unmatched closing
|
|
bracket."
|
|
|
|
(define bracket-depth 0)
|
|
(define (incr-bracket-depth!) (set! bracket-depth (1+ bracket-depth)))
|
|
(define (decr-bracket-depth!) (set! bracket-depth (1- bracket-depth)))
|
|
|
|
(define (stop-if-balanced lex)
|
|
(let ((token (lex)))
|
|
(if (and (= 0 bracket-depth)
|
|
(lexical-token? token)
|
|
(eq? (lexical-token-category token) 'RPAREN))
|
|
(begin
|
|
(unget-char port #\))
|
|
'*eoi*)
|
|
token)))
|
|
|
|
(->command-list (parse port #:lex-hook stop-if-balanced
|
|
#:open-bracket-hook incr-bracket-depth!
|
|
#:close-bracket-hook decr-bracket-depth!)))
|
|
|
|
(define* (read-sh/backquoted port #:key quoted?)
|
|
"Read Shell code from @var{port} until the first unescaped
|
|
backquote. If @var{quoted?} is set, treat the double quote character
|
|
as escapable."
|
|
(call-with-backquoted-input-port port
|
|
(lambda (port)
|
|
(->command-list (parse port)))
|
|
#:quoted? quoted?))
|
|
|
|
(define* (read-sh #:optional (port (current-input-port)))
|
|
"Read a complete Shell command from @var{port} (or the current input
|
|
port if @var{port} is unspecified)."
|
|
(define stop? #f)
|
|
(define (stop!) (set! stop? #t))
|
|
(parse port #:lex-hook (lambda (lex) (if stop? '*eoi* (lex)))
|
|
#:command-hook stop!))
|
|
|
|
(define* (read-sh-all #:optional (port (current-input-port)))
|
|
"Read all complete Shell commands from @var{port} (or the current
|
|
input port if @var{port} is unspecified)."
|
|
(->command-list (parse port)))
|