;;; Gash -- Guile As SHell ;;; Copyright © 2018, 2019 Timothy Sample ;;; ;;; This file is part of Gash. ;;; ;;; Gash is free software: you can redistribute it and/or modify ;;; it under the terms of the GNU General Public License as published by ;;; the Free Software Foundation, either version 3 of the License, or ;;; (at your option) any later version. ;;; ;;; Gash is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with Gash. If not, see . (define-module (gash parser) #:use-module (gash compat textual-ports) #:use-module (gash lexer) #:use-module (ice-9 match) #:use-module (srfi srfi-1) #:use-module (srfi srfi-11) #:use-module (srfi srfi-41) #:use-module (system base lalr) #:export (read-sh read-sh-all)) ;;; Commentary: ;;; ;;; This module contains the parser for the Shell language. ;;; ;;; Code: (define io-redirect-defaults '((< . 0) (<& . 0) (> . 1) (>& . 1) (>> . 1) (<> . 0) (>! . 1) (<< . 0) (<<- . 0))) (define (io-redirect? sexp) "Determine if @var{sexp} is an I/O redirect form." (match sexp ((or ('< x y) ('<& x y) ('> x y) ('>& x y) ('>> x y) ('<> x y) ('>! x y) ('<< x y) ('<<- x y)) #t) (_ #f))) (define (split-assignment word) "Split assignment @var{word} into a list where the first element is the variable name and the second element is the value expression." (define (assignment-name-and-value str) (let* ((index (string-index str #\=)) (name (substring str 0 index)) (value (substring str (1+ index)))) `(,name . ,value))) (match word (((? string?) . tail) (match (assignment-name-and-value (car word)) ((name . value) (cond ((null? tail) `(,name ,value)) ((string-null? value) (if (null? (cdr tail)) `(,name ,(car tail)) `(,name ,tail))) (else `(,name ,(cons value tail))))))) ((? string?) (match (assignment-name-and-value word) ((name . value) `(,name ,value)))))) ;; The (ice-9 textual-ports) module does not allow instantiating ;; end-of-file objects, but (rnrs io ports) does. (define eof-object (@ (rnrs io ports) eof-object)) (define (map+fold proc init xs) "Apply @var{proc} to each element of @var{xs}, mapping and folding at the same time. The procedure @var{proc} must return two values: the first is the result for mapping, and the second is the result for folding." (let loop ((xs xs) (map-acc '()) (fold-acc init)) (match xs ((x . rest) (let-values (((map-value fold-value) (proc x fold-acc))) (loop rest (cons map-value map-acc) fold-value))) (() (values (reverse! map-acc) fold-acc))))) (define (merge-here-docs cmd here-docs) "Replace @code{'( ...)} forms in @var{cmd} with words from the list @var{here-docs}. Returns two values: the modified @var{cmd} and the unused strings from @var{here-docs}." (match cmd ((' redirs . rest) (let*-values (((redirs here-docs) (let loop ((redirs redirs) (here-docs here-docs) (acc '())) (match redirs ((((or '<< '<<-) fdes (' _)) . rest) (loop rest (cdr here-docs) (cons `(<< ,fdes ,(car here-docs)) acc))) ((redir . rest) (loop rest here-docs (cons redir acc))) (() (values (reverse! acc) here-docs))))) ((rest here-docs) (map+fold merge-here-docs here-docs rest))) (values `( ,redirs ,@rest) here-docs))) ((xs ...) (map+fold merge-here-docs here-docs xs)) (x (values x here-docs)))) (define (remove-quotes here-end) "Remove quote forms from @var{here-end} and concatenate the result into a single field (string). If there are no quote forms in @var{here-end}, it is returned as-is. This means that when @code{(eq? here-end (remove-quotes here-end))}, then @var{here-end} does not contain any quote forms." (let loop ((word here-end) (quotes? #f) (acc '())) (match word (() (if quotes? (string-concatenate-reverse acc) here-end)) ((' word*) (loop '() #t (cons (remove-quotes word*) acc))) (((' word*) . t) (loop t #t (cons (remove-quotes word*) acc))) ((? string?) (loop '() quotes? (cons word acc))) (((? string? h) . t) (loop t quotes? (cons h acc)))))) (define (read-here-docs op+ends port) "Read a here-document from @var{port} for each operator and here-end pair in @var{op+ends}." (map (match-lambda ((op . end) (let ((end* (remove-quotes end))) (match (get-here-doc (remove-quotes end) port #:trim-tabs? (eq? op '<<-) #:quoted? (not (eq? end end*))) ((? lexical-token? token) (lexical-token-value token)))))) op+ends)) (define* (make-lexer port #:key (here-docs-hook noop)) "Make a lexer thunk that reads tokens from @var{port}. If @var{here-docs-hook} is set, it will be called for each list of here-documents the lexer encounters." (define next-tokens '()) (define here-ends '()) (lambda () (parameterize ((read-bracketed-command read-sh/bracketed) (read-backquoted-command read-sh/backquoted)) (match next-tokens (() (let* ((token (get-token port)) (category (and (lexical-token? token) (lexical-token-category token)))) (match category ((or 'DLESS 'DLESSDASH) (let ((here-end (get-here-end port))) (unless (lexical-token? here-end) (error "Unexpected EOF.")) (let ((op (if (eq? category 'DLESS) '<< '<<-)) (end (lexical-token-value here-end))) (set! here-ends (cons `(,op . ,end) here-ends)) (set! next-tokens `(,here-end))) token)) ('NEWLINE (match here-ends (() token) (_ (here-docs-hook (read-here-docs (reverse here-ends) port)) (set! here-ends '()) token))) (_ token)))) ((next-token . rest) (set! next-tokens rest) next-token))))) (define* (make-parser #:key (command-hook noop) (open-bracket-hook noop) (close-bracket-hook noop)) "Make an LALR parser for the Shell language. The optional hooks are all thunks. The @var{command-hook} thunk is called to transform each complete command. The @var{open-bracket-hook} thunk is called after reducing an opening bracket. The @var{close-bracket-hook} is called after reducing a closing bracket. (Note that a @var{open-bracket-hook} is also called when reducing case patterns that end with an unbalanced closing bracket. This ensures that when parsing valid Shell code, @var{open-bracket-hook} and @var{close-bracket-hook} should be called the same number of times.)" (define command-list->block (match-lambda ((cmd) cmd) ((cmds ...) `( ,@cmds)))) (lalr-parser (AND ; '&' SEMI ; ';' LESS ; '<' GREAT ; '>' PIPE ; '|' LPAREN ; '(' RPAREN ; ')' AND-IF ; '&&' OR-IF ; '||' DSEMI ; ';;' DLESS ; '<<' DGREAT ; '>>' LESSAND ; '<&' GREATAND ; '>&' LESSGREAT ; '<>' DLESSDASH ; '<<-' CLOBBER ; '>|' If ; 'if' Then ; 'then' Else ; 'else' Elif ; 'elif' Fi ; 'fi' Do ; 'do' Done ; 'done' Case ; 'case' Esac ; 'esac' While ; 'while' Until ; 'until' For ; 'for' Lbrace ; '{' Rbrace ; '}' Bang ; '!' In ; 'in' WORD ASSIGNMENT-WORD NAME NEWLINE IO-NUMBER HERE-DOC HERE-DOC-SEP) (program (linebreak complete-commands linebreak) : (if (null? (cdr $2)) (car $2) (reverse! $2)) (linebreak) : (eof-object)) (complete-commands (complete-commands newline-list complete-command) : (begin (cons (command-hook $3) $1)) (complete-command) : (begin `(,(command-hook $1)))) (complete-command (list separator-op) : (command-list->block (match $2 ('AND (reverse! (cons `( ,(car $1)) (cdr $1)))) ('SEMI (reverse! $1)))) (list) : (command-list->block (reverse! $1))) (list (list separator-op and-or) : (match $2 ('AND (cons* $3 `( ,(car $1)) (cdr $1))) ('SEMI (cons $3 $1))) (and-or) : `(,$1)) (and-or (pipeline) : $1 (and-or AND-IF linebreak pipeline) : `( ,$1 ,$4) (and-or OR-IF linebreak pipeline) : `( ,$1 ,$4)) (pipeline (pipe-sequence) : (if (null? (cdr $1)) (car $1) `( ,@(reverse! $1))) (Bang pipe-sequence) : `( ,(if (null? (cdr $2)) (car $2) `( ,@(reverse! $2))))) (pipe-sequence (command) : `(,$1) (pipe-sequence PIPE linebreak command) : (cons $4 $1)) (command (simple-command) : $1 (compound-command) : $1 (compound-command redirect-list) : `( ,$2 ,$1) (function-definition) : $1) (compound-command (brace-group) : (command-list->block $1) (subshell) : $1 (for-clause) : $1 (case-clause) : $1 (if-clause) : $1 (while-clause) : $1 (until-clause) : $1) (subshell (LPAREN! compound-list RPAREN!) : `( ,@$2)) (compound-list (linebreak term) : (reverse! $2) (linebreak term separator) : (reverse! (match $3 ('AND (cons `( ,(car $2)) (cdr $2))) ((or 'SEMI 'NEWLINE) $2)))) (term (term separator and-or) : (match $2 ('AND (cons* $3 `( ,(car $1)) (cdr $1))) ((or 'SEMI 'NEWLINE) (cons $3 $1))) (and-or) : `(,$1)) (for-clause (For name do-group) : `( (,$2 (( ( "@")))) ,@$3) (For name sequential-sep do-group) : `( (,$2 (( ( "@")))) ,@$4) (For name linebreak in sequential-sep do-group) : `( (,$2 ()) ,@$6) (For name linebreak in wordlist sequential-sep do-group) : `( (,$2 ,$5) ,@$7)) (name (NAME-with-keywords) : $1) (in (In) : #f) (wordlist (wordlist WORD*) : (append $1 `(,$2)) (WORD*) : `(,$1)) (case-clause (Case WORD* linebreak in linebreak case-list Esac) : `( ,$2 ,@$6) (Case WORD* linebreak in linebreak case-list-ns Esac) : `( ,$2 ,@$6) (Case WORD* linebreak in linebreak Esac) : `( ,$2)) (case-list-ns (case-list case-item-ns) : (append $1 `(,$2)) (case-item-ns) : `(,$1)) (case-list (case-list case-item) : (append $1 `(,$2)) (case-item) : `(,$1)) (case-item-ns (pattern! RPAREN! linebreak) : `(,$1 #f) (pattern! RPAREN! compound-list) : `(,$1 ,@$3) (LPAREN! pattern RPAREN! linebreak) : `(,$2 #f) (LPAREN! pattern RPAREN! compound-list) : `(,$2 ,@$4)) (case-item (pattern! RPAREN! linebreak DSEMI linebreak) : `(,$1 #f) (pattern! RPAREN! compound-list DSEMI linebreak) : `(,$1 ,@$3) (LPAREN! pattern RPAREN! linebreak DSEMI linebreak) : `(,$2 #f) (LPAREN! pattern RPAREN! compound-list DSEMI linebreak) : `(,$2 ,@$4)) ;; If this rule is updated, the hooked version given below must be ;; updated as well. (pattern (WORD*-without-Esac) : `(,$1) (pattern PIPE WORD*) : (append $1 `(,$3))) (if-clause (If compound-list Then compound-list else-part Fi) : `( (,(command-list->block $2) ,@$4) ,@$5) (If compound-list Then compound-list Fi) : `( (,(command-list->block $2) ,@$4))) (else-part (Elif compound-list Then compound-list) : `((,(command-list->block $2) ,@$4)) (Elif compound-list Then compound-list else-part) : (cons `(,(command-list->block $2) ,@$4) $5) (Else compound-list) : `(( ,@$2))) (while-clause (While compound-list do-group) : `( ,(command-list->block $2) ,@$3)) (until-clause (Until compound-list do-group) : `( ,(command-list->block $2) ,@$3)) (function-definition (fname LPAREN! RPAREN! linebreak function-body) : `( ,$1 ,$5)) (function-body (compound-command) : $1 (compound-command redirect-list) : `( ,$2 ,$1)) (fname (NAME) : $1) (brace-group (Lbrace compound-list Rbrace) : $2) (do-group (Do compound-list Done) : $2) (simple-command (cmd-prefix cmd-word cmd-suffix) : (let*-values (((redirects-1 assignments*) (partition io-redirect? $1)) ((redirects-2 args) (partition io-redirect? $3)) ((assignments) (map split-assignment assignments*))) (match (append redirects-1 redirects-2) (() `( ,assignments ,$2 ,@args)) (redirects `( ,redirects ( ,assignments ,$2 ,@args))))) (cmd-prefix cmd-word) : (let*-values (((redirects assignments*) (partition io-redirect? $1)) ((assignments) (map split-assignment assignments*))) (match redirects (() `( ,assignments ,$2)) (_ `( ,redirects ,(if (null? assignments) `( ,$2) `( ,assignments ,$2)))))) (cmd-prefix) : (let*-values (((redirects assignments*) (partition io-redirect? $1)) ((assignments) (map split-assignment assignments*))) (match redirects (() `( ,@assignments)) (_ `( ,redirects ,(if (null? assignments) #f `( ,@assignments)))))) (cmd-name cmd-suffix) : (let-values (((redirects args) (partition io-redirect? $2))) (match redirects (() `( ,$1 ,@args)) (_ `( ,redirects ( ,$1 ,@args))))) (cmd-name) : `( ,$1)) (cmd-name (WORD*-without-keywords-or-ASSIGNMENT-WORD) : $1) (cmd-word (WORD*-without-keywords-or-ASSIGNMENT-WORD) : $1) (cmd-prefix (io-redirect) : `(,$1) (cmd-prefix io-redirect) : (append $1 `(,$2)) (ASSIGNMENT-WORD) : `(,$1) (cmd-prefix ASSIGNMENT-WORD) : (append $1 `(,$2))) (cmd-suffix (io-redirect) : `(,$1) (cmd-suffix io-redirect) : (append $1 `(,$2)) (WORD*) : `(,$1) (cmd-suffix WORD*) : (append $1 `(,$2))) (redirect-list (io-redirect) : `(,$1) (redirect-list io-redirect) : (append $1 `(,$2))) (io-redirect (io-file) : `(,(car $1) ,(assoc-ref io-redirect-defaults (car $1)) ,(cdr $1)) (IO-NUMBER io-file) : `(,(car $2) ,(string->number $1) ,(cdr $2)) (io-here) : `(,(car $1) ,(assoc-ref io-redirect-defaults (car $1)) ,(cdr $1)) (IO-NUMBER io-here) : `(,(car $2) ,(string->number $1) ,(cdr $2))) (io-file (LESS filename) : `(< . ,$2) (LESSAND filename) : `(<& . ,$2) (GREAT filename) : `(> . ,$2) (GREATAND filename) : `(>& . ,$2) (DGREAT filename) : `(>> . ,$2) (LESSGREAT filename) : `(<> . ,$2) (CLOBBER filename) : `(>! . ,$2)) (filename (WORD*) : $1) (io-here (DLESS here-end) : `(<< . ( ,$2)) (DLESSDASH here-end) : `(<<- . ( ,$2))) (here-end (WORD*) : $1) (newline-list (NEWLINE) : #f (newline-list NEWLINE) : #f) (linebreak (newline-list) : #f () : #f) (separator-op (AND) : 'AND (SEMI) : 'SEMI) (separator (separator-op linebreak) : $1 (newline-list) : 'NEWLINE) (sequential-sep (SEMI linebreak) : #f (newline-list) : #f) ;; Rules added to emulate the POSIX context-sensitive lexer ;; approach. ;; Accept all the specializations of a normal word and all ;; keywords. This is the default case. (WORD* (WORD) : $1 (NAME) : $1 (ASSIGNMENT-WORD) : $1 (If) : $1 (Then) : $1 (Else) : $1 (Elif) : $1 (Fi) : $1 (Do) : $1 (Done) : $1 (Case) : $1 (Esac) : $1 (While) : $1 (Until) : $1 (For) : $1 (Lbrace) : $1 (Rbrace) : $1 (Bang) : $1 (In) : $1) ;; Just like 'WORD*', but no keywords. This corresponds to "rule ;; 1" in the POSIX specification. (WORD*-without-keywords (WORD) : $1 (NAME) : $1 (ASSIGNMENT-WORD) : $1) ;; Just like 'WORD*', but without the "esac" keyword. This ;; corresponds to "rule 4" in the POSIX specification. (WORD*-without-Esac (WORD) : $1 (NAME) : $1 (ASSIGNMENT-WORD) : $1 (If) : $1 (Then) : $1 (Else) : $1 (Elif) : $1 (Fi) : $1 (Do) : $1 (Done) : $1 (Case) : $1 ;; (Esac) : $1 (While) : $1 (Until) : $1 (For) : $1 (Lbrace) : $1 (Rbrace) : $1 (Bang) : $1 (In) : $1) ;; Accept a "NAME" or any keyword. This corresponds to "rule 5" in ;; the POSIX specification. (NAME-with-keywords (NAME) : $1 (If) : $1 (Then) : $1 (Else) : $1 (Elif) : $1 (Fi) : $1 (Do) : $1 (Done) : $1 (Case) : $1 (Esac) : $1 (While) : $1 (Until) : $1 (For) : $1 (Lbrace) : $1 (Rbrace) : $1 (Bang) : $1 (In) : $1) ;; Accept any "WORD*" token except for "ASSIGNMENT-WORD". This ;; corresponds to "rule 7" in the POSIX specification. (WORD*-without-keywords-or-ASSIGNMENT-WORD (WORD) : $1 (NAME) : $1) ;; Rules for updating bracket balance. (LPAREN! (LPAREN) : (begin (open-bracket-hook) $1)) (RPAREN! (RPAREN) : (begin (close-bracket-hook) $1)) ;; Sometimes a "pattern" non-terminal comes before an unbalanced ;; "RPAREN". This reduction hook can be used to pretend that we ;; encountered an "LPAREN". It should match the unhooked one given ;; above. (pattern! (WORD*-without-Esac) : (begin (open-bracket-hook) `(,$1)) (pattern! PIPE WORD*) : (append $1 `(,$3))))) (define* (syntax-error message #:optional token) "Handle a parser error" (if (lexical-token? token) (throw 'syntax-error #f message (and=> (lexical-token-source token) source-location->source-properties) (or (lexical-token-value token) (lexical-token-category token)) #f) (throw 'syntax-error #f message #f token #f))) (define* (parse port #:key (lex-hook (lambda (lex) (lex))) (command-hook noop) (open-bracket-hook noop) (close-bracket-hook noop)) "Parse a Shell script from @var{port}. There are several hooks that can be installed while parsing. The procedure @var{lex-hook} is called before reading each token. It must take a thunk (which it can use to invoke the normal lexer) and return a token. The @var{command-hook} thunk is called after parsing each complete command. Its return value is ignored. The @var{open-bracket-hook} and @var{close-bracket-hook} thunks are called for each opening bracket and closing bracket respectively. Their return values are ignored." (define here-docs '()) (define (add-here-docs docs) (set! here-docs (append-reverse docs here-docs))) (define (insert-here-docs exp) (let-values (((exp here-docs*) (merge-here-docs exp (reverse here-docs)))) (unless (null? here-docs*) (error "Unused here-documents")) (set! here-docs '()) exp)) (define lex (let ((pre-lex (make-lexer port #:here-docs-hook add-here-docs))) (lambda () (lex-hook pre-lex)))) (define (command-hook* command) (command-hook) (insert-here-docs command)) (define %parse (make-parser #:command-hook command-hook* #:open-bracket-hook open-bracket-hook #:close-bracket-hook close-bracket-hook)) (%parse lex syntax-error)) (define (->command-list code) "Make the Shell syntax tree @var{code} a list of commands." (match code ((? eof-object?) '()) (((? symbol? tag) . rest) `((,tag . ,rest))) (code code))) (define* (call-with-backquoted-input-port port proc #:key quoted?) "Call @var{proc} with a wrapped version of @var{port} that will return the end-of-file object upon encountering an unescaped backquote \"`\" (without consuming the backquote). If @var{quoted?} is set, treat the double quote character as escapable." (define (escape-char? chr) (or (char=? chr #\$) (char=? chr #\`) (char=? chr #\\) (and quoted? (char=? chr #\")))) (define wrapped-port (make-soft-port (vector ;; put-char, put-string, and flush-output-port #f #f #f ;; get-char (lambda () (match (lookahead-char port) (#\` (eof-object)) (#\\ (begin (get-char port) (match (lookahead-char port) ((? escape-char?) (get-char port)) (_ #\\)))) (_ (get-char port)))) ;; close-port #f) "r")) (proc wrapped-port)) (define (read-sh/bracketed port) "Read Shell code from @var{port} until the first unmatched closing bracket." (define bracket-depth 0) (define (incr-bracket-depth!) (set! bracket-depth (1+ bracket-depth))) (define (decr-bracket-depth!) (set! bracket-depth (1- bracket-depth))) (define (stop-if-balanced lex) (let ((token (lex))) (if (and (= 0 bracket-depth) (lexical-token? token) (eq? (lexical-token-category token) 'RPAREN)) (begin (unget-char port #\)) '*eoi*) token))) (->command-list (parse port #:lex-hook stop-if-balanced #:open-bracket-hook incr-bracket-depth! #:close-bracket-hook decr-bracket-depth!))) (define* (read-sh/backquoted port #:key quoted?) "Read Shell code from @var{port} until the first unescaped backquote. If @var{quoted?} is set, treat the double quote character as escapable." (call-with-backquoted-input-port port (lambda (port) (->command-list (parse port))) #:quoted? quoted?)) (define* (read-sh #:optional (port (current-input-port))) "Read a complete Shell command from @var{port} (or the current input port if @var{port} is unspecified)." (define stop? #f) (define (stop!) (set! stop? #t)) (parse port #:lex-hook (lambda (lex) (if stop? '*eoi* (lex))) #:command-hook stop!)) (define* (read-sh-all #:optional (port (current-input-port))) "Read all complete Shell commands from @var{port} (or the current input port if @var{port} is unspecified)." (->command-list (parse port)))