722 lines
26 KiB
Scheme
722 lines
26 KiB
Scheme
;;; Gash -- Guile As SHell
|
||
;;; Copyright © 2017, 2018, 2020, 2021 Timothy Sample <samplet@ngyro.com>
|
||
;;;
|
||
;;; This file is part of Gash.
|
||
;;;
|
||
;;; Gash is free software: you can redistribute it and/or modify
|
||
;;; it under the terms of the GNU General Public License as published by
|
||
;;; the Free Software Foundation, either version 3 of the License, or
|
||
;;; (at your option) any later version.
|
||
;;;
|
||
;;; Gash is distributed in the hope that it will be useful,
|
||
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
;;; GNU General Public License for more details.
|
||
;;;
|
||
;;; You should have received a copy of the GNU General Public License
|
||
;;; along with Gash. If not, see <http://www.gnu.org/licenses/>.
|
||
|
||
(define-module (gash lexer)
|
||
#:use-module (gash compat textual-ports)
|
||
#:use-module (ice-9 match)
|
||
#:use-module (ice-9 rdelim)
|
||
#:use-module (ice-9 receive)
|
||
#:use-module (srfi srfi-1)
|
||
#:use-module (srfi srfi-9)
|
||
#:use-module (srfi srfi-26)
|
||
#:use-module (system base lalr)
|
||
#:export (read-bracketed-command
|
||
read-backquoted-command
|
||
get-token
|
||
get-here-end
|
||
get-here-doc))
|
||
|
||
;;; Commentary:
|
||
;;;
|
||
;;; This module contains the lexer for the Shell language.
|
||
;;;
|
||
;;; Code:
|
||
|
||
(define-record-type <port-location>
|
||
(make-port-location filename line column offset)
|
||
port-location?
|
||
(filename port-location-filename)
|
||
(line port-location-line)
|
||
(column port-location-column)
|
||
(offset port-location-offset))
|
||
|
||
(define (port->port-location port)
|
||
"Create a <port-location> from a port"
|
||
(make-port-location (port-filename port)
|
||
(port-line port)
|
||
(port-column port)
|
||
(false-if-exception (ftell port))))
|
||
|
||
(define (complete-source-location port-location len)
|
||
"Create a <source-location> by mixing a <port-location> and a length"
|
||
(make-source-location (port-location-filename port-location)
|
||
(port-location-line port-location)
|
||
(port-location-column port-location)
|
||
(port-location-offset port-location)
|
||
len))
|
||
|
||
(define *operators*
|
||
'(("&" . AND)
|
||
("|" . PIPE)
|
||
(";" . SEMI)
|
||
("<" . LESS)
|
||
(">" . GREAT)
|
||
("(" . LPAREN)
|
||
(")" . RPAREN)
|
||
("&&" . AND-IF)
|
||
("||" . OR-IF)
|
||
(";;" . DSEMI)
|
||
("<<" . DLESS)
|
||
(">>" . DGREAT)
|
||
("<&" . LESSAND)
|
||
(">&" . GREATAND)
|
||
("<>" . LESSGREAT)
|
||
("<<-" . DLESSDASH)
|
||
(">|" . CLOBBER)))
|
||
|
||
(define *reserved-words*
|
||
'(("!" . Bang)
|
||
("{" . Lbrace)
|
||
("}" . Rbrace)
|
||
("case" . Case)
|
||
("do" . Do)
|
||
("done" . Done)
|
||
("elif" . Elif)
|
||
("else" . Else)
|
||
("esac" . Esac)
|
||
("fi" . Fi)
|
||
("for" . For)
|
||
("if" . If)
|
||
("in" . In)
|
||
("then" . Then)
|
||
("until" . Until)
|
||
("while" . While)))
|
||
|
||
(define (operator-prefix? str)
|
||
(any (cut string-prefix? str <>) (map car *operators*)))
|
||
|
||
(define operator-prefix-char?
|
||
(let ((prefix-chars (delete-duplicates
|
||
(map (match-lambda
|
||
((str . _) (string-ref str 0)))
|
||
*operators*))))
|
||
(cut memv <> prefix-chars)))
|
||
|
||
(define blank? (cut char-set-contains? char-set:blank <>))
|
||
|
||
(define name-start-char?
|
||
(let ((char-set:name-start
|
||
(char-set-intersection char-set:ascii
|
||
(char-set-union char-set:letter
|
||
(char-set #\_)))))
|
||
(lambda (chr)
|
||
(and (char? chr)
|
||
(char-set-contains? char-set:name-start chr)))))
|
||
|
||
(define name-char?
|
||
(let ((char-set:name
|
||
(char-set-intersection char-set:ascii
|
||
(char-set-union char-set:letter+digit
|
||
(char-set #\_)))))
|
||
(lambda (chr)
|
||
(and (char? chr)
|
||
(char-set-contains? char-set:name chr)))))
|
||
|
||
(define (name? str)
|
||
(and (string? str)
|
||
(not (string-null? str))
|
||
(name-start-char? (string-ref str 0))
|
||
(string-every name-char? str)))
|
||
|
||
(define ascii-digit-char?
|
||
(let ((char-set:ascii-digit
|
||
(char-set-intersection char-set:ascii
|
||
char-set:digit)))
|
||
(lambda (chr)
|
||
(and (char? chr)
|
||
(char-set-contains? char-set:ascii-digit chr)))))
|
||
|
||
(define io-number?
|
||
(let ((char-set:ascii-digit (char-set-intersection char-set:ascii
|
||
char-set:digit)))
|
||
(lambda (str)
|
||
(and (string? str)
|
||
(string-every ascii-digit-char? str)))))
|
||
|
||
(define (assignment-word? word)
|
||
(define (name-then-=? str)
|
||
(match (string-index str #\=)
|
||
(#f #f)
|
||
(index (name? (substring str 0 index)))))
|
||
(match word
|
||
(((? string? str) . tail) (name-then-=? str))
|
||
((? string? str) (name-then-=? str))
|
||
(_ #f)))
|
||
|
||
(define (reserved-word? word)
|
||
(assoc word *reserved-words*))
|
||
|
||
(define (join-contiguous-strings lst)
|
||
"Join all contiguous strings in @var{lst}."
|
||
(fold-right (lambda (x lst)
|
||
(let ((head (if (null? lst) #f (car lst))))
|
||
(if (and (string? x) (string? head))
|
||
(cons (string-append x head) (cdr lst))
|
||
(cons x lst))))
|
||
'()
|
||
lst))
|
||
|
||
(define-inlinable (next-char port)
|
||
"Advance @var{port} by one character and return the lookahead
|
||
character."
|
||
(get-char port)
|
||
(lookahead-char port))
|
||
|
||
(define read-bracketed-command
|
||
;; A procedure for reading a bracketed command (e.g, "$(command)").
|
||
;; This is parameterized to avoid a circular dependency.
|
||
(make-parameter (lambda (port) (throw 'bracketed-command-parser-unset))))
|
||
|
||
(define read-backquoted-command
|
||
;; A procedure for reading a backquoted command (e.g, "`command`").
|
||
;; This is parameterized to avoid a circular dependency.
|
||
(make-parameter (lambda* (port #:key quoted?)
|
||
(throw 'backquoted-command-parser-unset))))
|
||
|
||
(define* (get-parameter port #:key (multidigit? #f))
|
||
"Get a parameter name (excluding the leading '$') from @var{port}.
|
||
If @var{multidigit?} is true, treat strings of numbers as a valid
|
||
name. If a valid parameter name cannot be read from @var{port},
|
||
nothing will be read and @code{#f} will be returned."
|
||
(match (lookahead-char port)
|
||
;; Special parameter names
|
||
((or #\@ #\* #\# #\? #\- #\$ #\! #\0)
|
||
(string (get-char port)))
|
||
|
||
;; Numeric parameter names (excluding "0")
|
||
((? ascii-digit-char? digit)
|
||
(if multidigit?
|
||
(let loop ((chr (next-char port)) (acc `(,digit)))
|
||
(match chr
|
||
((? ascii-digit-char?) (loop (next-char port) (cons chr acc)))
|
||
(_ (list->string (reverse! acc)))))
|
||
(string (get-char port))))
|
||
|
||
;; Regular names ("[a-zA-Z_][a-zA-Z0-9_]*").
|
||
((? name-start-char? start-chr)
|
||
(let loop ((chr (next-char port)) (acc `(,start-chr)))
|
||
(match chr
|
||
((? name-char?) (loop (next-char port) (cons chr acc)))
|
||
(_ (list->string (reverse! acc))))))
|
||
;; Not a parameter name.
|
||
(_ #f)))
|
||
|
||
(define *parameter-operators*
|
||
;; Associate Scheme-like names to all of the Shell parameter
|
||
;; operators. Note that "#" means the infix version and not the
|
||
;; prefixed "#", which means "length".
|
||
'(("-" . <sh-ref-or>)
|
||
(":-" . <sh-ref-or*>)
|
||
("=" . <sh-ref-or!>)
|
||
(":=" . <sh-ref-or!*>)
|
||
("?" . <sh-ref-assert>)
|
||
(":?" . <sh-ref-assert*>)
|
||
("+" . <sh-ref-and>)
|
||
(":+" . <sh-ref-and*>)
|
||
("%" . <sh-ref-except-min>)
|
||
("%%" . <sh-ref-except-max>)
|
||
("#" . <sh-ref-skip-min>)
|
||
("##" . <sh-ref-skip-max>)))
|
||
|
||
(define (try-get-parameter-operator port)
|
||
"Try to get a Shell parameter operator from @var{port}. Upon failure,
|
||
return #f and use 'unget-char' to return at most one character back to
|
||
@var{port}."
|
||
(match (lookahead-char port)
|
||
((or #\- #\= #\? #\+) (string (get-char port)))
|
||
(#\: (match (next-char port)
|
||
((or #\- #\= #\? #\+) (string #\: (get-char port)))
|
||
(_ (unget-char port #\:)
|
||
#f)))
|
||
(#\% (match (next-char port)
|
||
(#\% (string #\% (get-char port)))
|
||
(_ "%")))
|
||
(#\# (match (next-char port)
|
||
(#\# (string #\# (get-char port)))
|
||
(_ "#")))
|
||
(_ #f)))
|
||
|
||
(define (get-parameter-word port)
|
||
"Get a parameter word (the bit that comes after the operator in a
|
||
parameter expression) from @var{port}."
|
||
|
||
(define (get-parameter-word-string port)
|
||
(let loop ((chr (lookahead-char port)) (acc '()))
|
||
(match chr
|
||
((? eof-object?) (throw 'lex-error))
|
||
((or #\{ #\} #\" #\' #\$ #\` #\\) (list->string (reverse! acc)))
|
||
(_ (loop (next-char port) (cons chr acc))))))
|
||
|
||
(let loop ((chr (lookahead-char port)) (brace-balance 0) (acc '()))
|
||
(match chr
|
||
((or #\$ #\`) (let ((expansion (get-expansion port)))
|
||
(loop (lookahead-char port)
|
||
brace-balance
|
||
(cons (or expansion (string chr)) acc))))
|
||
(#\\ (let ((escape (get-escape port)))
|
||
(loop (lookahead-char port) brace-balance (append escape acc ))))
|
||
(#\' (let ((quotation (get-single-quotation port)))
|
||
(loop (lookahead-char port) brace-balance (cons quotation acc))))
|
||
(#\" (let ((quotation (get-double-quotation port)))
|
||
(loop (lookahead-char port) brace-balance (cons quotation acc))))
|
||
(#\{ (loop (next-char port) (+ brace-balance 1) (cons "{" acc)))
|
||
(#\} (if (= brace-balance 0)
|
||
(match (join-contiguous-strings (reverse! acc))
|
||
((str) str)
|
||
(x x))
|
||
(loop (next-char port) (- brace-balance 1) (cons "}" acc))))
|
||
(_ (let ((str (get-parameter-word-string port)))
|
||
(loop (lookahead-char port)
|
||
brace-balance
|
||
(if (not (string-null? str))
|
||
(cons str acc)
|
||
acc)))))))
|
||
|
||
(define (get-parameter-expression port)
|
||
"Get a parameter expression ('${...}') from @var{port} (excluding the
|
||
leading '$')."
|
||
(match (get-char port)
|
||
(#\{ (match (lookahead-char port)
|
||
(#\#
|
||
(get-char port)
|
||
(let ((parameter (get-parameter port #:multidigit? #t)))
|
||
(match (get-char port)
|
||
(#\} `(<sh-ref-length> ,parameter)))))
|
||
(_
|
||
(let* ((parameter (get-parameter port #:multidigit? #t))
|
||
(operator (assoc-ref *parameter-operators*
|
||
(try-get-parameter-operator port)))
|
||
(word (if operator
|
||
(match (lookahead-char port)
|
||
(#\} #f)
|
||
(_ (get-parameter-word port)))
|
||
#f)))
|
||
(match (get-char port)
|
||
(#\} (match `(,parameter ,operator ,word)
|
||
((p #f #f) `(<sh-ref> ,p))
|
||
((p o #f) `(,o ,p #f))
|
||
((p o w) `(,o ,p ,w)))))))))))
|
||
|
||
(define (get-parameter-expansion port)
|
||
"Get a parameter expansion (either '$name' or '${...}') from
|
||
@var{port} (excluding the leading '$')."
|
||
(let ((result (match (lookahead-char port)
|
||
(#\{ (get-parameter-expression port))
|
||
(_ (and=> (get-parameter port)
|
||
(lambda (name)
|
||
`(<sh-ref> ,name)))))))
|
||
(match result
|
||
((op "LINENO") `(,op ("LINENO" . ,(1+ (port-line port)))))
|
||
((op "LINENO" x) `(,op ("LINENO" . ,(1+ (port-line port))) ,x))
|
||
(_ result))))
|
||
|
||
(define (get-arithmetic-expansion port)
|
||
"Get an arithmetic expansion from @var{port}."
|
||
|
||
(define (get-arithmetic-expansion-string port)
|
||
(let loop ((chr (lookahead-char port)) (acc '()))
|
||
(match chr
|
||
((? eof-object?) (throw 'lex-error))
|
||
((or #\( #\) #\$ #\` #\\) (list->string (reverse! acc)))
|
||
(_ (loop (next-char port) (cons chr acc))))))
|
||
|
||
(match (list (get-char port) (get-char port))
|
||
((#\( #\()
|
||
(let loop ((chr (lookahead-char port)) (depth 0) (acc '()))
|
||
(match chr
|
||
(#\( (loop (next-char port) (1+ depth) (cons "(" acc)))
|
||
(#\) (cond
|
||
((and (zero? depth) (equal? (next-char port) #\)))
|
||
(get-char port)
|
||
`(<sh-arithmetic>
|
||
,(match (join-contiguous-strings (reverse! acc))
|
||
(() "")
|
||
((word) word)
|
||
(words words))))
|
||
((positive? depth)
|
||
(loop (next-char port) (1- depth) (cons ")" acc)))
|
||
(else
|
||
(throw 'lex-error))))
|
||
((or #\$ #\`)
|
||
(let ((expansion (get-expansion port)))
|
||
(loop (lookahead-char port)
|
||
depth
|
||
(cons (or expansion (string chr)) acc))))
|
||
(#\\ (let ((escape (get-escape port
|
||
(cut member <> '(#\$ #\` #\\)))))
|
||
(loop (lookahead-char port) depth (append escape acc))))
|
||
(_ (let ((str (get-arithmetic-expansion-string port)))
|
||
(loop (lookahead-char port)
|
||
depth
|
||
(if (not (string-null? str))
|
||
(cons str acc)
|
||
acc)))))))))
|
||
|
||
(define (get-bracketed-command port)
|
||
"Get a bracketed command ('$(...)') from @var{port} (excluding the
|
||
leading '$')."
|
||
(match (get-char port)
|
||
(#\(
|
||
(let ((result ((read-bracketed-command) port)))
|
||
(match (get-char port)
|
||
(#\) `(<sh-cmd-sub> ,@result)))))))
|
||
|
||
(define* (get-backquoted-command port #:key quoted?)
|
||
"Get a backquoted command ('`...`') from @var{port}. If
|
||
@var{quoted?} is set, treat the backquoted command as if it were
|
||
quoted."
|
||
(match (get-char port)
|
||
(#\`
|
||
(let ((result ((read-backquoted-command) port #:quoted? quoted?)))
|
||
(match (get-char port)
|
||
(#\` `(<sh-cmd-sub> ,@result)))))))
|
||
|
||
(define* (get-expansion port #:key quoted?)
|
||
"Get an expansion ('$name', '${...}', '$(...)', or '`...`') from
|
||
@var{port}. If @var{quoted?} is set, treat backquoted commands as if
|
||
they were quoted."
|
||
(match (lookahead-char port)
|
||
(#\$ (begin
|
||
(get-char port)
|
||
(match (lookahead-char port)
|
||
(#\( (let ((next (next-char port)))
|
||
(unget-char port #\()
|
||
(match next
|
||
(#\( (get-arithmetic-expansion port))
|
||
(_ (get-bracketed-command port)))))
|
||
(_ (get-parameter-expansion port)))))
|
||
(#\` (get-backquoted-command port #:quoted? quoted?))))
|
||
|
||
;; When this parameter is true, expansion processing is enabled.
|
||
(define expansions? (make-parameter #t))
|
||
|
||
(define* (get-escape port #:optional (pred (lambda _ #t)))
|
||
"Get an escape sequence ('\\x') from @var{port}. If @var{pred} is set,
|
||
then the backslash will be treated as a literal backslash unless the
|
||
next character statisfies @var{pred} (or is a newline)."
|
||
(match (get-char port)
|
||
(#\\
|
||
(let ((chr (lookahead-char port)))
|
||
(match chr
|
||
(#\newline (begin (get-char port) '()))
|
||
((and (? char?) (? pred)) (begin (get-char port)
|
||
`((<sh-quote> ,(string chr)))))
|
||
(_ `(,(string #\\))))))))
|
||
|
||
(define (get-single-quotation port)
|
||
"Get a single-quote wrapped string from @var{port}."
|
||
(match (get-char port)
|
||
(#\'
|
||
(let loop ((chr (get-char port)) (acc '()))
|
||
(match chr
|
||
((? eof-object?) (throw 'lex-error))
|
||
(#\' `(<sh-quote> ,(list->string (reverse! acc))))
|
||
(x (loop (get-char port) (cons x acc))))))))
|
||
|
||
(define (get-double-quotation port)
|
||
"Get a double-quote wrapped string from @var{port}."
|
||
|
||
(define (get-double-quotation-string port)
|
||
(let loop ((chr (lookahead-char port)) (acc '()))
|
||
(match chr
|
||
((? eof-object?) (throw 'lex-error))
|
||
((or #\" #\$ #\` #\\) (list->string (reverse! acc)))
|
||
(_ (loop (next-char port) (cons chr acc))))))
|
||
|
||
(match (get-char port)
|
||
(#\"
|
||
(let loop ((chr (lookahead-char port)) (acc '()))
|
||
(match chr
|
||
(#\" (begin
|
||
(get-char port)
|
||
`(<sh-quote> ,(match (join-contiguous-strings (reverse! acc))
|
||
(() "")
|
||
((word) word)
|
||
(words words)))))
|
||
((or #\$ #\`)
|
||
(if (expansions?)
|
||
(let ((expansion (get-expansion port #:quoted? #t)))
|
||
(loop (lookahead-char port)
|
||
(cons (or expansion (string chr)) acc)))
|
||
(loop (next-char port) (cons (string chr) acc))))
|
||
(#\\ (let ((escape (get-escape port
|
||
(cut member <> '(#\" #\$ #\` #\\)))))
|
||
(loop (lookahead-char port) (append escape acc))))
|
||
(_ (let ((str (get-double-quotation-string port)))
|
||
(loop (lookahead-char port) (if (not (string-null? str))
|
||
(cons str acc)
|
||
acc)))))))))
|
||
|
||
(define (get-operator port)
|
||
"Get an operator from @var{port}."
|
||
(let loop ((chr (lookahead-char port)) (acc '()))
|
||
(if (and (not (eof-object? chr))
|
||
(operator-prefix? (list->string (reverse (cons chr acc)))))
|
||
(loop (next-char port) (cons chr acc))
|
||
(let ((operator (list->string (reverse! acc))))
|
||
`(,(assoc-ref *operators* operator) . ,operator)))))
|
||
|
||
(define (get-word port)
|
||
"Get a word (name, keyword, number, etc.) from @var{port}."
|
||
|
||
(define (get-word-string port)
|
||
(let loop ((chr (lookahead-char port)) (acc '()))
|
||
(match chr
|
||
((or (? eof-object?)
|
||
(? operator-prefix-char?)
|
||
(? blank?)
|
||
#\newline
|
||
#\$ #\` #\' #\" #\\) (list->string (reverse! acc)))
|
||
(_ (loop (next-char port) (cons chr acc))))))
|
||
|
||
(define* (acc->token acc #:optional (delimiter #f))
|
||
(match (join-contiguous-strings (reverse! acc))
|
||
((str) (match str
|
||
((? io-number?) (if (member delimiter '(#\< #\>))
|
||
`(IO-NUMBER . ,str)
|
||
`(WORD . ,str)))
|
||
((? reserved-word?) `(,(assoc-ref *reserved-words* str) . ,str))
|
||
((? name?) `(NAME . ,str))
|
||
((? assignment-word?) `(ASSIGNMENT-WORD . ,str))
|
||
(_ `(WORD . ,str))))
|
||
(lst (match lst
|
||
((? assignment-word?) `(ASSIGNMENT-WORD . ,lst))
|
||
(_ `(WORD . ,lst))))))
|
||
|
||
(let loop ((chr (lookahead-char port)) (acc '()))
|
||
(match chr
|
||
((or (? eof-object?)
|
||
(? operator-prefix-char?)
|
||
(? blank?)
|
||
#\newline) (acc->token acc chr))
|
||
((or #\$ #\`)
|
||
(if (expansions?)
|
||
(let ((expansion (get-expansion port)))
|
||
(loop (lookahead-char port)
|
||
(cons (or expansion (string chr)) acc)))
|
||
(loop (next-char port) (cons (string chr) acc))))
|
||
(#\\ (let ((escape (get-escape port)))
|
||
(loop (lookahead-char port) (append escape acc))))
|
||
(#\' (let ((quotation (get-single-quotation port)))
|
||
(loop (lookahead-char port) (cons quotation acc))))
|
||
(#\" (let ((quotation (get-double-quotation port)))
|
||
(loop (lookahead-char port) (cons quotation acc))))
|
||
(_ (let ((str (get-word-string port)))
|
||
(loop (lookahead-char port) (if (not (string-null? str))
|
||
(cons str acc)
|
||
acc)))))))
|
||
|
||
(define (call-with-metered-input-port port proc)
|
||
"Call @var{proc} with @var{port} instrumented to count the number of
|
||
characters read."
|
||
(define meter 0)
|
||
(define wrapped-port
|
||
(make-soft-port
|
||
(vector
|
||
;; put-char, put-string, and flush-output-port
|
||
#f #f #f
|
||
;; get-char
|
||
(lambda ()
|
||
(set! meter (+ meter 1))
|
||
(get-char port))
|
||
;; close-port
|
||
#f)
|
||
"r"))
|
||
(set-port-line! wrapped-port (port-line port))
|
||
(let ((result (proc wrapped-port)))
|
||
;; The soft port seems to have an independent buffer from the
|
||
;; input port. This means that the "lookahead" character on the
|
||
;; soft port will disappear unless we move it back to the input
|
||
;; port.
|
||
(when (char? (lookahead-char wrapped-port))
|
||
(unget-char port (lookahead-char wrapped-port)))
|
||
(values meter result)))
|
||
|
||
(define (get-token->get-lexical-token proc)
|
||
"Convert @var{proc} from a procedure that returns a token-value pair
|
||
to a procedure that returns a lexical token to be consumed by the LALR
|
||
module."
|
||
(lambda (port)
|
||
(let ((port-location (port->port-location port)))
|
||
(receive (length token)
|
||
(call-with-metered-input-port port proc)
|
||
(match token
|
||
((category . value)
|
||
(make-lexical-token
|
||
category
|
||
;; We must use "length - 1" since proc will get a final
|
||
;; delimiting character that is not part of the token.
|
||
(complete-source-location port-location (- length 1))
|
||
value)))))))
|
||
|
||
(define get-operator-lexical-token
|
||
(get-token->get-lexical-token get-operator))
|
||
|
||
(define get-word-lexical-token
|
||
(get-token->get-lexical-token get-word))
|
||
|
||
(define (get-newline-lexical-token port)
|
||
"Get a newline as a lexical token to be consumed by the LALR module."
|
||
(let ((port-location (port->port-location port)))
|
||
(match (get-char port)
|
||
(#\newline (make-lexical-token
|
||
'NEWLINE
|
||
(complete-source-location port-location 1)
|
||
#\newline)))))
|
||
|
||
(define (skip-to-end-of-line port)
|
||
"Skip characters from @var{port} until the next character to be read
|
||
is a newline (or EOF)."
|
||
(let loop ((chr (lookahead-char port)))
|
||
(match chr
|
||
((or (? eof-object?) #\newline) #f)
|
||
(_ (loop (next-char port))))))
|
||
|
||
(define (get-token port)
|
||
"Get the next lexical token from @var{port}."
|
||
(let loop ((chr (lookahead-char port)))
|
||
(match chr
|
||
((? eof-object?) '*eoi*)
|
||
((? operator-prefix-char?) (get-operator-lexical-token port))
|
||
((? blank?) (loop (next-char port)))
|
||
(#\# (begin
|
||
(skip-to-end-of-line port)
|
||
(loop (lookahead-char port))))
|
||
(#\newline (get-newline-lexical-token port))
|
||
(#\\ (match (next-char port)
|
||
(#\newline (loop (next-char port)))
|
||
(_ (unget-char port #\\)
|
||
(get-word-lexical-token port))))
|
||
(_ (get-word-lexical-token port)))))
|
||
|
||
|
||
;;; Here-documents.
|
||
|
||
(define (get-here-end port)
|
||
"Get the next lexical token from @var{port}, using the special rules
|
||
for lexing a here-end word. Namely, do not treat expansions
|
||
(parameters, command substitutions, etc.) specially."
|
||
(parameterize ((expansions? #f))
|
||
(get-token port)))
|
||
|
||
(define (get-quoted-here-doc end port)
|
||
"Get a quoted here-document string from @var{port}, where @var{end}
|
||
marks the end of the here-document."
|
||
(let loop ((line (read-line port 'concat)) (acc '()))
|
||
(if (eof-object? line)
|
||
;; XXX: Following Bash, we should issue a warning here.
|
||
`(<sh-quote> ,(string-concatenate-reverse acc))
|
||
(let ((line* (string-trim-right line #\newline)))
|
||
(if (string=? line* end)
|
||
`(<sh-quote> ,(string-concatenate-reverse acc))
|
||
(loop (read-line port 'concat) (cons line acc)))))))
|
||
|
||
(define (get-unquoted-here-doc end port)
|
||
"Get an unquoted here-document string from @var{port}, where
|
||
@var{end} marks the end of the here-document."
|
||
|
||
(define end-list (string->list end))
|
||
|
||
(define (get-unquoted-here-doc-string port)
|
||
(let loop ((chr (lookahead-char port)) (acc '()))
|
||
(match chr
|
||
((or #\$ #\` #\\ (? eof-object?)) (list->string (reverse! acc)))
|
||
(#\newline (list->string (reverse! (cons (get-char port) acc))))
|
||
(_ (loop (next-char port) (cons chr acc))))))
|
||
|
||
(let loop ((chr (lookahead-char port))
|
||
(end end-list)
|
||
(end-acc '())
|
||
(acc '()))
|
||
(cond
|
||
;; We've read the end string and are looking at newline or EOF.
|
||
((and (null? end)
|
||
(or (eof-object? chr)
|
||
(char=? chr #\newline)))
|
||
(get-char port)
|
||
`(<sh-quote> ,(match (join-contiguous-strings (reverse! acc))
|
||
((word) word)
|
||
(words words))))
|
||
;; We've hit EOF prematurely.
|
||
((eof-object? chr)
|
||
;; XXX: Following Bash, we should issue a warning here.
|
||
(let* ((end-str (list->string (reverse! end-acc)))
|
||
(acc (if (string-null? end-str) acc (cons end-str acc))))
|
||
`(<sh-quote> ,(match (join-contiguous-strings (reverse! acc))
|
||
((word) word)
|
||
(words words)))))
|
||
;; We've read another character from the end string.
|
||
((and (pair? end)
|
||
(char=? (car end) chr))
|
||
(loop (next-char port) (cdr end) (cons chr end-acc) acc))
|
||
;; We've read a non-end-string character, and have some
|
||
;; characters from the end string already read.
|
||
((pair? end-acc)
|
||
(loop chr #f '() (cons (list->string (reverse! end-acc)) acc)))
|
||
;; We've nothing to do with the end string.
|
||
(else
|
||
(match chr
|
||
((or #\$ #\`) (let ((expansion (get-expansion port)))
|
||
(loop (lookahead-char port) #f '()
|
||
(cons (or expansion (string chr)) acc))))
|
||
(#\\ (let ((escape (get-escape port (cut member <> '(#\$ #\` #\\)))))
|
||
(loop (lookahead-char port) #f '() (append escape acc))))
|
||
(_ (let ((str (get-unquoted-here-doc-string port)))
|
||
(loop (lookahead-char port) end-list '()
|
||
(if (not (string-null? str))
|
||
(cons str acc)
|
||
acc)))))))))
|
||
|
||
(define (wrap-port-with-tab-trimming port)
|
||
"Wrap @var{port} filtering out all tabs that occur at the beginning
|
||
of a line."
|
||
(define after-newline? #t)
|
||
(make-soft-port
|
||
(vector
|
||
;; put-char, put-string, and flush-output-port
|
||
#f #f #f
|
||
;; get-char
|
||
(lambda ()
|
||
(if after-newline?
|
||
(let loop ((chr (get-char port)))
|
||
(match chr
|
||
(#\tab (loop (get-char port)))
|
||
(#\newline chr)
|
||
(_ (set! after-newline? #f)
|
||
chr)))
|
||
(match (get-char port)
|
||
(#\newline
|
||
(set! after-newline? #t)
|
||
#\newline)
|
||
(chr chr))))
|
||
;; close-port
|
||
#f)
|
||
"r"))
|
||
|
||
(define* (get-here-doc end port #:key (trim-tabs? #f) (quoted? #f))
|
||
"Get a here-document token from @var{port}, using @var{end} to
|
||
signal the ending. If @var{trim-tabs?} is set, remove leading tabs
|
||
from each line. If @var{quoted?} is set, ignore substitutions."
|
||
((get-token->get-lexical-token
|
||
(lambda (port)
|
||
(let ((port (if trim-tabs? (wrap-port-with-tab-trimming port) port)))
|
||
`(HERE-DOC . ,(if quoted?
|
||
(get-quoted-here-doc end port)
|
||
(get-unquoted-here-doc end port))))))
|
||
port))
|