sed: Fully parse commands before processing them.

This commit is contained in:
Timothy Sample 2018-12-09 13:30:54 -05:00
parent 0568f73a21
commit 194c098ab7
3 changed files with 383 additions and 25 deletions

View File

@ -77,6 +77,8 @@ ${srcdest}gash/commands/tr.scm
${srcdest}gash/commands/wc.scm
${srcdest}gash/commands/which.scm
${srcdest}gash/commands/sed/reader.scm
"
SCRIPTS="

View File

@ -1,5 +1,6 @@
;;; Gash --- Guile As SHell
;;; Copyright © 2018 Jan (janneke) Nieuwenhuizen <janneke@gnu.org>
;;; Copyright © 2018 Timothy Sample <samplet@ngyro.com>
;;;
;;; This file is part of Gash.
;;;
@ -25,7 +26,9 @@
#:use-module (ice-9 match)
#:use-module (ice-9 receive)
#:use-module (ice-9 regex)
#:use-module (srfi srfi-26)
#:use-module (gash commands sed reader)
#:use-module (gash config)
#:use-module (gash guix-utils)
#:use-module (gash shell-utils)
@ -35,7 +38,7 @@
sed
))
(define (replace->lambda string modifiers)
(define (replace->lambda string global?)
(define (replace->string m s)
(list->string
(let loop ((lst (string->list string)))
@ -73,9 +76,51 @@
(let* ((refs (- (vector-length m) 2))
(replace (replace->string m string))
(replace (cons* replace (substring l o (match:start m)) r)))
(if (memq #\g modifiers) (loop rest (match:end m) replace)
(if global? (loop rest (match:end m) replace)
(loop '() (match:end m) replace))))))))
(define (replace-escapes str)
(let* ((str (string-replace-string str "\\n" "\n"))
(str (string-replace-string str "\\r" "\r"))
(str (string-replace-string str "\\t" "\t")))
str))
(define extended? (make-parameter #f))
(define (substitute str pattern replacement flags)
(let* ((global? (memq 'g flags))
(flags (cons (if (extended?) regexp/extended regexp/basic)
(if (memq 'i flags) `(,regexp/icase) '())))
(regexp (apply make-regexp (replace-escapes pattern) flags))
(proc (replace->lambda (replace-escapes replacement) global?)))
(match (list-matches regexp str)
((and m+ (_ _ ...)) (proc str m+))
(_ str))))
(define (execute-function function str)
(match function
(('s pattern replacement flags)
(substitute str pattern replacement flags))
(_ (error "SED: unsupported function" function))))
(define (execute-commands commands str)
(match commands
(() str)
((('always function) . rest)
(execute-commands rest (execute-function function str)))
((cmd . rest) (error "SED: could not process command" cmd))))
(define* (edit-stream commands #:optional
(in (current-input-port))
(out (current-output-port)))
(let loop ((pattern-space (read-line in)))
(unless (eof-object? pattern-space)
(let ((result (execute-commands commands pattern-space)))
(display result out)
(newline out)
(loop (read-line in))))
#t))
(define (sed . args)
(let* ((option-spec
'((expression (single-char #\e) (value #t))
@ -87,12 +132,13 @@
(version (single-char #\V))))
(options (getopt-long args option-spec))
(files (option-ref options '() '()))
(extended? (or (option-ref options 'extended #f)
(option-ref options 'posix-extended #f)))
(help? (option-ref options 'help #f))
(in-place? (option-ref options 'in-place #f))
(usage? (and (not help?) (or (and (null? files) (isatty? (current-input-port))))))
(version? (option-ref options 'version #f)))
(when (or (option-ref options 'extended #f)
(option-ref options 'posix-extended #f))
(extended? #t))
(cond (version? (format #t "sed (GASH) ~a\n" %version) (exit 0))
((or help? usage?) (format (if usage? (current-error-port) #t)
"\
@ -111,33 +157,21 @@ Usage: sed [OPTION]... [SCRIPT] [FILE]...
(receive (scripts files)
(if (pair? (append script-files scripts)) (values scripts files)
(values (list-head files 1) (cdr files)))
(define (script->command o)
(cond ((string-prefix? "s" o)
(let* ((command (substring o 1))
(string (substring command 1))
(string (string-replace-string string "\\n" "\n"))
(string (string-replace-string string "\\r" "\r"))
(string (string-replace-string string "\\t" "\t"))
(separator (string-ref command 0)))
(receive (search replace modifier-string)
(apply values (string-split string separator))
(let* ((modifiers (string->list modifier-string))
(flags (if extended? (list regexp/extended) (list regexp/basic)))
(flags (if (memq #\i modifiers) (cons regexp/icase flags)
flags)))
`((,search . ,flags) . ,(replace->lambda replace modifiers))))))
(else (error (format #f "SED: command not supported: ~s\n" o)))))
(when (pair? script-files)
(error "SED: script files not supported"))
(let ((commands (map script->command scripts)))
(let* ((script (string-join scripts "\n"))
(commands
(call-with-input-string script
(cut read-sed-all <> #:extended? (extended?)))))
(cond ((and in-place? (pair? files))
(for-each (lambda (file) (substitute* file commands)) files))
(with-atomic-file-replacement
(cut edit-stream commands <> <>)))
((pair? files)
(for-each (lambda (file)
(with-input-from-file file
(lambda _ (substitute-port commands))))
(call-with-input-file file
(cut edit-stream commands <>)))
files))
(else (substitute-port commands))))))))))
(else (edit-stream commands))))))))))
(use-modules (ice-9 rdelim))
(define main sed)

View File

@ -0,0 +1,322 @@
;;; Gash --- Guile As SHell
;;; Copyright © 2018 Timothy Sample <samplet@ngyro.com>
;;;
;;; This file is part of Gash.
;;;
;;; Gash is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; Gash is distributed in the hope that it will be useful, but WITHOUT ANY
;;; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
;;; FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
;;; details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with Gash. If not, see <http://www.gnu.org/licenses/>.
(define-module (gash commands sed reader)
#:use-module (ice-9 match)
#:use-module (ice-9 rdelim)
#:use-module (rnrs io ports)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-26)
#:export (read-sed
read-sed-all))
;;; Commentary:
;;;
;;; This module provides a reader for the `sed' stream editing
;;; language.
;;;
;;; Code:
(define (next-char port)
"Discard one character from PORT, and return the next character to
be read."
(get-char port)
(lookahead-char port))
(define (get-char-while cs port)
"Read text from PORT until a character is found that does not belong
to the character set CS."
(let loop ((chr (lookahead-char port)) (acc '()))
(if (or (eof-object? chr)
(not (char-set-contains? cs chr)))
(reverse-list->string acc)
(loop (next-char port) (cons chr acc)))))
(define (read-number port)
"Read a nonnegative integer from PORT."
(let* ((str (get-char-while char-set:digit port))
(n (string->number str)))
(unless n
(error "Expected a number"))
n))
(define (read-bracket-expression port)
"Read a regular expression bracket expression from PORT,
assuming that it is positioned just after the initial open
bracket (`['). Return as a string the complete bracket expression,
including both brackets.
This procedure takes into account all the ways that a close
bracket (`]') may occur in a bracket expression without terminating
it, such as named character classes and backslash escapes."
(define (read-until-pair chr1 chr2 port)
(let loop ((chunk (read-delimited chr1 port 'concat)) (acc '()))
(unless (and (not (string-null? chunk))
(char=? (string-ref chunk (1- (string-length chunk)))
chr1))
(error "Unterminated bracket expression"))
(if (char=? (lookahead-char port) chr2)
(string->list (string-concatenate (reverse! acc)))
(loop (read-delimited chr1 port 'concat) (cons chunk acc)))))
(define (read-rest)
(let loop ((chr (get-char port)) (acc '()))
(match chr
((? eof-object?) (error "Unterminated bracket expression"))
(#\] (reverse-list->string (cons #\] acc)))
(#\[ (match (get-char port)
((? eof-object?) (error "Unterminated bracket expression"))
((and cc (or #\= #\. #\:))
(let ((class (read-until-pair cc #\] port)))
(loop (get-char port) (append-reverse class acc))))
(chr (loop (get-char port) (cons* chr #\[ acc)))))
(#\\ (match (get-char port)
((? eof-object?) (error "Unterminated bracket expression"))
(chr (loop (get-char port) (cons* chr #\\ acc)))))
(chr (loop (get-char port) (cons chr acc))))))
(match (lookahead-char port)
(#\^ (match (next-char port)
(#\] (get-char port) (string-append "[^]" (read-rest)))
(_ (string-append "[^" (read-rest)))))
(#\] (get-char port) (string-append "[]" (read-rest)))
(_ (string-append "[" (read-rest)))))
(define %extended? (make-parameter #f))
(define (read-re-until delim port)
"Read text from PORT as a regular expression until encountering the
delimiting character DELIM. Return the text of the regular expression
with the trailing delimiter discarded.
This procedure takes into account the ways that the delimiter could
appear in the regular expression without ending it, such as in a
bracket expression or capture group. It order to determine what
constitutes a capture group, it uses the `%extended?' parameter."
(let loop ((chr (lookahead-char port)) (depth 0) (acc '()))
(cond
((eof-object? chr)
(error "Unterminated regular expression"))
((char=? chr #\[)
(get-char port)
(let* ((be (read-bracket-expression port))
(be-chars (string->list be)))
(loop (lookahead-char port) depth (append-reverse! be-chars acc))))
((and (%extended?) (char=? chr #\())
(loop (next-char port) (1+ depth) (cons #\( acc)))
((and (%extended?) (char=? chr #\)))
(loop (next-char port) (1- depth) (cons #\) acc)))
((char=? chr #\\)
(if (%extended?)
(match (next-char port)
((? eof-object?) (error "Unterminated regular expression"))
(nchr (loop (next-char port) depth (cons* nchr chr acc))))
(match (next-char port)
((? eof-object?) (error "Unterminated regular expression"))
(#\( (loop (next-char port) (1+ depth) (cons* #\( chr acc)))
(#\) (loop (next-char port) (1- depth) (cons* #\) chr acc)))
(nchr (loop (next-char port) depth (cons* nchr chr acc))))))
((and (= depth 0)
(char=? chr delim))
(get-char port)
(reverse-list->string acc))
(else (loop (next-char port) depth (cons chr acc))))))
(define (read-string-until delim port)
"Read text from PORT until encountering the character DELIM,
taking into account escaping with backslashes (`\\')."
(let loop ((chr (lookahead-char port)) (acc '()))
(cond
((eof-object? chr) (error "Unterminated string"))
((char=? chr #\\)
(let ((next-chr (next-char port)))
(if (eof-object? next-chr)
(error "Unterminated string")
(loop (next-char port) (cons* next-chr chr acc)))))
((and (char=? chr delim))
(get-char port)
(reverse-list->string acc))
(else (loop (next-char port) (cons chr acc))))))
(define (read-re port)
"Read a delimited regular expression from PORT."
(let ((delim (get-char port)))
(if (eof-object? delim)
(error "Expected regular expression")
(read-re-until delim port))))
(define (read-re+string port)
"Read a delimited regular expression and a replacement string from
PORT."
(let ((delim (get-char port)))
(if (eof-object? delim)
(error "Expected regular expression and replacement")
(let* ((re (read-re-until delim port))
(str (read-string-until delim port)))
`(,re . ,str)))))
(define (read-string+string port)
"Read two delimited strings from PORT."
(let ((delim (get-char port)))
(if (eof-object? delim)
(error "Expected characters and their replacements")
(let* ((str1 (read-string-until delim port))
(str2 (read-string-until delim port)))
`(,str1 . ,str2)))))
(define (read-text port)
"Read text from PORT until either an unescaped newline or end of
file is encountered."
(get-char-while char-set:whitespace port)
(let loop ((chr (get-char port)) (acc '()))
(match chr
((or (? eof-object?)
#\newline)
(reverse-list->string acc))
(#\\
(let ((next-chr (get-char port)))
(if (eof-object? next-chr)
(error "Unterminated text")
(loop (get-char port) (cons next-chr acc)))))
(_ (loop (get-char port) (cons chr acc))))))
(define char-set:label
(string->char-set
(string-append "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789._-")))
(define label-char? (cut char-set-contains? char-set:label <>))
(define (read-label port)
"Read a label from PORT."
(get-char-while char-set:whitespace port)
(get-char-while char-set:label port))
(define (read-flags port)
"Read flags (for the `s' command) from PORT."
(let loop ((chr (lookahead-char port)) (acc '()))
(match chr
((? eof-object?) (reverse! acc))
(#\g (loop (next-char port) (cons 'g acc)))
(#\i (loop (next-char port) (cons 'i acc)))
(#\p (loop (next-char port) (cons 'p acc)))
((? (cut char-set-contains? char-set:digit <>))
(let ((n (read-number port)))
(loop (lookahead-char port) (cons n acc))))
(#\w
(get-char port)
(let ((filename (read-text port)))
(reverse! (cons `(w ,filename) acc))))
(_ (reverse! acc)))))
(define (read-address port)
"Read an address from PORT."
(match (lookahead-char port)
(#\$ '$)
((? (cut char-set-contains? char-set:digit <>)) (read-number port))
(_ (read-re port))))
(define* (read-function port #:key (depth 0))
"Read a function and its arguments from PORT."
(get-char-while char-set:whitespace port)
(match (get-char port)
(#\{ `(begin ,@(%read-sed-all port #:depth (1+ depth))))
(#\a `(a ,(read-text port)))
(#\b `(b ,(read-label port)))
(#\c `(c ,(read-text port)))
(#\d '(d))
(#\D '(D))
(#\g '(g))
(#\G '(G))
(#\h '(h))
(#\H '(H))
(#\i `(i ,(read-text port)))
(#\l '(l))
(#\n '(n))
(#\N '(N))
(#\p '(p))
(#\P '(P))
(#\q '(q))
(#\r `(r ,(read-text port)))
(#\s (match-let (((re . str) (read-re+string port)))
`(s ,re ,str ,(read-flags port))))
(#\t `(t ,(read-label port)))
(#\w `(w ,(read-text port)))
(#\x '(x))
(#\y (match-let (((str1 . str2) (read-string+string port)))
`(y ,str1 ,str2)))
(#\: `(: ,(read-label port)))
(#\= `(= ,(1+ (port-line port))))
(#\# `(comment ,(read-line port)))))
(define char-set:function
(string->char-set "abcdDgGhHilnNpPqrstwxy:=#"))
(define function-char? (cut char-set-contains? char-set:function <>))
(define (read-addresses port)
"Read zero, one, or two address from PORT, separated by a
comma (`,') and delimited by a function name."
(match (lookahead-char port)
((? function-char?) '())
(_ (let ((address1 (read-address port)))
(match (lookahead-char port)
(#\, (let* ((_ (get-char port))
(address2 (read-address port)))
`(,address1 ,address2)))
(_ `(,address1)))))))
(define char-set:whitespace+semi (char-set-adjoin char-set:whitespace #\;))
(define* (%read-sed port #:key (depth 0))
"Read a sed command from PORT."
(get-char-while char-set:whitespace+semi port)
(match (lookahead-char port)
((? eof-object?) (eof-object))
(#\}
(get-char port)
(if (> depth 0)
(eof-object)
(error "Unmatched close brace")))
(_ (let* ((addresses (read-addresses port))
(function (read-function port #:depth depth)))
(match addresses
(() `(always ,function))
((address) `(at ,address ,function))
((address1 address2) `(in (,address1 . ,address2) ,function)))))))
(define* (%read-sed-all port #:key (depth 0))
"Read a sequence of sed commands from PORT."
(let loop ((cmd (%read-sed port #:depth depth)) (acc '()))
(match cmd
((? eof-object?) (reverse! acc))
(_ (loop (%read-sed port #:depth depth) (cons cmd acc))))))
(define* (read-sed port #:key (extended? #f))
"Read a sed command from PORT. If EXTENDED? is set, treat regular
expressions as extended rather than basic."
(parameterize ((%extended? extended?))
(%read-sed port)))
(define* (read-sed-all port #:key (extended? #f))
"Read a sequence of sed commands from PORT. If EXTENDED? is set,
treat regular expressions as extended rather than basic."
(parameterize ((%extended? extended?))
(%read-sed-all port)))