diff --git a/.gitignore b/.gitignore index 8815e7c..7339ac8 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ /bin/grep /bin/ls /bin/reboot +/bin/sed /bin/sh /bin/tar /bin/wc diff --git a/build-aux/build-guile.sh b/build-aux/build-guile.sh index 5b55be7..50533d7 100755 --- a/build-aux/build-guile.sh +++ b/build-aux/build-guile.sh @@ -65,6 +65,7 @@ gash/commands/find.scm gash/commands/grep.scm gash/commands/ls.scm gash/commands/reboot.scm +gash/commands/sed.scm gash/commands/tar.scm gash/commands/wc.scm gash/commands/which.scm @@ -90,6 +91,7 @@ bin/gash bin/grep bin/ls bin/reboot +bin/sed bin/tar bin/wc bin/which diff --git a/check.sh b/check.sh index 62fd478..6a1679a 100755 --- a/check.sh +++ b/check.sh @@ -3,7 +3,6 @@ if [ -n "$V" ]; then fi DIFF=diff SHELL=${SHELL-bin/gash} -#SHELL=bin/gash tests=" assign @@ -43,12 +42,26 @@ substitution 32-for-substitute 33-string-args 35-assignment-eval-echo + +00-sed +00-sed-once +00-sed-global +00-sed-case +00-sed-group +00-sed-group-extended +00-sed-twice +00-sed-undo " broken=" - " +if [ "$(basename $SHELL)" = bash ]; then + broken=" +00-sed +" +fi + expect=$(echo $broken | wc -w) pass=0 fail=0 diff --git a/configure b/configure index b4d2d68..b6e7769 100755 --- a/configure +++ b/configure @@ -96,6 +96,7 @@ find grep ls reboot +sed tar wc which diff --git a/gash/commands/sed.scm b/gash/commands/sed.scm new file mode 100644 index 0000000..5f3f7d5 --- /dev/null +++ b/gash/commands/sed.scm @@ -0,0 +1,130 @@ +;;; Gash -- Guile As SHell +;;; Copyright © 2018 Jan (janneke) Nieuwenhuizen +;;; +;;; This file is part of Gash. +;;; +;;; Gash is free software; you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 3 of the License, or (at +;;; your option) any later version. +;;; +;;; Gash is distributed in the hope that it will be useful, but WITHOUT ANY +;;; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +;;; FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +;;; details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with Gash. If not, see . + +;;; Commentary: + +;;; Code: + +(define-module (gash commands sed) + #:use-module (ice-9 getopt-long) + #:use-module (ice-9 match) + #:use-module (ice-9 receive) + #:use-module (ice-9 regex) + + #:use-module (gash config) + #:use-module (gash guix-utils) + #:use-module (gash shell-utils) + + #:export ( + sed + )) + +(define (replace->lambda string modifiers) + (define (replace->string m s) + (list->string + (let loop ((lst (string->list string))) + (cond ((null? lst) '()) + ((null? (cdr lst)) lst) + ((and (eq? (car lst) #\\) + (char-numeric? (cadr lst))) + (let ((i (- (char->integer (cadr lst)) (char->integer #\0)))) + (append (string->list (match:substring m i)) (loop (cddr lst))))) + ((and (eq? (car lst) #\\) + (eq? (cadr lst) #\\)) + (append '(#\\ #\\) (cddr lst))) + (else (cons (car lst) (loop (cdr lst)))))))) + (lambda (l m+) + ;; Iterate over matches M+ and + ;; return the modified line + ;; based on L. + (let loop ((m* m+) ; matches + (o 0) ; offset in L + (r '())) ; result + (match m* + (() + (let ((r (cons (substring l o) r))) + (string-concatenate-reverse r))) + ((m . rest) + (let* ((refs (- (vector-length m) 2)) + (replace (replace->string m string)) + (replace (cons* replace (substring l o (match:start m)) r))) + (if (memq #\g modifiers) (loop rest (match:end m) replace) + (loop '() (match:end m) replace)))))))) + +(define (sed . args) + (let* ((option-spec + '((expression (single-char #\e) (value #t)) + (extended (single-char #\r)) + (posix-extended (single-char #\E)) + (file (single-char #\f) (value #t)) + (help (single-char #\h)) + (in-place (single-char #\i)) + (version (single-char #\V)))) + (options (getopt-long args option-spec)) + (files (option-ref options '() '())) + (extended? (or (option-ref options 'extended #f) + (option-ref options 'posix-extended #f))) + (help? (option-ref options 'help #f)) + (in-place? (option-ref options 'in-place #f)) + (usage? (and (not help?) (or (and (null? files) (isatty? (current-input-port)))))) + (version? (option-ref options 'version #f))) + (cond (version? (format #t "sed (GASH) ~a\n" %version) (exit 0)) + ((or help? usage?) (format (if usage? (current-error-port) #t) + "\ +Usage: sed [OPTION]... [SCRIPT] [FILE]... + -e, --expression=SCRIPT add SCRIPT to the commands to be executed + -E, -r, --regexp-extended use extended regular expressions in the script + -f, --file=SCRIPT add contents of SCRIPT to the commands to be executed + -h, --help display this help + -i, --in-place edit files in place + -V, --version display version +") + (exit (if usage? 2 0))) + (else + (let* ((script-files (multi-opt options 'file)) + (scripts (multi-opt options 'expression))) + (receive (scripts files) + (if (pair? (append script-files scripts)) (values scripts files) + (values (list-head files 1) (cdr files))) + (define (script->command o) + (cond ((string-prefix? "s" o) + (let* ((command (substring o 1)) + (string (substring command 1)) + (separator (string-ref command 0))) + (receive (search replace modifier-string) + (apply values (string-split string separator)) + (let* ((modifiers (string->list modifier-string)) + (flags (if extended? (list regexp/extended) (list regexp/basic))) + (flags (if (memq #\i modifiers) (cons regexp/icase flags) + flags))) + `((,search . ,flags) . ,(replace->lambda replace modifiers)))))) + (else (error (format #f "SED: command not supported: ~s\n" o))))) + (when (pair? script-files) + (error "SED: script files not supported")) + (let ((commands (map script->command scripts))) + (cond ((and in-place? (pair? files)) + (for-each (lambda (file) (substitute* file commands)) files)) + ((pair? files) + (for-each (lambda (file) + (with-input-from-file file + (lambda _ (substitute-port commands)))) + files)) + (else (substitute-port commands)))))))))) + +(use-modules (ice-9 rdelim)) +(define main sed) diff --git a/gash/shell-utils.scm b/gash/shell-utils.scm index 0ebb705..e115a17 100644 --- a/gash/shell-utils.scm +++ b/gash/shell-utils.scm @@ -63,6 +63,10 @@ executable-file? regular-file? symbolic-link? + substitute* + substitute-port + with-atomic-file-replacement + let-matches )) ;;; Commentary: @@ -343,7 +347,7 @@ TERMINAL-WIDTH. Use COLUMN-GAP spaces between two subsequent columns." (define (multi-opt options name) (let ((opt? (lambda (o) (and (eq? (car o) name) (cdr o))))) - (filter-map opt? options))) + (filter-map opt? (reverse options)))) (define %not-colon (char-set-complement (char-set #\:))) (define (executable-path) @@ -351,3 +355,83 @@ TERMINAL-WIDTH. Use COLUMN-GAP spaces between two subsequent columns." (match (getenv "PATH") (#f '()) (str (string-tokenize str %not-colon)))) + + +;;; +;;; Text substitution (aka. sed). +;;; + +(define (with-atomic-file-replacement file proc) + "Call PROC with two arguments: an input port for FILE, and an output +port for the file that is going to replace FILE. Upon success, FILE is +atomically replaced by what has been written to the output port, and +PROC's result is returned." + (let* ((template (string-append file ".XXXXXX")) + (out (mkstemp! template)) + (mode (stat:mode (stat file)))) + (with-throw-handler #t + (lambda () + (call-with-input-file file + (lambda (in) + (let ((result (proc in out))) + (close out) + (chmod template mode) + (rename-file template file) + result)))) + (lambda (key . args) + (false-if-exception (delete-file template)))))) + +(define (substitute* file pattern+procs) + "PATTERN+PROCS is a list of regexp/two-argument-procedure pairs. For each +line of FILE, and for each PATTERN that it matches, call the corresponding +PROC as (PROC LINE MATCHES); PROC must return the line that will be written as +a substitution of the original line. Be careful about using '$' to match the +end of a line; by itself it won't match the terminating newline of a line." + (let ((rx+proc (map (match-lambda + ;; (((? regexp? pattern) . proc) + ;; (cons pattern proc)) + (((pattern . flags) . proc) + (cons (apply make-regexp pattern flags) + proc))) + pattern+procs))) + (with-atomic-file-replacement file + (lambda (in out) + (let loop ((line (read-line in 'concat))) + (if (eof-object? line) + #t + (let ((line (fold (lambda (r+p line) + (match r+p + ((regexp . proc) + (match (list-matches regexp line) + ((and m+ (_ _ ...)) + (proc line m+)) + (_ line))))) + line + rx+proc))) + (display line out) + (loop (read-line in 'concat))))))))) + +(define (substitute-port pattern+procs) + (let ((rx+proc (map (match-lambda + ;; (((? regexp? pattern) . proc) + ;; (cons pattern proc)) + (((pattern . flags) . proc) + (cons (apply make-regexp pattern flags) + proc))) + pattern+procs)) + (in (current-input-port)) + (out (current-output-port))) + (let loop ((line (read-line in 'concat))) + (if (eof-object? line) + #t + (let ((line (fold (lambda (r+p line) + (match r+p + ((regexp . proc) + (match (list-matches regexp line) + ((and m+ (_ _ ...)) + (proc line m+)) + (_ line))))) + line + rx+proc))) + (display line out) + (loop (read-line in 'concat))))))) diff --git a/test/00-sed-case.sh b/test/00-sed-case.sh new file mode 100644 index 0000000..9e78665 --- /dev/null +++ b/test/00-sed-case.sh @@ -0,0 +1 @@ +echo ooO | \sed s,o,O,i diff --git a/test/00-sed-case.stdout b/test/00-sed-case.stdout new file mode 100644 index 0000000..327d153 --- /dev/null +++ b/test/00-sed-case.stdout @@ -0,0 +1 @@ +OoO diff --git a/test/00-sed-global.sh b/test/00-sed-global.sh new file mode 100644 index 0000000..d3f53bd --- /dev/null +++ b/test/00-sed-global.sh @@ -0,0 +1 @@ +echo 001 | \sed s,0,1,g diff --git a/test/00-sed-global.stdout b/test/00-sed-global.stdout new file mode 100644 index 0000000..58c9bdf --- /dev/null +++ b/test/00-sed-global.stdout @@ -0,0 +1 @@ +111 diff --git a/test/00-sed-group-extended.sh b/test/00-sed-group-extended.sh new file mode 100644 index 0000000..ece581c --- /dev/null +++ b/test/00-sed-group-extended.sh @@ -0,0 +1 @@ +echo 012 | \sed -r 's,(0)1(2),\21\1,' diff --git a/test/00-sed-group-extended.stdout b/test/00-sed-group-extended.stdout new file mode 100644 index 0000000..cd7da05 --- /dev/null +++ b/test/00-sed-group-extended.stdout @@ -0,0 +1 @@ +210 diff --git a/test/00-sed-group.sh b/test/00-sed-group.sh new file mode 100644 index 0000000..f5bff3e --- /dev/null +++ b/test/00-sed-group.sh @@ -0,0 +1 @@ +echo 012 | \sed 's,\(0\)1\(2\),\21\1,' diff --git a/test/00-sed-group.stdout b/test/00-sed-group.stdout new file mode 100644 index 0000000..cd7da05 --- /dev/null +++ b/test/00-sed-group.stdout @@ -0,0 +1 @@ +210 diff --git a/test/00-sed-once.sh b/test/00-sed-once.sh new file mode 100644 index 0000000..f133d0e --- /dev/null +++ b/test/00-sed-once.sh @@ -0,0 +1 @@ +echo 001 | \sed s,0,1, diff --git a/test/00-sed-once.stdout b/test/00-sed-once.stdout new file mode 100644 index 0000000..398050c --- /dev/null +++ b/test/00-sed-once.stdout @@ -0,0 +1 @@ +101 diff --git a/test/00-sed-twice.sh b/test/00-sed-twice.sh new file mode 100644 index 0000000..fe13896 --- /dev/null +++ b/test/00-sed-twice.sh @@ -0,0 +1 @@ +echo 0001 | \sed -e s,0,1, -e s,0,1, diff --git a/test/00-sed-twice.stdout b/test/00-sed-twice.stdout new file mode 100644 index 0000000..4f1e6aa --- /dev/null +++ b/test/00-sed-twice.stdout @@ -0,0 +1 @@ +1101 diff --git a/test/00-sed-undo.sh b/test/00-sed-undo.sh new file mode 100644 index 0000000..cd50810 --- /dev/null +++ b/test/00-sed-undo.sh @@ -0,0 +1 @@ +echo 001 | \sed -e s,0,1, -e s,1,0, diff --git a/test/00-sed-undo.stdout b/test/00-sed-undo.stdout new file mode 100644 index 0000000..5325a8d --- /dev/null +++ b/test/00-sed-undo.stdout @@ -0,0 +1 @@ +001 diff --git a/test/00-sed.sh b/test/00-sed.sh new file mode 100644 index 0000000..50f862c --- /dev/null +++ b/test/00-sed.sh @@ -0,0 +1 @@ +\sed --help diff --git a/test/00-sed.stdout b/test/00-sed.stdout new file mode 100644 index 0000000..b041df2 --- /dev/null +++ b/test/00-sed.stdout @@ -0,0 +1,7 @@ +Usage: sed [OPTION]... [SCRIPT] [FILE]... + -e, --expression=SCRIPT add SCRIPT to the commands to be executed + -E, -r, --regexp-extended use extended regular expressions in the script + -f, --file=SCRIPT add contents of SCRIPT to the commands to be executed + -h, --help display this help + -i, --in-place edit files in place + -V, --version display version