From 975f53d98edb733db9b14746ea973f93a41d9764 Mon Sep 17 00:00:00 2001 From: Jan Nieuwenhuizen Date: Sat, 27 Oct 2018 11:19:21 +0200 Subject: [PATCH] bournish: Add compress, from Guile100 challenge by Daniel Hartwig. WIP --- .gitignore | 1 + build-aux/build-guile.sh | 3 + configure | 1 + gash/bournish-commands.scm | 54 ++++++++++--- gash/compress.scm | 158 +++++++++++++++++++++++++++++++++++++ gash/lzw.scm | 151 +++++++++++++++++++++++++++++++++++ gash/ustar.scm | 8 ++ 7 files changed, 367 insertions(+), 9 deletions(-) create mode 100644 gash/compress.scm create mode 100644 gash/lzw.scm diff --git a/.gitignore b/.gitignore index 8f42809..ebb4f43 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ *.go *~ /bin/cat +/bin/compress /bin/cp /bin/find /bin/gash diff --git a/build-aux/build-guile.sh b/build-aux/build-guile.sh index 9dbe8ec..8890283 100755 --- a/build-aux/build-guile.sh +++ b/build-aux/build-guile.sh @@ -42,12 +42,14 @@ SCM_FILES=" gash/bournish-commands.scm gash/guix-build-utils.scm gash/builtins.scm +gash/compress.scm gash/config.scm gash/environment.scm gash/geesh.scm gash/gash.scm gash/io.scm gash/job.scm +gash/lzw.scm gash/peg.scm gash/pipe.scm gash/script.scm @@ -68,6 +70,7 @@ done SCRIPTS=" bin/cat +bin/compress bin/cp bin/find bin/gash diff --git a/configure b/configure index a8dd2b6..c4e6d63 100755 --- a/configure +++ b/configure @@ -36,6 +36,7 @@ sed \ chmod +x bin/gash BUILTINS=" cat +compress cp find grep diff --git a/gash/bournish-commands.scm b/gash/bournish-commands.scm index 57a8d52..6807a86 100644 --- a/gash/bournish-commands.scm +++ b/gash/bournish-commands.scm @@ -31,6 +31,7 @@ #:use-module (srfi srfi-26) #:use-module (gash guix-build-utils) + #:use-module (gash compress) #:use-module (gash config) #:use-module (gash io) #:use-module (gash ustar) @@ -439,15 +440,50 @@ Usage: tar [OPTION]... [FILE]... (list? (list-ustar-archive file files #:verbosity (1+ verbosity))))))) +(define (compress-command . args) + (lambda _ + (let* ((option-spec + '((bits (single-char #\b) (value #t)) + (decompress (single-char #\d)) + (help (single-char #\h)) + (stdout (single-char #\c)) + (verbose (single-char #\v)) + (version (single-char #\V)))) + (args (cons "compress" args)) + (options (getopt-long args option-spec)) + (bits (string->number (option-ref options 'bits "16"))) + (decompress? (option-ref options 'decompress #f)) + (stdout? (option-ref options 'stdout #f)) + (files (option-ref options '() '())) + (help? (option-ref options 'help #f)) + (usage? (and (not help?) (or (and (null? files) (not stdout?))))) + (verbose? (option-ref options 'verbose #f)) + (version? (option-ref options 'version #f))) + (cond ((or help? usage?) (format (if usage? (current-error-port) #t) + "\ +Usage: compress [OPTION]... [FILE]... + -b, --bits=BITS use a maximum of BITS bits per code [16] + -c, --stdout write on standard output, keep original files unchanged + -d, --decompress decompress + -h, --help display this help + -v, --verbose show compression ratio + -V, --version display version +") + (exit (if usage? 2 0))) + (version? (format #t "compress (GASH) ~a\n" %version) (exit 0)) + (decompress? (uncompress-file (car files) verbose?)) + (else (compress-file (car files) bits verbose?)))))) + (define %bournish-commands `( - ("cat" . ,cat-command) - ("cp" . ,cp-command) - ("find" . ,find-command) - ("grep" . ,grep-command) - ("ls" . ,ls-command) - ("reboot" . ,reboot-command) - ("tar" . ,tar-command) - ("wc" . ,wc-command) - ("which" . ,which-command) + ("cat" . ,cat-command) + ("compress" . ,compress-command) + ("cp" . ,cp-command) + ("find" . ,find-command) + ("grep" . ,grep-command) + ("ls" . ,ls-command) + ("reboot" . ,reboot-command) + ("tar" . ,tar-command) + ("wc" . ,wc-command) + ("which" . ,which-command) )) diff --git a/gash/compress.scm b/gash/compress.scm new file mode 100644 index 0000000..63cc931 --- /dev/null +++ b/gash/compress.scm @@ -0,0 +1,158 @@ +;;; Gash --- Guile As SHell +;;; Copyright © 2013 Daniel Hartwig +;;; Copyright © 2018 Jan (janneke) Nieuwenhuizen +;;; +;;; This file is part of Gash. +;;; +;;; Gash is free software; you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 3 of the License, or (at +;;; your option) any later version. +;;; +;;; Gash is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with Gash. If not, see . + +;;; Commentary: + +;;; The initial lzw.scm was taken from the Guile100 challenge +;;; https://github.com/spk121/guile100 from a contribution by Daniel +;;; Hartwig. + +;;; Code: + +(define-module (gash compress) + #:use-module (gash lzw) + #:use-module (ice-9 control) + #:use-module (ice-9 format) + #:use-module (ice-9 match) + #:use-module (rnrs bytevectors) + #:use-module (rnrs io ports) + #:use-module (srfi srfi-37) + #:export (compress-file + compress-port + uncompress-file + uncompress-port)) + +(define *program-name* "compress (GASH)") + +(define (_ msg . rest) + (cons msg rest)) + +(define (error* status msg . args) + (force-output) + (let ((port (current-error-port))) + (when *program-name* + (display *program-name* port) + (display ": " port)) + (apply format port msg args) + (newline port) + (unless (zero? status) + ;; This call to 'abort' causes 'main' to immediately return the + ;; specified status value. Similar to 'exit' but more + ;; controlled, for example, when using the REPL to debug, + ;; 'abort' will not cause the entire process to terminate. + ;; + ;; This is also handy to attempt processing every file, even + ;; after an error has occured. To do this, establish another + ;; prompt at an interesting place inside 'main'. + (abort (lambda (k) + status))))) + +(define (make-file-error-handler filename) + (lambda args + (error* 1 (_ "~a: ~a") + filename + (strerror (system-error-errno args))))) + +(define (system-error-handler key subr msg args rest) + (apply error* 1 msg args)) + +(define (compression-ratio nbytes-in nbytes-out) + (exact->inexact (/ (- nbytes-in nbytes-out) nbytes-in))) + +(define (write-lzw-header port bits) + (put-bytevector port (u8-list->bytevector (list #x1F #x9D bits)))) + +(define (compress-port in out bits verbose?) + #; + (begin + (write-lzw-header out bits) + (%lzw-compress (cute get-u8 in) + (cute put-u16 out <>) + eof-object? + (expt 2 bits))) + (let* ((in-bv (get-bytevector-all in)) + (out-bv (lzw-compress in-bv #:table-size (expt 2 bits)))) + (write-lzw-header out bits) + (put-bytevector out out-bv))) + +(define (compress-file infile bits verbose?) + (catch 'system-error + (lambda () + (let ((outfile (string-append infile ".Z"))) + (when (string-suffix? ".Z" infile) + (error* 1 (_ "~a: already has .Z suffix") infile)) + (when (file-exists? outfile) + (error* 1 (_ "~a: already exists") outfile)) + (let ((in (open-file infile "rb")) + (out (open-file outfile "wb"))) + ;; TODO: Keep original files ownership, modes, and access + ;; and modification times. + (compress-port in out bits verbose?) + (when verbose? + (format #; (current-error-port) + (current-output-port) + (_ "~a: compression: ~1,2h%\n") ; '~h' is localized '~f'. + infile + (* 100 (compression-ratio (port-position in) + (port-position out))))) + (for-each close-port (list in out)) + (delete-file infile)))) + system-error-handler)) + +(define (read-lzw-header port) + (match (bytevector->u8-list (get-bytevector-n port 3)) + ((#x1F #x9D bits) + (and (<= 9 bits 16) + (values bits))) + (x #f))) + +(define (uncompress-port in out verbose?) + (let ((bits (read-lzw-header in))) + (unless bits + (error* 1 (_ "incorrect header"))) + #; + (%lzw-uncompress (cute get-u16 in) + (cute put-u8 out <>) + eof-object? + (expt 2 bits)) + (let* ((in-bv (get-bytevector-all in)) + (out-bv (lzw-uncompress in-bv #:table-size (expt 2 bits)))) + (put-bytevector out out-bv)))) + +(define (uncompress-file infile verbose?) + (catch 'system-error + (lambda () + (let ((outfile (string-drop-right infile 2))) + (when (not (string-suffix? ".Z" infile)) + (error* 1 (_ "~a: does not have .Z suffix") infile)) + (when (file-exists? outfile) + (error* 1 (_ "~a: already exists") outfile)) + (let ((in (open-file infile "rb")) + (out (open-file outfile "wb"))) + (uncompress-port in out verbose?) + (when verbose? + (format #; (current-error-port) + (current-output-port) + (_ "~a: compression: ~1,2h%\n") ; '~h is localized '~f'. + infile + (* 100 (compression-ratio (port-position out) + (port-position in))))) + (for-each close-port (list in out)) + (delete-file infile)))) + system-error-handler)) diff --git a/gash/lzw.scm b/gash/lzw.scm new file mode 100644 index 0000000..fcf7a2c --- /dev/null +++ b/gash/lzw.scm @@ -0,0 +1,151 @@ +;;; Gash --- Guile As SHell +;;; Copyright © 2013 Daniel Hartwig +;;; +;;; This file is part of Gash. +;;; +;;; Gash is free software; you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 3 of the License, or (at +;;; your option) any later version. +;;; +;;; Gash is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with Gash. If not, see . + +;;; Commentary: + +;;; The initial lzw.scm was taken from the Guile100 challenge +;;; https://github.com/spk121/guile100 from a contribution by Daniel +;;; Hartwig. + +;;; Code: + +(define-module (gash lzw) + #:use-module (rnrs bytevectors) + #:use-module (rnrs io ports) + #:use-module (srfi srfi-1) + #:use-module (srfi srfi-26) + #:export (lzw-compress + lzw-uncompress + %lzw-compress + %lzw-uncompress)) + +;; This procedure adapted from an example in the Guile Reference +;; Manual. +(define (make-serial-number-generator start end) + (let ((current-serial-number (- start 1))) + (lambda () + (and (< current-serial-number end) + (set! current-serial-number (+ current-serial-number 1)) + current-serial-number)))) + +(define (put-u16 port k) + ;; Little endian. + (put-u8 port (logand k #xFF)) + (put-u8 port (logand (ash k -8) #xFF))) + +(define (get-u16 port) + ;; Little endian. Order of evaluation is important, use 'let*'. + (let* ((a (get-u8 port)) + (b (get-u8 port))) + (if (any eof-object? (list a b)) + (eof-object) + (logior a (ash b 8))))) + +(define (%lzw-compress in out done? table-size) + (let ((codes (make-hash-table table-size)) + (next-code (make-serial-number-generator 0 table-size)) + (universe (iota 256)) + (eof-code #f)) + ;; Populate the initial dictionary with all one-element strings + ;; from the universe. + (for-each (lambda (obj) + (hash-set! codes (list obj) (next-code))) + universe) + (set! eof-code (next-code)) + (let loop ((cs '())) + (let ((c (in))) + (cond ((done? c) + (unless (null? cs) + (out (hash-ref codes cs))) + (out eof-code) + (values codes)) + ((hash-ref codes (cons c cs)) + (loop (cons c cs))) + (else + (and=> (next-code) + (cut hash-set! codes (cons c cs) <>)) + (out (hash-ref codes cs)) + (loop (cons c '())))))))) + +(define (ensure-bv-input-port bv-or-port) + (cond ((port? bv-or-port) + bv-or-port) + ((bytevector? bv-or-port) + (open-bytevector-input-port bv-or-port)) + (else + (scm-error 'wrong-type-arg "ensure-bv-input-port" + "Wrong type argument in position ~a: ~s" + (list 1 bv-or-port) (list bv-or-port))))) + +(define (for-each-right proc lst) + (let loop ((lst lst)) + (unless (null? lst) + (loop (cdr lst)) + (proc (car lst))))) + +(define (%lzw-uncompress in out done? table-size) + (let ((strings (make-hash-table table-size)) + (next-code (make-serial-number-generator 0 table-size)) + (universe (iota 256)) + (eof-code #f)) + (for-each (lambda (obj) + (hash-set! strings (next-code) (list obj))) + universe) + (set! eof-code (next-code)) + (let loop ((previous-string '())) + (let ((code (in))) + (unless (or (done? code) + (= code eof-code)) + (unless (hash-ref strings code) + (hash-set! strings + code + (cons (last previous-string) previous-string))) + (for-each-right out + (hash-ref strings code)) + (let ((cs (hash-ref strings code))) + (and=> (and (not (null? previous-string)) + (next-code)) + (cut hash-set! strings <> (cons (last cs) + previous-string))) + (loop cs))))))) + +(define* (lzw-compress bv #:key (table-size 65536) dictionary) + (call-with-values + (lambda () + (open-bytevector-output-port)) + (lambda (output-port get-result) + (let ((dict (%lzw-compress (cute get-u8 (ensure-bv-input-port bv)) + (cute put-u16 output-port <>) + eof-object? + table-size))) + (if dictionary + (values (get-result) dict) + (get-result)))))) + +(define* (lzw-uncompress bv #:key (table-size 65536) dictionary) + (call-with-values + (lambda () + (open-bytevector-output-port)) + (lambda (output-port get-result) + (let ((dict (%lzw-uncompress (cute get-u16 (open-bytevector-input-port bv)) + (cute put-u8 output-port <>) + eof-object? + table-size))) + (if dictionary + (values (get-result) dict) + (get-result)))))) diff --git a/gash/ustar.scm b/gash/ustar.scm index 6e110bc..215fe54 100644 --- a/gash/ustar.scm +++ b/gash/ustar.scm @@ -17,6 +17,14 @@ ;;; You should have received a copy of the GNU General Public License ;;; along with Gash. If not, see . +;;; Commentary: + +;;; The initial ustar.scm was taken from the Guile100 challenge +;;; https://github.com/spk121/guile100 from a contribution by Mark H +;;; Weaver. + +;;; Code: + (define-module (gash ustar) #:use-module (srfi srfi-1) #:use-module (srfi srfi-9 gnu)