bournish: Add compress, from Guile100 challenge by Daniel Hartwig. WIP

This commit is contained in:
Jan Nieuwenhuizen 2018-10-27 11:19:21 +02:00
parent 8aad645152
commit 975f53d98e
7 changed files with 367 additions and 9 deletions

1
.gitignore vendored
View File

@ -1,6 +1,7 @@
*.go
*~
/bin/cat
/bin/compress
/bin/cp
/bin/find
/bin/gash

View File

@ -42,12 +42,14 @@ SCM_FILES="
gash/bournish-commands.scm
gash/guix-build-utils.scm
gash/builtins.scm
gash/compress.scm
gash/config.scm
gash/environment.scm
gash/geesh.scm
gash/gash.scm
gash/io.scm
gash/job.scm
gash/lzw.scm
gash/peg.scm
gash/pipe.scm
gash/script.scm
@ -68,6 +70,7 @@ done
SCRIPTS="
bin/cat
bin/compress
bin/cp
bin/find
bin/gash

1
configure vendored
View File

@ -36,6 +36,7 @@ sed \
chmod +x bin/gash
BUILTINS="
cat
compress
cp
find
grep

View File

@ -31,6 +31,7 @@
#:use-module (srfi srfi-26)
#:use-module (gash guix-build-utils)
#:use-module (gash compress)
#:use-module (gash config)
#:use-module (gash io)
#:use-module (gash ustar)
@ -439,15 +440,50 @@ Usage: tar [OPTION]... [FILE]...
(list?
(list-ustar-archive file files #:verbosity (1+ verbosity)))))))
(define (compress-command . args)
(lambda _
(let* ((option-spec
'((bits (single-char #\b) (value #t))
(decompress (single-char #\d))
(help (single-char #\h))
(stdout (single-char #\c))
(verbose (single-char #\v))
(version (single-char #\V))))
(args (cons "compress" args))
(options (getopt-long args option-spec))
(bits (string->number (option-ref options 'bits "16")))
(decompress? (option-ref options 'decompress #f))
(stdout? (option-ref options 'stdout #f))
(files (option-ref options '() '()))
(help? (option-ref options 'help #f))
(usage? (and (not help?) (or (and (null? files) (not stdout?)))))
(verbose? (option-ref options 'verbose #f))
(version? (option-ref options 'version #f)))
(cond ((or help? usage?) (format (if usage? (current-error-port) #t)
"\
Usage: compress [OPTION]... [FILE]...
-b, --bits=BITS use a maximum of BITS bits per code [16]
-c, --stdout write on standard output, keep original files unchanged
-d, --decompress decompress
-h, --help display this help
-v, --verbose show compression ratio
-V, --version display version
")
(exit (if usage? 2 0)))
(version? (format #t "compress (GASH) ~a\n" %version) (exit 0))
(decompress? (uncompress-file (car files) verbose?))
(else (compress-file (car files) bits verbose?))))))
(define %bournish-commands
`(
("cat" . ,cat-command)
("cp" . ,cp-command)
("find" . ,find-command)
("grep" . ,grep-command)
("ls" . ,ls-command)
("reboot" . ,reboot-command)
("tar" . ,tar-command)
("wc" . ,wc-command)
("which" . ,which-command)
("cat" . ,cat-command)
("compress" . ,compress-command)
("cp" . ,cp-command)
("find" . ,find-command)
("grep" . ,grep-command)
("ls" . ,ls-command)
("reboot" . ,reboot-command)
("tar" . ,tar-command)
("wc" . ,wc-command)
("which" . ,which-command)
))

158
gash/compress.scm Normal file
View File

@ -0,0 +1,158 @@
;;; Gash --- Guile As SHell
;;; Copyright © 2013 Daniel Hartwig <mandyke@gmail.com>
;;; Copyright © 2018 Jan (janneke) Nieuwenhuizen <janneke@gnu.org>
;;;
;;; This file is part of Gash.
;;;
;;; Gash is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; Gash is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with Gash. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
;;; The initial lzw.scm was taken from the Guile100 challenge
;;; https://github.com/spk121/guile100 from a contribution by Daniel
;;; Hartwig.
;;; Code:
(define-module (gash compress)
#:use-module (gash lzw)
#:use-module (ice-9 control)
#:use-module (ice-9 format)
#:use-module (ice-9 match)
#:use-module (rnrs bytevectors)
#:use-module (rnrs io ports)
#:use-module (srfi srfi-37)
#:export (compress-file
compress-port
uncompress-file
uncompress-port))
(define *program-name* "compress (GASH)")
(define (_ msg . rest)
(cons msg rest))
(define (error* status msg . args)
(force-output)
(let ((port (current-error-port)))
(when *program-name*
(display *program-name* port)
(display ": " port))
(apply format port msg args)
(newline port)
(unless (zero? status)
;; This call to 'abort' causes 'main' to immediately return the
;; specified status value. Similar to 'exit' but more
;; controlled, for example, when using the REPL to debug,
;; 'abort' will not cause the entire process to terminate.
;;
;; This is also handy to attempt processing every file, even
;; after an error has occured. To do this, establish another
;; prompt at an interesting place inside 'main'.
(abort (lambda (k)
status)))))
(define (make-file-error-handler filename)
(lambda args
(error* 1 (_ "~a: ~a")
filename
(strerror (system-error-errno args)))))
(define (system-error-handler key subr msg args rest)
(apply error* 1 msg args))
(define (compression-ratio nbytes-in nbytes-out)
(exact->inexact (/ (- nbytes-in nbytes-out) nbytes-in)))
(define (write-lzw-header port bits)
(put-bytevector port (u8-list->bytevector (list #x1F #x9D bits))))
(define (compress-port in out bits verbose?)
#;
(begin
(write-lzw-header out bits)
(%lzw-compress (cute get-u8 in)
(cute put-u16 out <>)
eof-object?
(expt 2 bits)))
(let* ((in-bv (get-bytevector-all in))
(out-bv (lzw-compress in-bv #:table-size (expt 2 bits))))
(write-lzw-header out bits)
(put-bytevector out out-bv)))
(define (compress-file infile bits verbose?)
(catch 'system-error
(lambda ()
(let ((outfile (string-append infile ".Z")))
(when (string-suffix? ".Z" infile)
(error* 1 (_ "~a: already has .Z suffix") infile))
(when (file-exists? outfile)
(error* 1 (_ "~a: already exists") outfile))
(let ((in (open-file infile "rb"))
(out (open-file outfile "wb")))
;; TODO: Keep original files ownership, modes, and access
;; and modification times.
(compress-port in out bits verbose?)
(when verbose?
(format #; (current-error-port)
(current-output-port)
(_ "~a: compression: ~1,2h%\n") ; '~h' is localized '~f'.
infile
(* 100 (compression-ratio (port-position in)
(port-position out)))))
(for-each close-port (list in out))
(delete-file infile))))
system-error-handler))
(define (read-lzw-header port)
(match (bytevector->u8-list (get-bytevector-n port 3))
((#x1F #x9D bits)
(and (<= 9 bits 16)
(values bits)))
(x #f)))
(define (uncompress-port in out verbose?)
(let ((bits (read-lzw-header in)))
(unless bits
(error* 1 (_ "incorrect header")))
#;
(%lzw-uncompress (cute get-u16 in)
(cute put-u8 out <>)
eof-object?
(expt 2 bits))
(let* ((in-bv (get-bytevector-all in))
(out-bv (lzw-uncompress in-bv #:table-size (expt 2 bits))))
(put-bytevector out out-bv))))
(define (uncompress-file infile verbose?)
(catch 'system-error
(lambda ()
(let ((outfile (string-drop-right infile 2)))
(when (not (string-suffix? ".Z" infile))
(error* 1 (_ "~a: does not have .Z suffix") infile))
(when (file-exists? outfile)
(error* 1 (_ "~a: already exists") outfile))
(let ((in (open-file infile "rb"))
(out (open-file outfile "wb")))
(uncompress-port in out verbose?)
(when verbose?
(format #; (current-error-port)
(current-output-port)
(_ "~a: compression: ~1,2h%\n") ; '~h is localized '~f'.
infile
(* 100 (compression-ratio (port-position out)
(port-position in)))))
(for-each close-port (list in out))
(delete-file infile))))
system-error-handler))

151
gash/lzw.scm Normal file
View File

@ -0,0 +1,151 @@
;;; Gash --- Guile As SHell
;;; Copyright © 2013 Daniel Hartwig <mandyke@gmail.com>
;;;
;;; This file is part of Gash.
;;;
;;; Gash is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; Gash is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with Gash. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
;;; The initial lzw.scm was taken from the Guile100 challenge
;;; https://github.com/spk121/guile100 from a contribution by Daniel
;;; Hartwig.
;;; Code:
(define-module (gash lzw)
#:use-module (rnrs bytevectors)
#:use-module (rnrs io ports)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-26)
#:export (lzw-compress
lzw-uncompress
%lzw-compress
%lzw-uncompress))
;; This procedure adapted from an example in the Guile Reference
;; Manual.
(define (make-serial-number-generator start end)
(let ((current-serial-number (- start 1)))
(lambda ()
(and (< current-serial-number end)
(set! current-serial-number (+ current-serial-number 1))
current-serial-number))))
(define (put-u16 port k)
;; Little endian.
(put-u8 port (logand k #xFF))
(put-u8 port (logand (ash k -8) #xFF)))
(define (get-u16 port)
;; Little endian. Order of evaluation is important, use 'let*'.
(let* ((a (get-u8 port))
(b (get-u8 port)))
(if (any eof-object? (list a b))
(eof-object)
(logior a (ash b 8)))))
(define (%lzw-compress in out done? table-size)
(let ((codes (make-hash-table table-size))
(next-code (make-serial-number-generator 0 table-size))
(universe (iota 256))
(eof-code #f))
;; Populate the initial dictionary with all one-element strings
;; from the universe.
(for-each (lambda (obj)
(hash-set! codes (list obj) (next-code)))
universe)
(set! eof-code (next-code))
(let loop ((cs '()))
(let ((c (in)))
(cond ((done? c)
(unless (null? cs)
(out (hash-ref codes cs)))
(out eof-code)
(values codes))
((hash-ref codes (cons c cs))
(loop (cons c cs)))
(else
(and=> (next-code)
(cut hash-set! codes (cons c cs) <>))
(out (hash-ref codes cs))
(loop (cons c '()))))))))
(define (ensure-bv-input-port bv-or-port)
(cond ((port? bv-or-port)
bv-or-port)
((bytevector? bv-or-port)
(open-bytevector-input-port bv-or-port))
(else
(scm-error 'wrong-type-arg "ensure-bv-input-port"
"Wrong type argument in position ~a: ~s"
(list 1 bv-or-port) (list bv-or-port)))))
(define (for-each-right proc lst)
(let loop ((lst lst))
(unless (null? lst)
(loop (cdr lst))
(proc (car lst)))))
(define (%lzw-uncompress in out done? table-size)
(let ((strings (make-hash-table table-size))
(next-code (make-serial-number-generator 0 table-size))
(universe (iota 256))
(eof-code #f))
(for-each (lambda (obj)
(hash-set! strings (next-code) (list obj)))
universe)
(set! eof-code (next-code))
(let loop ((previous-string '()))
(let ((code (in)))
(unless (or (done? code)
(= code eof-code))
(unless (hash-ref strings code)
(hash-set! strings
code
(cons (last previous-string) previous-string)))
(for-each-right out
(hash-ref strings code))
(let ((cs (hash-ref strings code)))
(and=> (and (not (null? previous-string))
(next-code))
(cut hash-set! strings <> (cons (last cs)
previous-string)))
(loop cs)))))))
(define* (lzw-compress bv #:key (table-size 65536) dictionary)
(call-with-values
(lambda ()
(open-bytevector-output-port))
(lambda (output-port get-result)
(let ((dict (%lzw-compress (cute get-u8 (ensure-bv-input-port bv))
(cute put-u16 output-port <>)
eof-object?
table-size)))
(if dictionary
(values (get-result) dict)
(get-result))))))
(define* (lzw-uncompress bv #:key (table-size 65536) dictionary)
(call-with-values
(lambda ()
(open-bytevector-output-port))
(lambda (output-port get-result)
(let ((dict (%lzw-uncompress (cute get-u16 (open-bytevector-input-port bv))
(cute put-u8 output-port <>)
eof-object?
table-size)))
(if dictionary
(values (get-result) dict)
(get-result))))))

View File

@ -17,6 +17,14 @@
;;; You should have received a copy of the GNU General Public License
;;; along with Gash. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
;;; The initial ustar.scm was taken from the Guile100 challenge
;;; https://github.com/spk121/guile100 from a contribution by Mark H
;;; Weaver.
;;; Code:
(define-module (gash ustar)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-9 gnu)