bournish: Add compress, from Guile100 challenge by Daniel Hartwig. WIP
This commit is contained in:
parent
8aad645152
commit
975f53d98e
|
@ -1,6 +1,7 @@
|
|||
*.go
|
||||
*~
|
||||
/bin/cat
|
||||
/bin/compress
|
||||
/bin/cp
|
||||
/bin/find
|
||||
/bin/gash
|
||||
|
|
|
@ -42,12 +42,14 @@ SCM_FILES="
|
|||
gash/bournish-commands.scm
|
||||
gash/guix-build-utils.scm
|
||||
gash/builtins.scm
|
||||
gash/compress.scm
|
||||
gash/config.scm
|
||||
gash/environment.scm
|
||||
gash/geesh.scm
|
||||
gash/gash.scm
|
||||
gash/io.scm
|
||||
gash/job.scm
|
||||
gash/lzw.scm
|
||||
gash/peg.scm
|
||||
gash/pipe.scm
|
||||
gash/script.scm
|
||||
|
@ -68,6 +70,7 @@ done
|
|||
|
||||
SCRIPTS="
|
||||
bin/cat
|
||||
bin/compress
|
||||
bin/cp
|
||||
bin/find
|
||||
bin/gash
|
||||
|
|
|
@ -36,6 +36,7 @@ sed \
|
|||
chmod +x bin/gash
|
||||
BUILTINS="
|
||||
cat
|
||||
compress
|
||||
cp
|
||||
find
|
||||
grep
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#:use-module (srfi srfi-26)
|
||||
|
||||
#:use-module (gash guix-build-utils)
|
||||
#:use-module (gash compress)
|
||||
#:use-module (gash config)
|
||||
#:use-module (gash io)
|
||||
#:use-module (gash ustar)
|
||||
|
@ -439,15 +440,50 @@ Usage: tar [OPTION]... [FILE]...
|
|||
(list?
|
||||
(list-ustar-archive file files #:verbosity (1+ verbosity)))))))
|
||||
|
||||
(define (compress-command . args)
|
||||
(lambda _
|
||||
(let* ((option-spec
|
||||
'((bits (single-char #\b) (value #t))
|
||||
(decompress (single-char #\d))
|
||||
(help (single-char #\h))
|
||||
(stdout (single-char #\c))
|
||||
(verbose (single-char #\v))
|
||||
(version (single-char #\V))))
|
||||
(args (cons "compress" args))
|
||||
(options (getopt-long args option-spec))
|
||||
(bits (string->number (option-ref options 'bits "16")))
|
||||
(decompress? (option-ref options 'decompress #f))
|
||||
(stdout? (option-ref options 'stdout #f))
|
||||
(files (option-ref options '() '()))
|
||||
(help? (option-ref options 'help #f))
|
||||
(usage? (and (not help?) (or (and (null? files) (not stdout?)))))
|
||||
(verbose? (option-ref options 'verbose #f))
|
||||
(version? (option-ref options 'version #f)))
|
||||
(cond ((or help? usage?) (format (if usage? (current-error-port) #t)
|
||||
"\
|
||||
Usage: compress [OPTION]... [FILE]...
|
||||
-b, --bits=BITS use a maximum of BITS bits per code [16]
|
||||
-c, --stdout write on standard output, keep original files unchanged
|
||||
-d, --decompress decompress
|
||||
-h, --help display this help
|
||||
-v, --verbose show compression ratio
|
||||
-V, --version display version
|
||||
")
|
||||
(exit (if usage? 2 0)))
|
||||
(version? (format #t "compress (GASH) ~a\n" %version) (exit 0))
|
||||
(decompress? (uncompress-file (car files) verbose?))
|
||||
(else (compress-file (car files) bits verbose?))))))
|
||||
|
||||
(define %bournish-commands
|
||||
`(
|
||||
("cat" . ,cat-command)
|
||||
("cp" . ,cp-command)
|
||||
("find" . ,find-command)
|
||||
("grep" . ,grep-command)
|
||||
("ls" . ,ls-command)
|
||||
("reboot" . ,reboot-command)
|
||||
("tar" . ,tar-command)
|
||||
("wc" . ,wc-command)
|
||||
("which" . ,which-command)
|
||||
("cat" . ,cat-command)
|
||||
("compress" . ,compress-command)
|
||||
("cp" . ,cp-command)
|
||||
("find" . ,find-command)
|
||||
("grep" . ,grep-command)
|
||||
("ls" . ,ls-command)
|
||||
("reboot" . ,reboot-command)
|
||||
("tar" . ,tar-command)
|
||||
("wc" . ,wc-command)
|
||||
("which" . ,which-command)
|
||||
))
|
||||
|
|
|
@ -0,0 +1,158 @@
|
|||
;;; Gash --- Guile As SHell
|
||||
;;; Copyright © 2013 Daniel Hartwig <mandyke@gmail.com>
|
||||
;;; Copyright © 2018 Jan (janneke) Nieuwenhuizen <janneke@gnu.org>
|
||||
;;;
|
||||
;;; This file is part of Gash.
|
||||
;;;
|
||||
;;; Gash is free software; you can redistribute it and/or modify it
|
||||
;;; under the terms of the GNU General Public License as published by
|
||||
;;; the Free Software Foundation; either version 3 of the License, or (at
|
||||
;;; your option) any later version.
|
||||
;;;
|
||||
;;; Gash is distributed in the hope that it will be useful, but
|
||||
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;;; GNU General Public License for more details.
|
||||
;;;
|
||||
;;; You should have received a copy of the GNU General Public License
|
||||
;;; along with Gash. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
;;; Commentary:
|
||||
|
||||
;;; The initial lzw.scm was taken from the Guile100 challenge
|
||||
;;; https://github.com/spk121/guile100 from a contribution by Daniel
|
||||
;;; Hartwig.
|
||||
|
||||
;;; Code:
|
||||
|
||||
(define-module (gash compress)
|
||||
#:use-module (gash lzw)
|
||||
#:use-module (ice-9 control)
|
||||
#:use-module (ice-9 format)
|
||||
#:use-module (ice-9 match)
|
||||
#:use-module (rnrs bytevectors)
|
||||
#:use-module (rnrs io ports)
|
||||
#:use-module (srfi srfi-37)
|
||||
#:export (compress-file
|
||||
compress-port
|
||||
uncompress-file
|
||||
uncompress-port))
|
||||
|
||||
(define *program-name* "compress (GASH)")
|
||||
|
||||
(define (_ msg . rest)
|
||||
(cons msg rest))
|
||||
|
||||
(define (error* status msg . args)
|
||||
(force-output)
|
||||
(let ((port (current-error-port)))
|
||||
(when *program-name*
|
||||
(display *program-name* port)
|
||||
(display ": " port))
|
||||
(apply format port msg args)
|
||||
(newline port)
|
||||
(unless (zero? status)
|
||||
;; This call to 'abort' causes 'main' to immediately return the
|
||||
;; specified status value. Similar to 'exit' but more
|
||||
;; controlled, for example, when using the REPL to debug,
|
||||
;; 'abort' will not cause the entire process to terminate.
|
||||
;;
|
||||
;; This is also handy to attempt processing every file, even
|
||||
;; after an error has occured. To do this, establish another
|
||||
;; prompt at an interesting place inside 'main'.
|
||||
(abort (lambda (k)
|
||||
status)))))
|
||||
|
||||
(define (make-file-error-handler filename)
|
||||
(lambda args
|
||||
(error* 1 (_ "~a: ~a")
|
||||
filename
|
||||
(strerror (system-error-errno args)))))
|
||||
|
||||
(define (system-error-handler key subr msg args rest)
|
||||
(apply error* 1 msg args))
|
||||
|
||||
(define (compression-ratio nbytes-in nbytes-out)
|
||||
(exact->inexact (/ (- nbytes-in nbytes-out) nbytes-in)))
|
||||
|
||||
(define (write-lzw-header port bits)
|
||||
(put-bytevector port (u8-list->bytevector (list #x1F #x9D bits))))
|
||||
|
||||
(define (compress-port in out bits verbose?)
|
||||
#;
|
||||
(begin
|
||||
(write-lzw-header out bits)
|
||||
(%lzw-compress (cute get-u8 in)
|
||||
(cute put-u16 out <>)
|
||||
eof-object?
|
||||
(expt 2 bits)))
|
||||
(let* ((in-bv (get-bytevector-all in))
|
||||
(out-bv (lzw-compress in-bv #:table-size (expt 2 bits))))
|
||||
(write-lzw-header out bits)
|
||||
(put-bytevector out out-bv)))
|
||||
|
||||
(define (compress-file infile bits verbose?)
|
||||
(catch 'system-error
|
||||
(lambda ()
|
||||
(let ((outfile (string-append infile ".Z")))
|
||||
(when (string-suffix? ".Z" infile)
|
||||
(error* 1 (_ "~a: already has .Z suffix") infile))
|
||||
(when (file-exists? outfile)
|
||||
(error* 1 (_ "~a: already exists") outfile))
|
||||
(let ((in (open-file infile "rb"))
|
||||
(out (open-file outfile "wb")))
|
||||
;; TODO: Keep original files ownership, modes, and access
|
||||
;; and modification times.
|
||||
(compress-port in out bits verbose?)
|
||||
(when verbose?
|
||||
(format #; (current-error-port)
|
||||
(current-output-port)
|
||||
(_ "~a: compression: ~1,2h%\n") ; '~h' is localized '~f'.
|
||||
infile
|
||||
(* 100 (compression-ratio (port-position in)
|
||||
(port-position out)))))
|
||||
(for-each close-port (list in out))
|
||||
(delete-file infile))))
|
||||
system-error-handler))
|
||||
|
||||
(define (read-lzw-header port)
|
||||
(match (bytevector->u8-list (get-bytevector-n port 3))
|
||||
((#x1F #x9D bits)
|
||||
(and (<= 9 bits 16)
|
||||
(values bits)))
|
||||
(x #f)))
|
||||
|
||||
(define (uncompress-port in out verbose?)
|
||||
(let ((bits (read-lzw-header in)))
|
||||
(unless bits
|
||||
(error* 1 (_ "incorrect header")))
|
||||
#;
|
||||
(%lzw-uncompress (cute get-u16 in)
|
||||
(cute put-u8 out <>)
|
||||
eof-object?
|
||||
(expt 2 bits))
|
||||
(let* ((in-bv (get-bytevector-all in))
|
||||
(out-bv (lzw-uncompress in-bv #:table-size (expt 2 bits))))
|
||||
(put-bytevector out out-bv))))
|
||||
|
||||
(define (uncompress-file infile verbose?)
|
||||
(catch 'system-error
|
||||
(lambda ()
|
||||
(let ((outfile (string-drop-right infile 2)))
|
||||
(when (not (string-suffix? ".Z" infile))
|
||||
(error* 1 (_ "~a: does not have .Z suffix") infile))
|
||||
(when (file-exists? outfile)
|
||||
(error* 1 (_ "~a: already exists") outfile))
|
||||
(let ((in (open-file infile "rb"))
|
||||
(out (open-file outfile "wb")))
|
||||
(uncompress-port in out verbose?)
|
||||
(when verbose?
|
||||
(format #; (current-error-port)
|
||||
(current-output-port)
|
||||
(_ "~a: compression: ~1,2h%\n") ; '~h is localized '~f'.
|
||||
infile
|
||||
(* 100 (compression-ratio (port-position out)
|
||||
(port-position in)))))
|
||||
(for-each close-port (list in out))
|
||||
(delete-file infile))))
|
||||
system-error-handler))
|
|
@ -0,0 +1,151 @@
|
|||
;;; Gash --- Guile As SHell
|
||||
;;; Copyright © 2013 Daniel Hartwig <mandyke@gmail.com>
|
||||
;;;
|
||||
;;; This file is part of Gash.
|
||||
;;;
|
||||
;;; Gash is free software; you can redistribute it and/or modify it
|
||||
;;; under the terms of the GNU General Public License as published by
|
||||
;;; the Free Software Foundation; either version 3 of the License, or (at
|
||||
;;; your option) any later version.
|
||||
;;;
|
||||
;;; Gash is distributed in the hope that it will be useful, but
|
||||
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;;; GNU General Public License for more details.
|
||||
;;;
|
||||
;;; You should have received a copy of the GNU General Public License
|
||||
;;; along with Gash. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
;;; Commentary:
|
||||
|
||||
;;; The initial lzw.scm was taken from the Guile100 challenge
|
||||
;;; https://github.com/spk121/guile100 from a contribution by Daniel
|
||||
;;; Hartwig.
|
||||
|
||||
;;; Code:
|
||||
|
||||
(define-module (gash lzw)
|
||||
#:use-module (rnrs bytevectors)
|
||||
#:use-module (rnrs io ports)
|
||||
#:use-module (srfi srfi-1)
|
||||
#:use-module (srfi srfi-26)
|
||||
#:export (lzw-compress
|
||||
lzw-uncompress
|
||||
%lzw-compress
|
||||
%lzw-uncompress))
|
||||
|
||||
;; This procedure adapted from an example in the Guile Reference
|
||||
;; Manual.
|
||||
(define (make-serial-number-generator start end)
|
||||
(let ((current-serial-number (- start 1)))
|
||||
(lambda ()
|
||||
(and (< current-serial-number end)
|
||||
(set! current-serial-number (+ current-serial-number 1))
|
||||
current-serial-number))))
|
||||
|
||||
(define (put-u16 port k)
|
||||
;; Little endian.
|
||||
(put-u8 port (logand k #xFF))
|
||||
(put-u8 port (logand (ash k -8) #xFF)))
|
||||
|
||||
(define (get-u16 port)
|
||||
;; Little endian. Order of evaluation is important, use 'let*'.
|
||||
(let* ((a (get-u8 port))
|
||||
(b (get-u8 port)))
|
||||
(if (any eof-object? (list a b))
|
||||
(eof-object)
|
||||
(logior a (ash b 8)))))
|
||||
|
||||
(define (%lzw-compress in out done? table-size)
|
||||
(let ((codes (make-hash-table table-size))
|
||||
(next-code (make-serial-number-generator 0 table-size))
|
||||
(universe (iota 256))
|
||||
(eof-code #f))
|
||||
;; Populate the initial dictionary with all one-element strings
|
||||
;; from the universe.
|
||||
(for-each (lambda (obj)
|
||||
(hash-set! codes (list obj) (next-code)))
|
||||
universe)
|
||||
(set! eof-code (next-code))
|
||||
(let loop ((cs '()))
|
||||
(let ((c (in)))
|
||||
(cond ((done? c)
|
||||
(unless (null? cs)
|
||||
(out (hash-ref codes cs)))
|
||||
(out eof-code)
|
||||
(values codes))
|
||||
((hash-ref codes (cons c cs))
|
||||
(loop (cons c cs)))
|
||||
(else
|
||||
(and=> (next-code)
|
||||
(cut hash-set! codes (cons c cs) <>))
|
||||
(out (hash-ref codes cs))
|
||||
(loop (cons c '()))))))))
|
||||
|
||||
(define (ensure-bv-input-port bv-or-port)
|
||||
(cond ((port? bv-or-port)
|
||||
bv-or-port)
|
||||
((bytevector? bv-or-port)
|
||||
(open-bytevector-input-port bv-or-port))
|
||||
(else
|
||||
(scm-error 'wrong-type-arg "ensure-bv-input-port"
|
||||
"Wrong type argument in position ~a: ~s"
|
||||
(list 1 bv-or-port) (list bv-or-port)))))
|
||||
|
||||
(define (for-each-right proc lst)
|
||||
(let loop ((lst lst))
|
||||
(unless (null? lst)
|
||||
(loop (cdr lst))
|
||||
(proc (car lst)))))
|
||||
|
||||
(define (%lzw-uncompress in out done? table-size)
|
||||
(let ((strings (make-hash-table table-size))
|
||||
(next-code (make-serial-number-generator 0 table-size))
|
||||
(universe (iota 256))
|
||||
(eof-code #f))
|
||||
(for-each (lambda (obj)
|
||||
(hash-set! strings (next-code) (list obj)))
|
||||
universe)
|
||||
(set! eof-code (next-code))
|
||||
(let loop ((previous-string '()))
|
||||
(let ((code (in)))
|
||||
(unless (or (done? code)
|
||||
(= code eof-code))
|
||||
(unless (hash-ref strings code)
|
||||
(hash-set! strings
|
||||
code
|
||||
(cons (last previous-string) previous-string)))
|
||||
(for-each-right out
|
||||
(hash-ref strings code))
|
||||
(let ((cs (hash-ref strings code)))
|
||||
(and=> (and (not (null? previous-string))
|
||||
(next-code))
|
||||
(cut hash-set! strings <> (cons (last cs)
|
||||
previous-string)))
|
||||
(loop cs)))))))
|
||||
|
||||
(define* (lzw-compress bv #:key (table-size 65536) dictionary)
|
||||
(call-with-values
|
||||
(lambda ()
|
||||
(open-bytevector-output-port))
|
||||
(lambda (output-port get-result)
|
||||
(let ((dict (%lzw-compress (cute get-u8 (ensure-bv-input-port bv))
|
||||
(cute put-u16 output-port <>)
|
||||
eof-object?
|
||||
table-size)))
|
||||
(if dictionary
|
||||
(values (get-result) dict)
|
||||
(get-result))))))
|
||||
|
||||
(define* (lzw-uncompress bv #:key (table-size 65536) dictionary)
|
||||
(call-with-values
|
||||
(lambda ()
|
||||
(open-bytevector-output-port))
|
||||
(lambda (output-port get-result)
|
||||
(let ((dict (%lzw-uncompress (cute get-u16 (open-bytevector-input-port bv))
|
||||
(cute put-u8 output-port <>)
|
||||
eof-object?
|
||||
table-size)))
|
||||
(if dictionary
|
||||
(values (get-result) dict)
|
||||
(get-result))))))
|
|
@ -17,6 +17,14 @@
|
|||
;;; You should have received a copy of the GNU General Public License
|
||||
;;; along with Gash. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
;;; Commentary:
|
||||
|
||||
;;; The initial ustar.scm was taken from the Guile100 challenge
|
||||
;;; https://github.com/spk121/guile100 from a contribution by Mark H
|
||||
;;; Weaver.
|
||||
|
||||
;;; Code:
|
||||
|
||||
(define-module (gash ustar)
|
||||
#:use-module (srfi srfi-1)
|
||||
#:use-module (srfi srfi-9 gnu)
|
||||
|
|
Loading…
Reference in New Issue