;;; Gash --- Guile As SHell ;;; Copyright © 2013 Mark H Weaver ;;; Copyright © 2018 Jan (janneke) Nieuwenhuizen ;;; ;;; This file is part of Gash. ;;; ;;; Gash is free software; you can redistribute it and/or modify it ;;; under the terms of the GNU General Public License as published by ;;; the Free Software Foundation; either version 3 of the License, or (at ;;; your option) any later version. ;;; ;;; Gash is distributed in the hope that it will be useful, but ;;; WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with Gash. If not, see . ;;; Commentary: ;;; The initial ustar.scm was taken from the Guile100 challenge ;;; https://github.com/spk121/guile100 from a contribution by Mark H ;;; Weaver. ;;; Code: (define-module (gash ustar) #:use-module (srfi srfi-1) #:use-module (srfi srfi-9 gnu) #:use-module (srfi srfi-26) #:use-module (ice-9 match) #:use-module (ice-9 receive) #:use-module (rnrs bytevectors) #:use-module (rnrs io ports) #:use-module (gash shell-utils) #:export (read-ustar-archive read-ustar-port write-ustar-archive write-ustar-port list-ustar-archive list-ustar-port)) (define (fmt-error fmt . args) (error (apply format #f fmt args))) ;; Like 'string-pad-right', but for bytevectors. However, unlike ;; 'string-pad-right', truncation is not allowed here. (define* (bytevector-pad bv len #:optional (byte 0) (start 0) (end (bytevector-length bv))) (when (< len (- end start)) (fmt-error "bytevector-pad: truncation would occur: len ~a, start ~a, end ~a, bv ~s" len start end bv)) (let ((result (make-bytevector len byte))) (bytevector-copy! bv start result 0 (- end start)) result)) (define (bytevector-append . bvs) (let* ((lengths (map bytevector-length bvs)) (total (fold + 0 lengths)) (result (make-bytevector total))) (fold (lambda (bv len pos) (bytevector-copy! bv 0 result pos len) (+ pos len)) 0 bvs lengths) result)) (define ustar-charset #; (char-set-union (ucs-range->char-set #x20 #x23) (ucs-range->char-set #x25 #x40) (ucs-range->char-set #x41 #x5B) (ucs-range->char-set #x5F #x60) (ucs-range->char-set #x61 #x7B)) char-set:ascii) (define (valid-ustar-char? c) (char-set-contains? ustar-charset c)) (define (ustar-string n str name) (unless (>= n (string-length str)) (fmt-error "~a is too long (max ~a): ~a" name n str)) (unless (string-every valid-ustar-char? str) (fmt-error "~a contains unsupported character(s): ~s in ~s" name (string-filter (negate valid-ustar-char?) str) str)) (bytevector-pad (string->bytevector str (make-transcoder (latin-1-codec))) n)) (define (ustar-0string n str name) (bytevector-pad (ustar-string (- n 1) str name) n)) (define (ustar-number n num name) (unless (and (integer? num) (exact? num) (not (negative? num))) (fmt-error "~a is not a non-negative exact integer: ~a" name num)) (unless (< num (expt 8 (- n 1))) (fmt-error "~a is too large (max ~a): ~a" name (expt 8 (- n 1)) num)) (bytevector-pad (string->bytevector (string-pad (number->string num 8) (- n 1) #\0) (make-transcoder (latin-1-codec))) n)) (define (checksum-bv bv) (let ((len (bytevector-length bv))) (let loop ((i 0) (sum 0)) (if (= i len) sum (loop (+ i 1) (+ sum (bytevector-u8-ref bv i))))))) (define (checksum . bvs) (fold + 0 (map checksum-bv bvs))) (define nuls (make-bytevector 512 0)) ;; read a ustar record of exactly 512 bytes. (define (read-ustar-record port) (get-bytevector-n port 512)) ;; write a ustar record of exactly 512 bytes, starting with the ;; segment of BV between START (inclusive) and END (exclusive), and ;; padded at the end with nuls as needed. (define* (write-ustar-record port bv #:optional (start 0) (end (bytevector-length bv))) (when (< 512 (- end start)) (fmt-error "write-ustar-record: record too long: start ~s, end ~s, bv ~s" start end bv)) ;; We could have used 'bytevector-pad' here, ;; but instead use a method that avoids allocation. (put-bytevector port bv start end) (put-bytevector port nuls 0 (- 512 (- end start)))) ;; write 1024 zero bytes, which indicates the end of a ustar archive. (define (write-ustar-footer port) (put-bytevector port nuls) (put-bytevector port nuls)) (define (compose-path-name dir name) (if (or (string-null? dir) (file-name-separator? (string-ref dir (- (string-length dir) 1)))) (string-append dir name) (string-append dir "/" name))) ;; Like 'call-with-port', but also closes PORT if an error occurs. (define (call-with-port* port proc) (dynamic-wind (lambda () #f) (lambda () (proc port)) (lambda () (close port)))) (define (call-with-dirstream* dirstream proc) (dynamic-wind (lambda () #f) (lambda () (proc dirstream)) (lambda () (closedir dirstream)))) (define (files-in-directory dir) (call-with-dirstream* (opendir dir) (lambda (dirstream) (let loop ((files '())) (let ((name (readdir dirstream))) (cond ((eof-object? name) (reverse files)) ((member name '("." "..")) (loop files)) (else (loop (cons (compose-path-name dir name) files))))))))) ;; split the path into prefix and name fields for purposes of the ;; ustar header. If the entire path fits in the name field (100 chars ;; max), then leave the prefix empty. Otherwise, try to put the last ;; component into the name field and everything else into the prefix ;; field (155 chars max). If that fails, put as much as possible into ;; the prefix and the rest into the name field. This follows the ;; behavior of GNU tar when creating a ustar archive. (define (ustar-path-name-split path orig-path) (define (too-long) (fmt-error "~a: file name too long" orig-path)) (let ((len (string-length path))) (cond ((<= len 100) (values "" path)) ((> len 256) (too-long)) ((string-rindex path file-name-separator? (- len 101) (min (- len 1) 156)) => (lambda (i) (values (substring path 0 i) (substring path (+ i 1) len)))) (else (too-long))))) (define (bv->ustar-string bv name) (string-trim-right (bv->ustar-0string bv name) (compose zero? char->integer))) (define (bv->ustar-number bv name) (let ((string (bv->ustar-string bv name))) (or (string->number string 8) 0))) (define (bv->ustar-0string bv name) (bytevector->string bv (make-transcoder (latin-1-codec)))) (define-immutable-record-type (make-ustar-header name mode uid gid size mtime checksum ;; space type-flag link-name magic version uname gname dev-major dev-minor prefix) ustar-header? (name ustar-header-name ) (mode ustar-header-mode ) (uid ustar-header-uid ) (gid ustar-header-gid ) (size ustar-header-size ) (mtime ustar-header-mtime ) (checksum ustar-header-checksum ) ;;(space ustar-header-space ) (type-flag ustar-header-type-flag) (link-name ustar-header-link-name) (magic ustar-header-magic ) (version ustar-header-version ) (uname ustar-header-uname ) (gname ustar-header-gname ) (dev-major ustar-header-dev-major) (dev-minor ustar-header-dev-minor) (prefix ustar-header-prefix )) (define (ustar-header-type header) (let ((file-types #(regular - symlink char-special block-special directory fifo)) (type (string->number (ustar-header-type-flag header)))) (when (or (not type) (< type 0) (>= type (vector-length file-types))) (fmt-error "~a: unsupported file type ~a" (ustar-header-file-name header) type)) (vector-ref file-types (string->number (ustar-header-type-flag header))))) (define ustar-header-field-size-alist '((name . 100) (mode . 8) (uid . 8) (gid . 8) (size . 12) (mtime . 12) (checksum . 7) (space . 1) (type-flag . 1) (link-name . 100) (magic . 6) (version . 2) (uname . 32) (gname . 32) (dev-major . 8) (dev-minor . 8) (prefix . 155))) (define (ustar-footer? bv) (every zero? (array->list bv))) (define (sub-bytevector bv offset size) (let ((sub (make-bytevector size))) (bytevector-copy! bv offset sub 0 size) sub)) (define (read-ustar-header port) (define offset (let ((offset 0)) (lambda (. args) (if (null? args) offset (let ((n (car args))) (set! offset (+ offset n)) n))))) (let ((%record (read-ustar-record port))) (and (not (eof-object? %record)) (not (ustar-footer? %record)) (let* ((field-bv-alist `((dummy-checksum . ,(string->utf8 " ")) ,@(map (match-lambda ((field . size) (cons field (sub-bytevector %record (offset) (offset size))))) ustar-header-field-size-alist))) (checksum-fields '(name mode uid gid size mtime dummy-checksum type-flag link-name magic version uname gname dev-major dev-minor prefix)) (checksum (apply checksum (map (cut assoc-ref field-bv-alist <>) checksum-fields))) (header (make-ustar-header (bv->ustar-string (assoc-ref field-bv-alist 'name ) "file name" ) (bv->ustar-number (assoc-ref field-bv-alist 'mode ) "file mode" ) (bv->ustar-number (assoc-ref field-bv-alist 'uid ) "user id" ) (bv->ustar-number (assoc-ref field-bv-alist 'gid ) "group id" ) (bv->ustar-number (assoc-ref field-bv-alist 'size ) "file size" ) (bv->ustar-number (assoc-ref field-bv-alist 'mtime ) "modification time") (bv->ustar-number (assoc-ref field-bv-alist 'checksum ) "checksum" ) ;; (bv->ustar-string (assoc-ref field-bv-alist 'space ) "space" ) (bv->ustar-string (assoc-ref field-bv-alist 'type-flag) "type flag" ) (bv->ustar-string (assoc-ref field-bv-alist 'link-name) "link name" ) (bv->ustar-string (assoc-ref field-bv-alist 'magic ) "magic field" ) (bv->ustar-string (assoc-ref field-bv-alist 'version ) "version number" ) (bv->ustar-string (assoc-ref field-bv-alist 'uname ) "user name" ) (bv->ustar-string (assoc-ref field-bv-alist 'gname ) "group name" ) (bv->ustar-number (assoc-ref field-bv-alist 'dev-major) "dev major" ) (bv->ustar-number (assoc-ref field-bv-alist 'dev-minor) "dev minor" ) (bv->ustar-string (assoc-ref field-bv-alist 'prefix ) "directory name" )))) (when (not (= (ustar-header-checksum header) checksum)) (error "checksum mismatch, expected: ~s, got: ~s\n" (ustar-header-checksum header) checksum)) header)))) (define* (write-ustar-header port path st #:key group mtime numeric-owner? owner) (let* ((type (stat:type st)) (perms (stat:perms st)) (mtime (or mtime (stat:mtime st))) (uid (or owner (stat:uid st))) (gid (or group (stat:gid st))) (uname (or (false-if-exception (passwd:name (getpwuid uid))) "")) (gname (or (false-if-exception (group:name (getgrgid gid))) "")) (size (case type ((regular) (stat:size st)) (else 0))) (type-flag (case type ((regular) "0") ((symlink) "2") ((char-special) "3") ((block-special) "4") ((directory) "5") ((fifo) "6") (else (fmt-error "~a: unsupported file type ~a" path type)))) (link-name (case type ((symlink) (readlink path)) (else ""))) (dev-major (case type ((char-special block-special) (quotient (stat:rdev st) 256)) (else 0))) (dev-minor (case type ((char-special block-special) (remainder (stat:rdev st) 256)) (else 0))) ;; Convert file name separators to slashes. (slash-path (string-map (lambda (c) (if (file-name-separator? c) #\/ c)) path)) ;; Make the path name relative. ;; TODO: handle drive letters on windows. (relative-path (if (string-every #\/ slash-path) "." (string-trim slash-path #\/))) ;; If it's a directory, add a trailing slash, ;; otherwise remove trailing slashes. (full-path (case type ((directory) (string-append relative-path "/")) (else (string-trim-right relative-path #\/))))) (receive (prefix name) (ustar-path-name-split full-path path) (let* ((%name (ustar-string 100 name "file name")) (%mode (ustar-number 8 perms "file mode")) (%uid (ustar-number 8 uid "user id")) (%gid (ustar-number 8 gid "group id")) (%size (ustar-number 12 size "file size")) (%mtime (ustar-number 12 mtime "modification time")) (%type-flag (ustar-string 1 type-flag "type flag")) (%link-name (ustar-string 100 link-name "link name")) (%magic (ustar-0string 6 "ustar" "magic field")) (%version (ustar-string 2 "00" "version number")) (%uname (ustar-0string 32 uname "user name")) (%gname (ustar-0string 32 gname "group name")) (%dev-major (ustar-number 8 dev-major "dev major")) (%dev-minor (ustar-number 8 dev-minor "dev minor")) (%prefix (ustar-string 155 prefix "directory name")) (%dummy-checksum (string->utf8 " ")) (%checksum (bytevector-append (ustar-number 7 (checksum %name %mode %uid %gid %size %mtime %dummy-checksum %type-flag %link-name %magic %version %uname %gname %dev-major %dev-minor %prefix) "checksum") (string->utf8 " ")))) (write-ustar-record port (bytevector-append %name %mode %uid %gid %size %mtime %checksum %type-flag %link-name %magic %version %uname %gname %dev-major %dev-minor %prefix)))))) (define* (write-ustar-file port file-name #:key group mtime numeric-owner? owner sort-order verbosity) (let* ((file-name (if (string-every file-name-separator? file-name) file-name-separator-string (string-trim-right file-name file-name-separator?))) (st (lstat file-name)) (type (stat:type st)) (size (stat:size st))) (unless (zero? verbosity) (if (> verbosity 1) (display-file file-name st) (display file-name)) (newline)) (write-ustar-header port file-name st #:group group #:mtime mtime #:numeric-owner? numeric-owner? #:owner owner) (case type ((regular) (call-with-port* (open-file file-name "rb") (lambda (in) (let ((buf (make-bytevector 512))) (let loop ((left size)) (when (positive? left) (let* ((asked (min left 512)) (obtained (get-bytevector-n! in buf 0 asked))) (when (or (eof-object? obtained) (< obtained asked)) (fmt-error "~a: file appears to have shrunk" file-name)) (write-ustar-record port buf 0 obtained) (loop (- left obtained))))))))) ((directory) (let* ((files (files-in-directory file-name)) (files (if (eq? sort-order 'name) (sort files string<) files))) (for-each (lambda (file-name) (write-ustar-file port file-name #:group group #:mtime mtime #:numeric-owner? numeric-owner? #:owner owner #:verbosity verbosity)) files)))))) (define* (ustar-header-file-name header #:key (strip 0)) (let* ((name (ustar-header-name header)) (prefix (ustar-header-prefix header)) (file-name (if (string-null? prefix) name (string-append prefix "/" name)))) (if (zero? strip) file-name (string-join (list-tail (string-split file-name #\/) strip) "/")))) (define* (read-ustar-file port header #:key (extract? #t) (strip 0)) (let* ((size (ustar-header-size header)) (file-name (ustar-header-file-name header #:strip strip)) (dir (dirname file-name)) (extract? (and extract? (not (string-null? file-name)))) (thunk (lambda _ (set-port-encoding! (current-output-port) "ISO-8859-1") ; bootstrap-guile uses default UTF-8 (let loop ((read 0)) (and (< read size) (let ((record (read-ustar-record port))) (and record (let* ((read (+ read 512)) (block (if (< read size) record (sub-bytevector record 0 (- size -512 read))))) (when extract? (display (bv->ustar-0string block "block"))) (loop read))))))))) (when extract? (mkdir-p dir)) (if extract? (let ((mtime (ustar-header-mtime header))) (case (ustar-header-type header) ((regular) (if (file-exists? file-name) (delete-file file-name)) (with-output-to-file file-name thunk #:binary #t) (utime file-name mtime mtime) (chmod file-name (ustar-header-mode header))) ((directory) (mkdir-p file-name) (utime file-name mtime mtime)) ((symlink) (symlink (ustar-header-link-name header) file-name )))) (thunk)))) (define (ustar-header->stat header) (let* ((stat-size 17) (si (list->vector (iota stat-size))) (st (make-vector stat-size 0))) (vector-set! st (stat:mode si) (ustar-header-mode header)) (vector-set! st (stat:uid si) (ustar-header-uid header)) (vector-set! st (stat:gid si) (ustar-header-gid header)) (vector-set! st (stat:size si) (ustar-header-size header)) (vector-set! st (stat:mtime si) (ustar-header-mtime header)) (vector-set! st (stat:type si) (ustar-header-type header)) st)) (define* (display-header header #:key verbose?) (let ((file-name (ustar-header-file-name header))) (if verbose? (display-file (ustar-header-file-name header) (ustar-header->stat header)) (display file-name)) (newline))) (define* (write-ustar-port out files #:key group mtime numeric-owner? owner sort-order verbosity) (for-each (cut write-ustar-file out <> #:group group #:mtime mtime #:numeric-owner? numeric-owner? #:owner owner #:sort-order sort-order #:verbosity verbosity) files) (write-ustar-footer out)) (define* (write-ustar-archive file-name files #:key group mtime numeric-owner? owner sort-order verbosity) (catch #t (lambda _ (call-with-port* (open-file file-name "wb") (cut write-ustar-port <> files #:group group #:mtime mtime #:numeric-owner? numeric-owner? #:owner owner #:sort-order sort-order #:verbosity verbosity))) (lambda (key subr message args . rest) (false-if-exception (delete-file file-name)) (format (current-error-port) "ERROR: ~a\n" (apply format #f message args)) (exit 1)))) (define* (read-ustar-port in files #:key (extract? #t) (strip 0) verbosity) (let ((dirs (let loop ((header (read-ustar-header in)) (dirs '())) (if (not (and header (not (eof-object? header)))) dirs (begin (unless (zero? verbosity) (display-header header #:verbose? (> verbosity 1))) (read-ustar-file in header #:extract? extract? #:strip strip) (loop (read-ustar-header in) (if (eq? (ustar-header-type header) 'directory) (cons header dirs) dirs))))))) (define (chmod-header header) (chmod (ustar-header-file-name header #:strip strip) (ustar-header-mode header))) (for-each chmod-header dirs))) (define* (read-ustar-archive file-name files #:key (extract? #t) (strip 0) verbosity) (catch #t (lambda _ (call-with-port* (open-file file-name "rb") (cut read-ustar-port <> files #:extract? extract? #:strip strip #:verbosity verbosity))) (lambda (key subr message args . rest) (format (current-error-port) "ERROR: ~a\n" (apply format #f message args)) (exit 1)))) (define* (list-ustar-archive file-name files #:key (strip 0) verbosity) (read-ustar-archive file-name files #:extract? #f #:strip strip #:verbosity verbosity)) (define* (list-ustar-port in files #:key (strip 0) verbosity) (read-ustar-port in files #:extract? #f #:strip strip #:verbosity verbosity)) ;;; Local Variables: ;;; mode: scheme ;;; eval: (put 'call-with-port* 'scheme-indent-function 1) ;;; eval: (put 'call-with-dirstream* 'scheme-indent-function 1) ;;; End: