Fix handling of non-whitespace IFS separators
* geesh/word.scm (string-tokenize*): New function. (split-fields): Use it to handle non-whitespace IFS separators. * tests/word.scm: Add tests.
This commit is contained in:
parent
3e6d7830a9
commit
4b7d2404f3
|
@ -64,11 +64,40 @@ guaranteed to be a list."
|
|||
(else
|
||||
(loop (cdr xs) (cons (car xs) small-acc) big-acc)))))
|
||||
|
||||
(define (string-tokenize* s token-set)
|
||||
"Split the string @var{s} into a list of substrings, where each
|
||||
substring is a maximal non-empty contiguous sequence of characters
|
||||
from the character set @var{token-set} or its compliment."
|
||||
|
||||
(define token-set-complement
|
||||
(let ((token-set* (char-set-complement token-set)))
|
||||
(lambda (cs)
|
||||
(if (eq? cs token-set) token-set* token-set))))
|
||||
|
||||
(let loop ((index 0) (start 0) (cs token-set) (acc '()))
|
||||
(cond
|
||||
((>= index (string-length s))
|
||||
(reverse! (if (> index start)
|
||||
(cons (substring s start index) acc)
|
||||
acc)))
|
||||
((char-set-contains? cs (string-ref s index))
|
||||
(loop (1+ index) start cs acc))
|
||||
(else
|
||||
(loop index index
|
||||
(token-set-complement cs)
|
||||
(if (> index start)
|
||||
(cons (substring s start index) acc)
|
||||
acc))))))
|
||||
|
||||
(define (split-fields qword ifs)
|
||||
"Split @var{qword} into a list of qwords delimited by any character
|
||||
in the string @var{ifs}."
|
||||
|
||||
(define ifs? (cut string-index ifs <>))
|
||||
(define char-set:ifs
|
||||
(string->char-set ifs))
|
||||
|
||||
(define char-set:ifs/nw
|
||||
(char-set-difference char-set:ifs char-set:whitespace))
|
||||
|
||||
(define (wedge-apart-quote qword)
|
||||
(let loop ((qword (normalize-word qword)) (acc '()))
|
||||
|
@ -87,14 +116,27 @@ in the string @var{ifs}."
|
|||
(match qword
|
||||
(('<sh-quote> quote) (wedge-apart-quote quote))
|
||||
(('<sh-at> vals) (apply append (infix '(wedge) (map wedge-apart vals))))
|
||||
("" '())
|
||||
((? string? str)
|
||||
(let ((str-parts (string-split str ifs?)))
|
||||
(if (every string-null? str-parts)
|
||||
'(wedge)
|
||||
(filter (lambda (x)
|
||||
(or (eq? x 'wedge) (not (string-null? x))))
|
||||
(infix 'wedge str-parts)))))
|
||||
(let ((tokens (string-tokenize* str char-set:ifs)))
|
||||
(append-map (lambda (token)
|
||||
(if (string-any char-set:ifs token)
|
||||
;; Every occurrence of a non-whitespace
|
||||
;; separator must delimit a field. This
|
||||
;; means that we have to add a blank field
|
||||
;; for every non-whitespace separator in
|
||||
;; 'token' beyond the first.
|
||||
(let ((count (string-count token char-set:ifs/nw)))
|
||||
(cons 'wedge
|
||||
(append-map (const '("" wedge))
|
||||
(iota (max 0 (- count 1))))))
|
||||
(list token)))
|
||||
;; When a word starts with a non-whitespace
|
||||
;; separator, it still delimits two fields, the
|
||||
;; one on the left being empty.
|
||||
(match tokens
|
||||
(((? (cut string-any char-set:ifs/nw <>)) . rest)
|
||||
(cons "" tokens))
|
||||
(_ tokens)))))
|
||||
(_ (append-map wedge-apart qword))))
|
||||
|
||||
(let ((wedged (wedge-apart qword)))
|
||||
|
|
|
@ -359,7 +359,45 @@ the `set' built-in for details on these options.)"
|
|||
|
||||
|
||||
;;; Field splitting (IFS)
|
||||
;;;
|
||||
;;; FIXME: Test that field splitting respects the IFS variable.
|
||||
|
||||
(test-equal "Respects IFS value"
|
||||
'("foo" "bar")
|
||||
(let ((env (make-test-env '(("IFS" . "-")))))
|
||||
(expand-word env '("foo-bar"))))
|
||||
|
||||
(test-equal "Combines multiple whitespace separators"
|
||||
'("foo" "bar")
|
||||
(let ((env (make-test-env '(("IFS" . " ")))))
|
||||
(expand-word env '("foo bar"))))
|
||||
|
||||
(test-equal "Keeps multiple non-whitespace separators"
|
||||
'("foo" "" "bar")
|
||||
(let ((env (make-test-env '(("IFS" . "-")))))
|
||||
(expand-word env '("foo--bar"))))
|
||||
|
||||
(test-equal "Combines whitespace separators with a non-whitespace separator"
|
||||
'("foo" "bar")
|
||||
(let ((env (make-test-env '(("IFS" . "- ")))))
|
||||
(expand-word env '("foo - bar"))))
|
||||
|
||||
(test-equal "Keeps multiple non-whitespace separators with whitespace"
|
||||
'("foo" "" "bar")
|
||||
(let ((env (make-test-env '(("IFS" . "- ")))))
|
||||
(expand-word env '("foo - - bar"))))
|
||||
|
||||
(test-equal "Splits on leading non-whitespace separator"
|
||||
'("" "foo")
|
||||
(let ((env (make-test-env '(("IFS" . "-")))))
|
||||
(expand-word env '("-foo"))))
|
||||
|
||||
(test-equal "Does not split on trailing non-whitespace separator"
|
||||
'("foo")
|
||||
(let ((env (make-test-env '(("IFS" . "-")))))
|
||||
(expand-word env '("foo-"))))
|
||||
|
||||
(test-equal "Makes one field for single non-whitespace separator"
|
||||
'("")
|
||||
(let ((env (make-test-env '(("IFS" . "-")))))
|
||||
(expand-word env '("-"))))
|
||||
|
||||
(test-end)
|
||||
|
|
Loading…
Reference in New Issue