318 lines
12 KiB
Scheme
318 lines
12 KiB
Scheme
;;; Gash -- Guile As SHell
|
|
;;; Copyright © 2018, 2019 Timothy Sample <samplet@ngyro.com>
|
|
;;;
|
|
;;; This file is part of Gash.
|
|
;;;
|
|
;;; Gash is free software: you can redistribute it and/or modify
|
|
;;; it under the terms of the GNU General Public License as published by
|
|
;;; the Free Software Foundation, either version 3 of the License, or
|
|
;;; (at your option) any later version.
|
|
;;;
|
|
;;; Gash is distributed in the hope that it will be useful,
|
|
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
;;; GNU General Public License for more details.
|
|
;;;
|
|
;;; You should have received a copy of the GNU General Public License
|
|
;;; along with Gash. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
(define-module (gash pattern)
|
|
#:use-module (ice-9 match)
|
|
#:use-module (ice-9 receive)
|
|
#:use-module (srfi srfi-1)
|
|
#:use-module (srfi srfi-9 gnu)
|
|
#:use-module (srfi srfi-14)
|
|
#:use-module (srfi srfi-26)
|
|
#:use-module (gash compat srfi-43)
|
|
#:export (parse-pattern
|
|
pattern-quote
|
|
pattern-null?
|
|
pattern-plain?
|
|
pattern-match?
|
|
pattern-drop
|
|
pattern-drop-right))
|
|
|
|
(cond-expand
|
|
(mes
|
|
;; Mes does not allow a range for 'string->list'. We use this so
|
|
;; infrequently, that we just define a new procedure for when we
|
|
;; need it.
|
|
(define (substring->list string start end)
|
|
(string->list (substring string start end))))
|
|
(else
|
|
(define substring->list string->list)))
|
|
|
|
(define-immutable-record-type <pattern>
|
|
(make-pattern parts)
|
|
pattern?
|
|
(parts pattern-parts))
|
|
|
|
(define* (parse-rdelim s1 s2 #:optional (start 0) (end (string-length s1)))
|
|
|
|
(define (not-backslash? chr) (not (char=? chr #\\)))
|
|
|
|
(define (escaped? index)
|
|
(even? (- index (or (string-rindex s1 not-backslash? start index) 0))))
|
|
|
|
(let loop ((index (string-contains s1 s2 start end)))
|
|
(match index
|
|
(#f (values #f 0))
|
|
(_ (if (escaped? index)
|
|
(loop (string-contains s1 s2 (1+ index) end))
|
|
(values (substring s1 start index) (+ (- index start) 2)))))))
|
|
|
|
(define* (parse-collating-symbol s #:optional (start 0)
|
|
(end (string-length s)))
|
|
(parse-rdelim s ".]" start end))
|
|
|
|
(define* (parse-equivalence-class s #:optional (start 0)
|
|
(end (string-length s)))
|
|
(parse-rdelim s "=]" start end))
|
|
|
|
(define* (parse-character-class s #:optional (start 0)
|
|
(end (string-length s)))
|
|
(parse-rdelim s ":]" start end))
|
|
|
|
(define character-range
|
|
(let ((lower "abcdefghijklmnopqrstuvwxyz")
|
|
(upper "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
|
|
(digits "0123456789"))
|
|
(lambda (start end)
|
|
(let loop ((strs (list lower upper digits)))
|
|
(match strs
|
|
(() #f)
|
|
((str . tail)
|
|
(match (string-index str start)
|
|
(#f (loop tail))
|
|
(sindex
|
|
(match (string-index str end)
|
|
(#f (loop tail))
|
|
(eindex (if (<= sindex eindex)
|
|
(substring->list str sindex (1+ eindex))
|
|
(loop tail))))))))))))
|
|
|
|
(define* (parse-matching-bracket-expression s #:optional (start 0)
|
|
(end (string-length s)))
|
|
(let loop ((i start) (acc '()) (errors '()))
|
|
(match (and (< i end) (string-ref s i))
|
|
(#f (values #f 0))
|
|
(#\] (if (= i start)
|
|
(loop (1+ i) (cons #\] acc) errors)
|
|
(match errors
|
|
(() (values (list->char-set acc) (1+ (- i start))))
|
|
(_ (throw (last errors))))))
|
|
(#\[ (match (and (< (1+ i) end) (string-ref s (1+ i)))
|
|
(#\. (receive (result length)
|
|
(parse-collating-symbol s (+ i 2) end)
|
|
(if result
|
|
(loop (+ i length 1) acc
|
|
(cons 'pattern-collating-symbol errors))
|
|
(loop (1+ i) (cons #\[ acc) errors))))
|
|
(#\= (receive (result length)
|
|
(parse-equivalence-class s (+ i 2) end)
|
|
(if result
|
|
(loop (+ i length 1) acc
|
|
(cons 'pattern-equivalence-class errors))
|
|
(loop (1+ i) (cons #\[ acc) errors))))
|
|
(#\: (receive (result length)
|
|
(parse-character-class s (+ i 2) end)
|
|
(if result
|
|
(loop (+ i length 1) acc
|
|
(cons 'pattern-character-class errors))
|
|
(loop (1+ i) (cons #\[ acc) errors))))
|
|
(_ (loop (1+ i) (cons #\[ acc) errors))))
|
|
(#\- (if (or (= i start)
|
|
(and (< (1+ i) end) (char=? (string-ref s (1+ i)) #\])))
|
|
(loop (1+ i) (cons #\- acc) errors)
|
|
(let ((alpha (and (pair? acc) (car acc)))
|
|
;; XXX: Escaped range end?
|
|
(omega (and (< (1+ i) end) (string-ref s (1+ i)))))
|
|
(match (character-range alpha omega)
|
|
(#f (loop (+ i 2) acc
|
|
(cons 'pattern-range-expression errors)))
|
|
(chrs (loop (+ i 2) (append chrs acc) errors))))))
|
|
(#\\ (if (< (1+ i) end)
|
|
(loop (+ i 2) (cons (string-ref s (1+ i)) acc) errors)
|
|
(loop (1+ i) acc errors)))
|
|
(chr (loop (1+ i) (cons chr acc) errors)))))
|
|
|
|
(define* (parse-bracket-expression s #:optional (start 0)
|
|
(end (string-length s)))
|
|
(let* ((matching? (not (char=? (string-ref s start) #\!)))
|
|
(start* (if matching? start (1+ start))))
|
|
(receive (result length)
|
|
(parse-matching-bracket-expression s start* end)
|
|
(if (or (not result) matching?)
|
|
(values result length)
|
|
(values (char-set-complement! result) (1+ length))))))
|
|
|
|
(define* (parse-part s #:optional (start 0) (end (string-length s)))
|
|
(let loop ((i start) (acc '()))
|
|
(match (and (< i end) (string-ref s i))
|
|
(#f (match acc
|
|
(() (values #f 0))
|
|
(_ (values (list->vector (reverse! acc)) (- i start)))))
|
|
(#\* (match acc
|
|
(() (values '* 1))
|
|
(_ (values (list->vector (reverse! acc)) (- i start)))))
|
|
(#\? (loop (1+ i) (cons char-set:full acc)))
|
|
(#\[ (if (< (1+ i) end)
|
|
(receive (result length)
|
|
(parse-bracket-expression s (1+ i) end)
|
|
(if result
|
|
(loop (+ i length 1) (cons result acc))
|
|
(loop (1+ i) (cons #\[ acc))))
|
|
(loop (1+ i) (cons #\[ acc))))
|
|
(#\\ (if (< (1+ i) end)
|
|
(loop (+ i 2) (cons (string-ref s (1+ i)) acc))
|
|
(loop (1+ i) acc)))
|
|
(chr (loop (1+ i) (cons chr acc))))))
|
|
|
|
(define* (parse-pattern s #:optional (start 0) (end (string-length s)))
|
|
"Parse the string @var{s} as a pattern."
|
|
(let loop ((i start) (parts '()))
|
|
(receive (part length) (parse-part s i end)
|
|
(match part
|
|
(#f (make-pattern (reverse! parts)))
|
|
('* (match parts
|
|
(() (loop (+ i length) (list #() #())))
|
|
((#() . _) (loop (+ i length) parts))
|
|
(_ (loop (+ i length) (cons #() parts)))))
|
|
(_ (match parts
|
|
((#() . rest) (loop (+ i length) (cons part rest)))
|
|
(_ (loop (+ i length) (cons part parts)))))))))
|
|
|
|
(define pattern-quote
|
|
(let ((specials '(#\\ #\* #\? #\[ #\] #\! #\-)))
|
|
(lambda (s)
|
|
"Quote all the special characters in @var{s} so that none of
|
|
them are treated specially when @var{s} is interpreted as a pattern."
|
|
(reverse-list->string
|
|
(string-fold (lambda (chr acc)
|
|
(if (member chr specials)
|
|
(cons* chr #\\ acc)
|
|
(cons chr acc)))
|
|
'()
|
|
s)))))
|
|
|
|
(define (pattern-null? pattern)
|
|
(match (pattern-parts pattern)
|
|
(() #t)
|
|
((part) (vector-empty? part))
|
|
(_ #f)))
|
|
|
|
(define (pattern-plain? pattern)
|
|
"Check if @var{pattern} free of special pattern constructions like
|
|
asterisks and bracket expressions. If a pattern is ``plain'' its
|
|
source string is the only string that will match it."
|
|
(match (pattern-parts pattern)
|
|
(() #t)
|
|
((part) (vector-every char? part))
|
|
(_ #f)))
|
|
|
|
(define* (string-starts-with-part s part #:optional (start 0)
|
|
(end (string-length s)))
|
|
(and (<= (vector-length part) (- end start))
|
|
(let loop ((i 0) (j start))
|
|
(match (and (< i (vector-length part)) (vector-ref part i))
|
|
(#f #t)
|
|
((? char? chr)
|
|
(and (char=? (string-ref s j) chr)
|
|
(loop (1+ i) (1+ j))))
|
|
((? char-set? cs)
|
|
(and (char-set-contains? cs (string-ref s j))
|
|
(loop (1+ i) (1+ j))))))))
|
|
|
|
(define* (string-ends-with-part s part #:optional (start 0)
|
|
(end (string-length s)))
|
|
(let ((start* (- end (vector-length part))))
|
|
(and (>= start* start)
|
|
(string-starts-with-part s part start* end))))
|
|
|
|
(define* (string-contains-part s part #:optional (start 0)
|
|
(end (string-length s)))
|
|
(let loop ((i start))
|
|
(cond
|
|
((>= i end) #f)
|
|
((string-starts-with-part s part i end) i)
|
|
(else (loop (1+ i))))))
|
|
|
|
(define* (string-contains-part-right s part #:optional (start 0)
|
|
(end (string-length s)))
|
|
(let loop ((i end))
|
|
(cond
|
|
((< i start) #f)
|
|
((string-ends-with-part s part start i) (- i (vector-length part)))
|
|
(else (loop (1- i))))))
|
|
|
|
(define* (pattern-match? pattern str #:key explicit-initial-period?)
|
|
"Check if @var{str} matches @var{pattern}."
|
|
|
|
;; XXX: These used to be optional arguments, but Mes handles mixing
|
|
;; optional and keyword arguments differently than Guile.
|
|
(define start 0)
|
|
(define end (string-length str))
|
|
|
|
(define (parts-match? parts start)
|
|
(match parts
|
|
(() (= start end))
|
|
((part) (string-ends-with-part str part start end))
|
|
((part . rest)
|
|
(and=> (string-contains-part str part start end)
|
|
(lambda (m)
|
|
(let ((start* (+ m (vector-length part))))
|
|
(parts-match? rest start*)))))))
|
|
|
|
(let ((parts (pattern-parts pattern)))
|
|
(if (and explicit-initial-period?
|
|
(< start end)
|
|
(char=? (string-ref str start) #\.))
|
|
(match parts
|
|
((#(#\. _ ...) . _)
|
|
(pattern-match? pattern str #:explicit-initial-period? #f))
|
|
(_ #f))
|
|
(match parts
|
|
(() (= start end))
|
|
((part . rest)
|
|
(and (string-starts-with-part str part start end)
|
|
(let ((start* (+ start (vector-length part))))
|
|
(parts-match? rest start*))))))))
|
|
|
|
(define* (pattern-drop pattern str #:key greedy?)
|
|
(define (match-parts parts i)
|
|
(match parts
|
|
(() (substring str i))
|
|
((part . rest)
|
|
(match (if (and greedy? (null? rest))
|
|
(string-contains-part-right str part i)
|
|
(string-contains-part str part i))
|
|
(#f str)
|
|
(m (match-parts rest (+ m (vector-length part))))))))
|
|
|
|
(match (pattern-parts pattern)
|
|
(() str)
|
|
((part . rest)
|
|
(if (string-starts-with-part str part)
|
|
(match-parts rest (vector-length part))
|
|
str))))
|
|
|
|
(define* (pattern-drop-right pattern str #:key greedy?)
|
|
(define (match-parts parts i)
|
|
(match parts
|
|
(() (substring str 0 i))
|
|
((part . rest)
|
|
(match (if (and greedy? (null? rest))
|
|
(string-contains-part str part 0 i)
|
|
(string-contains-part-right str part 0 i))
|
|
(#f str)
|
|
(m (match-parts rest m))))))
|
|
|
|
(let ((strap (reverse (pattern-parts pattern))))
|
|
(match strap
|
|
(() str)
|
|
((part . rest)
|
|
(if (string-ends-with-part str part)
|
|
(let ((i (- (string-length str) (vector-length part))))
|
|
(match-parts rest i))
|
|
str)))))
|