gash/gash/pattern.scm

254 lines
10 KiB
Scheme

;;; Gash -- Guile As SHell
;;; Copyright © 2018 Timothy Sample <samplet@ngyro.com>
;;;
;;; This file is part of Gash.
;;;
;;; Gash is free software: you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
;;; the Free Software Foundation, either version 3 of the License, or
;;; (at your option) any later version.
;;;
;;; Gash is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with Gash. If not, see <http://www.gnu.org/licenses/>.
(define-module (gash pattern)
#:use-module (ice-9 match)
#:use-module (ice-9 receive)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-26)
#:export (parse-pattern
pattern-quote
pattern-plain?
pattern-match?))
(define* (parse-rdelim s1 s2 #:optional (start 0) (end (string-length s1)))
(define (not-backslash? chr) (not (char=? chr #\\)))
(define (escaped? index)
(even? (- index (or (string-rindex s1 not-backslash? start index) 0))))
(let loop ((index (string-contains s1 s2 start end)))
(match index
(#f (values #f 0))
(_ (if (escaped? index)
(loop (string-contains s1 s2 (1+ index) end))
(values (substring s1 start index) (+ (- index start) 2)))))))
(define* (parse-collating-symbol s #:optional (start 0)
(end (string-length s)))
(parse-rdelim s ".]" start end))
(define* (parse-equivalence-class s #:optional (start 0)
(end (string-length s)))
(parse-rdelim s "=]" start end))
(define* (parse-character-class s #:optional (start 0)
(end (string-length s)))
(parse-rdelim s ":]" start end))
(define character-range
(let ((lower "abcdefghijklmnopqrstuvwxyz")
(upper "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
(digits "0123456789"))
(lambda (start end)
(let loop ((strs (list lower upper digits)))
(match strs
(() #f)
((str . tail)
(match (string-index str start)
(#f (loop tail))
(sindex
(match (string-index str end)
(#f (loop tail))
(eindex (if (<= sindex eindex)
(string->list str sindex (1+ eindex))
(loop tail))))))))))))
(define* (parse-matching-bracket-expression s #:optional (start 0)
(end (string-length s)))
(let loop ((i start) (acc '()))
(match (and (< i end) (string-ref s i))
(#f (values #f 0))
(#\] (if (= i start)
(loop (1+ i) (cons #\] acc))
(values (list->char-set acc) (1+ (- i start)))))
(#\[ (match (and (< (1+ i) end) (string-ref s (1+ i)))
(#\. (receive (result length)
(parse-collating-symbol s (+ i 2) end)
(if result
(throw 'pattern-collating-symbol)
(loop (1+ i) (cons #\[ acc)))))
(#\= (receive (result length)
(parse-equivalence-class s (+ i 2) end)
(if result
(throw 'pattern-equivalence-class)
(loop (1+ i) (cons #\[ acc)))))
(#\: (receive (result length)
(parse-character-class s (+ i 2) end)
(if result
(throw 'pattern-character-class)
(loop (1+ i) (cons #\[ acc)))))
(_ (loop (1+ i) (cons #\[ acc)))))
(#\- (if (or (= i start)
(and (< (1+ i) end) (char=? (string-ref s (1+ i)) #\])))
(loop (1+ i) (cons #\- acc))
(let ((alpha (and (pair? acc) (car acc)))
;; XXX: Escaped range end?
(omega (and (< (1+ i) end) (string-ref s (1+ i)))))
(match (character-range alpha omega)
(#f (throw 'pattern-range-expression))
(chrs (loop (+ i 2) (append chrs acc)))))))
(#\\ (if (< (1+ i) end)
(loop (+ i 2) (cons (string-ref s (1+ i)) acc))
(loop (1+ i) acc)))
(chr (loop (1+ i) (cons chr acc))))))
(define* (parse-bracket-expression s #:optional (start 0)
(end (string-length s)))
(let* ((matching? (not (char=? (string-ref s start) #\!)))
(start* (if matching? start (1+ start))))
(receive (result length)
(parse-matching-bracket-expression s start* end)
(if (or (not result) matching?)
(values result length)
(values (char-set-complement! result) (1+ length))))))
(define* (parse-pattern s #:optional (start 0) (end (string-length s)))
"Parse the string @var{s} as a pattern."
(let loop ((i start) (parts '()) (acc '()))
(match (and (< i end) (string-ref s i))
(#f (if (null? acc)
(reverse! parts)
(reverse! (cons (reverse! acc) parts))))
(#\* (if (null? acc)
(loop (1+ i) (cons '* parts) '())
(loop (1+ i) (cons* '* (reverse! acc) parts) '())))
(#\? (loop (1+ i) parts (cons '? acc)))
(#\[ (if (< (1+ i) end)
(receive (result length)
(parse-bracket-expression s (1+ i) end)
(if result
(loop (+ i length 1) parts (cons result acc))
(loop (1+ i) parts (cons #\[ acc))))
(loop (1+ i) parts (cons #\[ acc))))
(#\\ (if (< (1+ i) end)
(loop (+ i 2) parts (cons (string-ref s (1+ i)) acc))
(loop (1+ i) parts acc)))
(chr (loop (1+ i) parts (cons chr acc))))))
(define pattern-quote
(let ((specials '(#\\ #\* #\? #\[ #\] #\! #\-)))
(lambda (s)
"Quote all the special characters in @var{s} so that none of
them are treated specially when @var{s} is interpreted as a pattern."
(reverse-list->string
(string-fold (lambda (chr acc)
(if (member chr specials)
(cons* chr #\\ acc)
(cons chr acc)))
'()
s)))))
(define (pattern-plain? pattern)
"Check if @var{pattern} free of special pattern constructions like
asterisks and bracket expressions. If a pattern is ``plain'' its
source string is the only string that will match it."
(every (match-lambda
('* #f)
(parts (every (match-lambda
((or (? char?) (? string?)) #t)
(_ #f))
parts)))
pattern))
(define* (string-starts-with-part s part #:optional (start 0)
(end (string-length s)))
(let loop ((part part) (i start))
(match (and (< i end) part)
(#f (match part
(() (- i start))
(((? string? s2) . rest)
(and (string-null? s2)
(loop rest i)))
(_ #f)))
(() (- i start))
(('? . rest) (loop rest (1+ i)))
(((? string? s2) . rest)
(and (string-prefix? s2 s 0 (string-length s2) i end)
(loop rest (+ i (string-length s2)))))
(((? char? chr) . rest)
(and (char=? (string-ref s i) chr)
(loop rest (1+ i))))
(((? char-set? cs) . rest)
(and (char-set-contains? cs (string-ref s i))
(loop rest (1+ i)))))))
(define (pattern-part-length part)
(let loop ((part part) (length 0))
(match part
(() length)
(((? string? str) . rest) (loop rest (+ length (string-length str))))
((first . rest) (loop rest (1+ length))))))
(define* (string-ends-with-part s part #:optional (start 0)
(end (string-length s)))
(let ((start* (- end (pattern-part-length part))))
(and (>= start* start)
(string-starts-with-part s part start* end))))
(define* (string-contains-part s part #:optional (start 0)
(end (string-length s)))
(let loop ((part part) (i start))
(match part
(() (cons start (- i start)))
(('? . rest) (loop rest (1+ i)))
(((? string? s2) . rest)
(and=> (string-contains s s2 i end)
(lambda (index)
(or (and=> (string-starts-with-part s part index end)
(cut cons index <>))
(loop part (1+ index))))))
(((or (? char? cp) (? char-set? cp)) . rest)
(and=> (string-index s cp i end)
(lambda (index)
(or (and=> (string-starts-with-part s part index end)
(cut cons index <>))
(loop part (1+ index)))))))))
(define* (pattern-match? pattern str #:optional (start 0)
(end (string-length str))
#:key explicit-initial-period?)
"Check if @var{str} matches @var{pattern}."
(if (and explicit-initial-period?
(< start end)
(char=? (string-ref str start) #\.))
(match pattern
(((#\. . _) . _)
(pattern-match? pattern str start end
#:explicit-initial-period? #f))
((((? string? s) . _) ._)
(and (string-prefix? "." s)
(pattern-match? pattern str start end
#:explicit-initial-period? #f)))
(_ #f))
(let loop ((pattern pattern) (i start))
(match pattern
(() (= i end))
(('*) #t)
(('* (? pair? part)) (string-ends-with-part str part i end))
(('* (? pair? part) . rest)
(and=> (string-contains-part str part i end)
(match-lambda
((match-index . match-length)
(loop rest (+ match-index match-length))))))
(((? pair? part) . rest)
(and=> (string-starts-with-part str part i end)
(lambda (length)
(loop rest (+ i length)))))))))