;;; Gash -- Guile As SHell ;;; Copyright © 2018, 2019 Timothy Sample ;;; ;;; This file is part of Gash. ;;; ;;; Gash is free software: you can redistribute it and/or modify ;;; it under the terms of the GNU General Public License as published by ;;; the Free Software Foundation, either version 3 of the License, or ;;; (at your option) any later version. ;;; ;;; Gash is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with Gash. If not, see . (define-module (gash pattern) #:use-module (ice-9 match) #:use-module (ice-9 receive) #:use-module (srfi srfi-1) #:use-module (srfi srfi-9 gnu) #:use-module (srfi srfi-26) #:use-module (gash compat srfi-43) #:export (parse-pattern pattern-quote pattern-null? pattern-plain? pattern-match? pattern-drop pattern-drop-right)) (define-immutable-record-type (make-pattern parts) pattern? (parts pattern-parts)) (define* (parse-rdelim s1 s2 #:optional (start 0) (end (string-length s1))) (define (not-backslash? chr) (not (char=? chr #\\))) (define (escaped? index) (even? (- index (or (string-rindex s1 not-backslash? start index) 0)))) (let loop ((index (string-contains s1 s2 start end))) (match index (#f (values #f 0)) (_ (if (escaped? index) (loop (string-contains s1 s2 (1+ index) end)) (values (substring s1 start index) (+ (- index start) 2))))))) (define* (parse-collating-symbol s #:optional (start 0) (end (string-length s))) (parse-rdelim s ".]" start end)) (define* (parse-equivalence-class s #:optional (start 0) (end (string-length s))) (parse-rdelim s "=]" start end)) (define* (parse-character-class s #:optional (start 0) (end (string-length s))) (parse-rdelim s ":]" start end)) (define character-range (let ((lower "abcdefghijklmnopqrstuvwxyz") (upper "ABCDEFGHIJKLMNOPQRSTUVWXYZ") (digits "0123456789")) (lambda (start end) (let loop ((strs (list lower upper digits))) (match strs (() #f) ((str . tail) (match (string-index str start) (#f (loop tail)) (sindex (match (string-index str end) (#f (loop tail)) (eindex (if (<= sindex eindex) (string->list str sindex (1+ eindex)) (loop tail)))))))))))) (define* (parse-matching-bracket-expression s #:optional (start 0) (end (string-length s))) (let loop ((i start) (acc '()) (errors '())) (match (and (< i end) (string-ref s i)) (#f (values #f 0)) (#\] (if (= i start) (loop (1+ i) (cons #\] acc) errors) (match errors (() (values (list->char-set acc) (1+ (- i start)))) (_ (throw (last errors)))))) (#\[ (match (and (< (1+ i) end) (string-ref s (1+ i))) (#\. (receive (result length) (parse-collating-symbol s (+ i 2) end) (if result (loop (+ i length 1) acc (cons 'pattern-collating-symbol errors)) (loop (1+ i) (cons #\[ acc) errors)))) (#\= (receive (result length) (parse-equivalence-class s (+ i 2) end) (if result (loop (+ i length 1) acc (cons 'pattern-equivalence-class errors)) (loop (1+ i) (cons #\[ acc) errors)))) (#\: (receive (result length) (parse-character-class s (+ i 2) end) (if result (loop (+ i length 1) acc (cons 'pattern-character-class errors)) (loop (1+ i) (cons #\[ acc) errors)))) (_ (loop (1+ i) (cons #\[ acc) errors)))) (#\- (if (or (= i start) (and (< (1+ i) end) (char=? (string-ref s (1+ i)) #\]))) (loop (1+ i) (cons #\- acc) errors) (let ((alpha (and (pair? acc) (car acc))) ;; XXX: Escaped range end? (omega (and (< (1+ i) end) (string-ref s (1+ i))))) (match (character-range alpha omega) (#f (loop (+ i 2) acc (cons 'pattern-range-expression errors))) (chrs (loop (+ i 2) (append chrs acc) errors)))))) (#\\ (if (< (1+ i) end) (loop (+ i 2) (cons (string-ref s (1+ i)) acc) errors) (loop (1+ i) acc errors))) (chr (loop (1+ i) (cons chr acc) errors))))) (define* (parse-bracket-expression s #:optional (start 0) (end (string-length s))) (let* ((matching? (not (char=? (string-ref s start) #\!))) (start* (if matching? start (1+ start)))) (receive (result length) (parse-matching-bracket-expression s start* end) (if (or (not result) matching?) (values result length) (values (char-set-complement! result) (1+ length)))))) (define* (parse-part s #:optional (start 0) (end (string-length s))) (let loop ((i start) (acc '())) (match (and (< i end) (string-ref s i)) (#f (match acc (() (values #f 0)) (_ (values (list->vector (reverse! acc)) (- i start))))) (#\* (match acc (() (values '* 1)) (_ (values (list->vector (reverse! acc)) (- i start))))) (#\? (loop (1+ i) (cons char-set:full acc))) (#\[ (if (< (1+ i) end) (receive (result length) (parse-bracket-expression s (1+ i) end) (if result (loop (+ i length 1) (cons result acc)) (loop (1+ i) (cons #\[ acc)))) (loop (1+ i) (cons #\[ acc)))) (#\\ (if (< (1+ i) end) (loop (+ i 2) (cons (string-ref s (1+ i)) acc)) (loop (1+ i) acc))) (chr (loop (1+ i) (cons chr acc)))))) (define* (parse-pattern s #:optional (start 0) (end (string-length s))) "Parse the string @var{s} as a pattern." (let loop ((i start) (parts '())) (receive (part length) (parse-part s i end) (match part (#f (make-pattern (reverse! parts))) ('* (match parts (() (loop (+ i length) (list #() #()))) ((#() . _) (loop (+ i length) parts)) (_ (loop (+ i length) (cons #() parts))))) (_ (match parts ((#() . rest) (loop (+ i length) (cons part rest))) (_ (loop (+ i length) (cons part parts))))))))) (define pattern-quote (let ((specials '(#\\ #\* #\? #\[ #\] #\! #\-))) (lambda (s) "Quote all the special characters in @var{s} so that none of them are treated specially when @var{s} is interpreted as a pattern." (reverse-list->string (string-fold (lambda (chr acc) (if (member chr specials) (cons* chr #\\ acc) (cons chr acc))) '() s))))) (define (pattern-null? pattern) (match (pattern-parts pattern) (() #t) ((part) (vector-empty? part)) (_ #f))) (define (pattern-plain? pattern) "Check if @var{pattern} free of special pattern constructions like asterisks and bracket expressions. If a pattern is ``plain'' its source string is the only string that will match it." (match (pattern-parts pattern) (() #t) ((part) (vector-every char? part)) (_ #f))) (define* (string-starts-with-part s part #:optional (start 0) (end (string-length s))) (and (<= (vector-length part) (- end start)) (let loop ((i 0) (j start)) (match (and (< i (vector-length part)) (vector-ref part i)) (#f #t) ((? char? chr) (and (char=? (string-ref s j) chr) (loop (1+ i) (1+ j)))) ((? char-set? cs) (and (char-set-contains? cs (string-ref s j)) (loop (1+ i) (1+ j)))))))) (define* (string-ends-with-part s part #:optional (start 0) (end (string-length s))) (let ((start* (- end (vector-length part)))) (and (>= start* start) (string-starts-with-part s part start* end)))) (define* (string-contains-part s part #:optional (start 0) (end (string-length s))) (let loop ((i start)) (cond ((>= i end) #f) ((string-starts-with-part s part i end) i) (else (loop (1+ i)))))) (define* (string-contains-part-right s part #:optional (start 0) (end (string-length s))) (let loop ((i end)) (cond ((< i start) #f) ((string-ends-with-part s part start i) (- i (vector-length part))) (else (loop (1- i)))))) (define* (pattern-match? pattern str #:optional (start 0) (end (string-length str)) #:key explicit-initial-period?) "Check if @var{str} matches @var{pattern}." (define (parts-match? parts start) (match parts (() (= start end)) ((part) (string-ends-with-part str part start end)) ((part . rest) (and=> (string-contains-part str part start end) (lambda (m) (let ((start* (+ m (vector-length part)))) (parts-match? rest start*))))))) (match-let ((($ parts) pattern)) (if (and explicit-initial-period? (< start end) (char=? (string-ref str start) #\.)) (match parts ((#(#\. _ ...) . _) (pattern-match? pattern str start end #:explicit-initial-period? #f)) (_ #f)) (match parts (() (= start end)) ((part . rest) (and (string-starts-with-part str part start end) (let ((start* (+ start (vector-length part)))) (parts-match? rest start*)))))))) (define* (pattern-drop pattern str #:key greedy?) (define (match-parts parts i) (match parts (() (substring str i)) ((part . rest) (match (if (and greedy? (null? rest)) (string-contains-part-right str part i) (string-contains-part str part i)) (#f str) (m (match-parts rest (+ m (vector-length part)))))))) (match-let ((($ parts) pattern)) (match parts (() str) ((part . rest) (if (string-starts-with-part str part) (match-parts rest (vector-length part)) str))))) (define* (pattern-drop-right pattern str #:key greedy?) (define (match-parts parts i) (match parts (() (substring str 0 i)) ((part . rest) (match (if (and greedy? (null? rest)) (string-contains-part str part 0 i) (string-contains-part-right str part 0 i)) (#f str) (m (match-parts rest m)))))) (match-let ((($ parts) pattern)) (let ((strap (reverse parts))) (match strap (() str) ((part . rest) (if (string-ends-with-part str part) (let ((i (- (string-length str) (vector-length part)))) (match-parts rest i)) str))))))