Add pattern module
* geesh/pattern.scm: New file. * tests/pattern.scm: New file. * Makefile.am: Add them.
This commit is contained in:
parent
da71aed3e3
commit
e1ab2ccd94
|
@ -50,6 +50,7 @@ MODULES = \
|
|||
geesh/eval.scm \
|
||||
geesh/lexer.scm \
|
||||
geesh/parser.scm \
|
||||
geesh/pattern.scm \
|
||||
geesh/repl.scm \
|
||||
geesh/shell.scm \
|
||||
geesh/word.scm
|
||||
|
@ -61,6 +62,7 @@ TESTS = \
|
|||
tests/environment.scm \
|
||||
tests/lexer.scm \
|
||||
tests/parser.scm \
|
||||
tests/pattern.scm \
|
||||
tests/shell.scm \
|
||||
tests/word.scm
|
||||
|
||||
|
|
|
@ -0,0 +1,233 @@
|
|||
(define-module (geesh pattern)
|
||||
#:use-module (ice-9 match)
|
||||
#:use-module (ice-9 receive)
|
||||
#:use-module (srfi srfi-1)
|
||||
#:use-module (srfi srfi-26)
|
||||
#:export (parse-pattern
|
||||
pattern-quote
|
||||
pattern-plain?
|
||||
pattern-match?))
|
||||
|
||||
(define* (parse-rdelim s1 s2 #:optional (start 0) (end (string-length s1)))
|
||||
|
||||
(define (not-backslash? chr) (not (char=? chr #\\)))
|
||||
|
||||
(define (escaped? index)
|
||||
(even? (- index (or (string-rindex s1 not-backslash? start index) 0))))
|
||||
|
||||
(let loop ((index (string-contains s1 s2 start end)))
|
||||
(match index
|
||||
(#f (values #f 0))
|
||||
(_ (if (escaped? index)
|
||||
(loop (string-contains s1 s2 (1+ index) end))
|
||||
(values (substring s1 start index) (+ (- index start) 2)))))))
|
||||
|
||||
(define* (parse-collating-symbol s #:optional (start 0)
|
||||
(end (string-length s)))
|
||||
(parse-rdelim s ".]" start end))
|
||||
|
||||
(define* (parse-equivalence-class s #:optional (start 0)
|
||||
(end (string-length s)))
|
||||
(parse-rdelim s "=]" start end))
|
||||
|
||||
(define* (parse-character-class s #:optional (start 0)
|
||||
(end (string-length s)))
|
||||
(parse-rdelim s ":]" start end))
|
||||
|
||||
(define ascii-lower-case
|
||||
(char-set->list (char-set-intersection char-set:ascii
|
||||
char-set:lower-case)))
|
||||
|
||||
(define ascii-upper-case
|
||||
(char-set->list (char-set-intersection char-set:ascii
|
||||
char-set:upper-case)))
|
||||
|
||||
(define ascii-digits
|
||||
(char-set->list (char-set-intersection char-set:ascii
|
||||
char-set:digit)))
|
||||
|
||||
(define* (parse-matching-bracket-expression s #:optional (start 0)
|
||||
(end (string-length s)))
|
||||
(let loop ((i start) (acc '()))
|
||||
(match (and (< i end) (string-ref s i))
|
||||
(#f (values #f 0))
|
||||
(#\] (if (= i start)
|
||||
(loop (1+ i) (cons #\] acc))
|
||||
(values (list->char-set acc) (1+ (- i start)))))
|
||||
(#\[ (match (and (< (1+ i) end) (string-ref s (1+ i)))
|
||||
(#\. (receive (result length)
|
||||
(parse-collating-symbol s (+ i 2) end)
|
||||
(if result
|
||||
(throw 'pattern-collating-symbol)
|
||||
(loop (1+ i) (cons #\[ acc)))))
|
||||
(#\= (receive (result length)
|
||||
(parse-equivalence-class s (+ i 2) end)
|
||||
(if result
|
||||
(throw 'pattern-equivalence-class)
|
||||
(loop (1+ i) (cons #\[ acc)))))
|
||||
(#\: (receive (result length)
|
||||
(parse-character-class s (+ i 2) end)
|
||||
(if result
|
||||
(throw 'pattern-character-class)
|
||||
(loop (1+ i) (cons #\[ acc)))))
|
||||
(_ (loop (1+ i) (cons #\[ acc)))))
|
||||
(#\- (if (or (= i start)
|
||||
(and (< (1+ i) end) (char=? (string-ref s (1+ i)) #\])))
|
||||
(loop (1+ i) (cons #\- acc))
|
||||
(let ((alpha (and (pair? acc) (car acc)))
|
||||
(omega (and (< (1+ i) end) (string-ref s (1+ i)))))
|
||||
(match `(,alpha . ,omega)
|
||||
((#\a . #\z)
|
||||
(loop (+ i 2) (append ascii-lower-case acc)))
|
||||
((#\A . #\Z)
|
||||
(loop (+ i 2) (append ascii-upper-case acc)))
|
||||
((#\0 . #\9)
|
||||
(loop (+ i 2) (append ascii-digits acc)))
|
||||
(_ (throw 'pattern-range-expression))))))
|
||||
(#\\ (if (< (1+ i) end)
|
||||
(loop (+ i 2) (cons (string-ref s (1+ i)) acc))
|
||||
(loop (1+ i) acc)))
|
||||
(chr (loop (1+ i) (cons chr acc))))))
|
||||
|
||||
(define* (parse-bracket-expression s #:optional (start 0)
|
||||
(end (string-length s)))
|
||||
(let* ((matching? (not (char=? (string-ref s start) #\!)))
|
||||
(start* (if matching? start (1+ start))))
|
||||
(receive (result length)
|
||||
(parse-matching-bracket-expression s start* end)
|
||||
(if (or (not result) matching?)
|
||||
(values result length)
|
||||
(values (char-set-complement! result) (1+ length))))))
|
||||
|
||||
(define* (parse-pattern s #:optional (start 0) (end (string-length s)))
|
||||
"Parse the string @var{s} as a pattern."
|
||||
(let loop ((i start) (parts '()) (acc '()))
|
||||
(match (and (< i end) (string-ref s i))
|
||||
(#f (if (null? acc)
|
||||
(reverse! parts)
|
||||
(reverse! (cons (reverse! acc) parts))))
|
||||
(#\* (if (null? acc)
|
||||
(loop (1+ i) (cons '* parts) '())
|
||||
(loop (1+ i) (cons* '* (reverse! acc) parts) '())))
|
||||
(#\? (loop (1+ i) parts (cons '? acc)))
|
||||
(#\[ (if (< (1+ i) end)
|
||||
(receive (result length)
|
||||
(parse-bracket-expression s (1+ i) end)
|
||||
(if result
|
||||
(loop (+ i length 1) parts (cons result acc))
|
||||
(loop (1+ i) parts (cons #\[ acc))))
|
||||
(loop (1+ i) parts (cons #\[ acc))))
|
||||
(#\\ (if (< (1+ i) end)
|
||||
(loop (+ i 2) parts (cons (string-ref s (1+ i)) acc))
|
||||
(loop (1+ i) parts acc)))
|
||||
(chr (loop (1+ i) parts (cons chr acc))))))
|
||||
|
||||
(define pattern-quote
|
||||
(let ((specials '(#\\ #\* #\? #\[ #\] #\! #\-)))
|
||||
(lambda (s)
|
||||
"Quote all the special characters in @var{s} so that none of
|
||||
them are treated specially when @var{s} is interpreted as a pattern."
|
||||
(reverse-list->string
|
||||
(string-fold (lambda (chr acc)
|
||||
(if (member chr specials)
|
||||
(cons* chr #\\ acc)
|
||||
(cons chr acc)))
|
||||
'()
|
||||
s)))))
|
||||
|
||||
(define (pattern-plain? pattern)
|
||||
"Check if @var{pattern} free of special pattern constructions like
|
||||
asterisks and bracket expressions. If a pattern is ``plain'' its
|
||||
source string is the only string that will match it."
|
||||
(every (match-lambda
|
||||
('* #f)
|
||||
(parts (every (match-lambda
|
||||
((or (? char?) (? string?)) #t)
|
||||
(_ #f))
|
||||
parts)))
|
||||
pattern))
|
||||
|
||||
(define* (string-starts-with-part s part #:optional (start 0)
|
||||
(end (string-length s)))
|
||||
(let loop ((part part) (i start))
|
||||
(match (and (< i end) part)
|
||||
(#f (match part
|
||||
(() (- i start))
|
||||
(((? string? s2) . rest)
|
||||
(and (string-null? s2)
|
||||
(loop rest i)))
|
||||
(_ #f)))
|
||||
(() (- i start))
|
||||
(('? . rest) (loop rest (1+ i)))
|
||||
(((? string? s2) . rest)
|
||||
(and (string-prefix? s2 s 0 (string-length s2) i end)
|
||||
(loop rest (+ i (string-length s2)))))
|
||||
(((? char? chr) . rest)
|
||||
(and (char=? (string-ref s i) chr)
|
||||
(loop rest (1+ i))))
|
||||
(((? char-set? cs) . rest)
|
||||
(and (char-set-contains? cs (string-ref s i))
|
||||
(loop rest (1+ i)))))))
|
||||
|
||||
(define (pattern-part-length part)
|
||||
(let loop ((part part) (length 0))
|
||||
(match part
|
||||
(() length)
|
||||
(((? string? str) . rest) (loop rest (+ length (string-length str))))
|
||||
((first . rest) (loop rest (1+ length))))))
|
||||
|
||||
(define* (string-ends-with-part s part #:optional (start 0)
|
||||
(end (string-length s)))
|
||||
(let ((start* (- end (pattern-part-length part))))
|
||||
(and (>= start* start)
|
||||
(string-starts-with-part s part start* end))))
|
||||
|
||||
(define* (string-contains-part s part #:optional (start 0)
|
||||
(end (string-length s)))
|
||||
(let loop ((part part) (i start))
|
||||
(match part
|
||||
(() (cons start (- i start)))
|
||||
(('? . rest) (loop rest (1+ i)))
|
||||
(((? string? s2) . rest)
|
||||
(and=> (string-contains s s2 i end)
|
||||
(lambda (index)
|
||||
(or (and=> (string-starts-with-part s part index end)
|
||||
(cut cons index <>))
|
||||
(loop part (1+ index))))))
|
||||
(((or (? char? cp) (? char-set? cp)) . rest)
|
||||
(and=> (string-index s cp i end)
|
||||
(lambda (index)
|
||||
(or (and=> (string-starts-with-part s part index end)
|
||||
(cut cons index <>))
|
||||
(loop part (1+ index)))))))))
|
||||
|
||||
(define* (pattern-match? pattern str #:optional (start 0)
|
||||
(end (string-length str))
|
||||
#:key explicit-initial-period?)
|
||||
"Check if @var{str} matches @var{pattern}."
|
||||
(if (and explicit-initial-period?
|
||||
(< start end)
|
||||
(char=? (string-ref str start) #\.))
|
||||
(match pattern
|
||||
(((#\. . _) . _)
|
||||
(pattern-match? pattern str start end
|
||||
#:explicit-initial-period? #f))
|
||||
((((? string? s) . _) ._)
|
||||
(and (string-prefix? "." s)
|
||||
(pattern-match? pattern str start end
|
||||
#:explicit-initial-period? #f)))
|
||||
(_ #f))
|
||||
(let loop ((pattern pattern) (i start))
|
||||
(match pattern
|
||||
(() (= i end))
|
||||
(('*) #t)
|
||||
(('* (? pair? part)) (string-ends-with-part str part i end))
|
||||
(('* (? pair? part) . rest)
|
||||
(and=> (string-contains-part str part i end)
|
||||
(match-lambda
|
||||
((match-index . match-length)
|
||||
(loop rest (+ match-index match-length))))))
|
||||
(((? pair? part) . rest)
|
||||
(and=> (string-starts-with-part str part i end)
|
||||
(lambda (length)
|
||||
(loop rest (+ i length)))))))))
|
|
@ -0,0 +1,171 @@
|
|||
;;; The Geesh Shell Interpreter
|
||||
;;; Copyright 2018 Timothy Sample <samplet@ngyro.com>
|
||||
;;;
|
||||
;;; This file is part of Geesh.
|
||||
;;;
|
||||
;;; Geesh is free software: you can redistribute it and/or modify
|
||||
;;; it under the terms of the GNU General Public License as published by
|
||||
;;; the Free Software Foundation, either version 3 of the License, or
|
||||
;;; (at your option) any later version.
|
||||
;;;
|
||||
;;; Geesh is distributed in the hope that it will be useful,
|
||||
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;;; GNU General Public License for more details.
|
||||
;;;
|
||||
;;; You should have received a copy of the GNU General Public License
|
||||
;;; along with Geesh. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
(define-module (test-pattern)
|
||||
#:use-module (geesh pattern)
|
||||
#:use-module (srfi srfi-64)
|
||||
#:use-module (tests automake))
|
||||
|
||||
;;; Commentary:
|
||||
;;;
|
||||
;;; Tests for the pattern module.
|
||||
;;;
|
||||
;;; Code:
|
||||
|
||||
(test-begin "pattern")
|
||||
|
||||
|
||||
;;; Basic matching
|
||||
|
||||
(test-assert "Matches single characters"
|
||||
(pattern-match? (parse-pattern "a") "a"))
|
||||
|
||||
(test-assert "Matches a sequence of characters"
|
||||
(pattern-match? (parse-pattern "abc") "abc"))
|
||||
|
||||
(test-assert "Matches an empty string"
|
||||
(pattern-match? (parse-pattern "") ""))
|
||||
|
||||
(test-assert "Fails on pattern too long"
|
||||
(not (pattern-match? (parse-pattern "ab") "a")))
|
||||
|
||||
(test-assert "Fails on pattern too short"
|
||||
(not (pattern-match? (parse-pattern "a") "ab")))
|
||||
|
||||
|
||||
;;; Question marks
|
||||
|
||||
(test-assert "Matches a question mark with anything"
|
||||
(pattern-match? (parse-pattern "???") "abc"))
|
||||
|
||||
(test-assert "Does not match question mark with empty string"
|
||||
(not (pattern-match? (parse-pattern "?") "")))
|
||||
|
||||
|
||||
;;; Bracket expressions
|
||||
|
||||
(test-assert "Matches with bracket expressions"
|
||||
(pattern-match? (parse-pattern "[abc][def][ghi]") "aei"))
|
||||
|
||||
(test-assert "Fails on non-matching bracket expressions"
|
||||
(not (pattern-match? (parse-pattern "[abc][def][ghi]") "aex")))
|
||||
|
||||
(test-assert "Matches unterminated bracket expression normally"
|
||||
(pattern-match? (parse-pattern "a[bc") "a[bc"))
|
||||
|
||||
(test-assert "Matches with bracket expressions with left brackets"
|
||||
(pattern-match? (parse-pattern "a[[]c") "a[c"))
|
||||
|
||||
(test-assert "Matches with bracket expressions with right brackets"
|
||||
(pattern-match? (parse-pattern "a[]]c") "a]c"))
|
||||
|
||||
(test-assert "Matches with bracket expressions starting with hyphen"
|
||||
(pattern-match? (parse-pattern "a[-b]c") "a-c"))
|
||||
|
||||
(test-assert "Matches with bracket expressions ending with hyphen"
|
||||
(pattern-match? (parse-pattern "a[b-]c") "a-c"))
|
||||
|
||||
(test-assert "Matches with bracket expressions containing bang"
|
||||
(pattern-match? (parse-pattern "a[b!]c") "a!c"))
|
||||
|
||||
(test-assert "Matches with negated bracket expressions"
|
||||
(pattern-match? (parse-pattern "a[!x]c") "abc"))
|
||||
|
||||
(test-assert "Fails on non-matching negated bracket expressions"
|
||||
(not (pattern-match? (parse-pattern "a[!x]c") "axc")))
|
||||
|
||||
;; We do not fully support the following features. However, rather
|
||||
;; than do something strange, we raise explicit errors when they are
|
||||
;; encountered.
|
||||
|
||||
(test-error "Does not allow collating symbols"
|
||||
(parse-pattern "[[.ch.]]"))
|
||||
|
||||
(test-assert "Allows unterminated collating symbols"
|
||||
(parse-pattern "[[.ch]"))
|
||||
|
||||
(test-error "Does not allow equivalence classes"
|
||||
(parse-pattern "[[=a=]]"))
|
||||
|
||||
(test-assert "Allows unterminated equivalence classes"
|
||||
(parse-pattern "[[=a]"))
|
||||
|
||||
(test-error "Does not allow character classes"
|
||||
(parse-pattern "[[:space:]]"))
|
||||
|
||||
(test-assert "Allows unterminated character classes"
|
||||
(parse-pattern "[[:space]"))
|
||||
|
||||
(test-error "Does not allow general character ranges"
|
||||
(parse-pattern "[<->]"))
|
||||
|
||||
(test-assert "Allows [a-z]"
|
||||
(parse-pattern "[a-z]"))
|
||||
|
||||
(test-assert "Allows [A-Z]"
|
||||
(parse-pattern "[A-Z]"))
|
||||
|
||||
(test-assert "Allows [0-9]"
|
||||
(parse-pattern "[0-9]"))
|
||||
|
||||
(test-assert "Matches with allowed character ranges"
|
||||
(pattern-match? (parse-pattern "[a-z][A-Z][0-9]") "mJ2"))
|
||||
|
||||
|
||||
;;; Asterisks
|
||||
|
||||
(test-assert "Matches with asterisk"
|
||||
(pattern-match? (parse-pattern "*") "abc"))
|
||||
|
||||
(test-assert "Matches empty string with asterisk"
|
||||
(pattern-match? (parse-pattern "*") ""))
|
||||
|
||||
(test-assert "Matches with trailing asterisk"
|
||||
(pattern-match? (parse-pattern "foo*") "foobar"))
|
||||
|
||||
(test-assert "Fails on non-matching trailing asterisk"
|
||||
(not (pattern-match? (parse-pattern "foo*") "goobar")))
|
||||
|
||||
(test-assert "Matches with leading asterisk"
|
||||
(pattern-match? (parse-pattern "*bar") "foobar"))
|
||||
|
||||
(test-assert "Fails on non-matching leading asterisk"
|
||||
(not (pattern-match? (parse-pattern "*bar") "foobaz")))
|
||||
|
||||
(test-assert "Fails on non-matching leading asterisk (internal match)"
|
||||
(not (pattern-match? (parse-pattern "*bar") "foobarbaz")))
|
||||
|
||||
(test-assert "Matches with internal asterisk"
|
||||
(pattern-match? (parse-pattern "foo*bar") "foo boo! bar"))
|
||||
|
||||
(test-assert "Fails on non-matching internal asterisk (start)"
|
||||
(not (pattern-match? (parse-pattern "foo*bar") "goo boo! bar")))
|
||||
|
||||
(test-assert "Fails on non-matching internal asterisk (end)"
|
||||
(not (pattern-match? (parse-pattern "foo*bar") "foo boo! baz")))
|
||||
|
||||
|
||||
;;; Quoting
|
||||
|
||||
;; TODO: Test quoting.
|
||||
|
||||
(test-end)
|
||||
|
||||
;; Local Variables:
|
||||
;; eval: (put 'test-error 'scheme-indent-function 1)
|
||||
;; End:
|
Loading…
Reference in New Issue