From 94643f8361f00205a972e7f82078b5f241e60010 Mon Sep 17 00:00:00 2001 From: "Jan (janneke) Nieuwenhuizen" Date: Wed, 30 Dec 2020 17:28:21 +0100 Subject: [PATCH] mes: Remove PEG. PEG is not used and mostly broken. * mes/module/mes/peg.mes, mes/module/mes/peg/cache.scm, mes/module/mes/peg/codegen.scm, mes/module/mes/peg/simplify-tree.scm, mes/module/mes/peg/string-peg.scm, mes/module/mes/peg/using-parsers.scm, tests/peg.test: Remove. * AUTHORS: Remove mention. --- AUTHORS | 3 - mes/module/mes/peg.mes | 41 --- mes/module/mes/peg/cache.scm | 47 ---- mes/module/mes/peg/codegen.scm | 361 --------------------------- mes/module/mes/peg/simplify-tree.scm | 100 -------- mes/module/mes/peg/string-peg.scm | 275 -------------------- mes/module/mes/peg/using-parsers.scm | 118 --------- tests/peg.test | 71 ------ 8 files changed, 1016 deletions(-) delete mode 100644 mes/module/mes/peg.mes delete mode 100644 mes/module/mes/peg/cache.scm delete mode 100644 mes/module/mes/peg/codegen.scm delete mode 100644 mes/module/mes/peg/simplify-tree.scm delete mode 100644 mes/module/mes/peg/string-peg.scm delete mode 100644 mes/module/mes/peg/using-parsers.scm delete mode 100755 tests/peg.test diff --git a/AUTHORS b/AUTHORS index c69e5e50..22a328b3 100644 --- a/AUTHORS +++ b/AUTHORS @@ -56,9 +56,6 @@ module/mes/getopt-long.scm Optargs from Guile mes/module/mes/optargs.scm -PEG from Guile -mes/module/mes/peg/ - Pmatch from Guile mes/module/system/base/pmatch.scm diff --git a/mes/module/mes/peg.mes b/mes/module/mes/peg.mes deleted file mode 100644 index cf12e019..00000000 --- a/mes/module/mes/peg.mes +++ /dev/null @@ -1,41 +0,0 @@ -;;; -*-scheme-*- - -;;; GNU Mes --- Maxwell Equations of Software -;;; Copyright © 2016,2018 Jan (janneke) Nieuwenhuizen -;;; -;;; This file is part of GNU Mes. -;;; -;;; GNU Mes is free software; you can redistribute it and/or modify it -;;; under the terms of the GNU General Public License as published by -;;; the Free Software Foundation; either version 3 of the License, or (at -;;; your option) any later version. -;;; -;;; GNU Mes is distributed in the hope that it will be useful, but -;;; WITHOUT ANY WARRANTY; without even the implied warranty of -;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;;; GNU General Public License for more details. -;;; -;;; You should have received a copy of the GNU General Public License -;;; along with GNU Mes. If not, see . - -;;; Commentary: - -;;; peg.mes is loaded after syntax-case: psyntax. It provides PEG -;;; from Guile-2.1. - -;;; Code: - -(mes-use-module (mes let)) -(mes-use-module (mes scm)) -(mes-use-module (mes guile)) -(mes-use-module (mes pretty-print)) -(mes-use-module (mes psyntax)) -(mes-use-module (srfi srfi-13)) -;;(mes-use-module (srfi srfi-9-psyntax)) -;;(mes-use-module (srfi srfi-9)) -(mes-use-module (mes pmatch)) -(include-from-path "mes/peg/cache.scm") -(include-from-path "mes/peg/codegen.scm") -(include-from-path "mes/peg/string-peg.scm") -(include-from-path "mes/peg/using-parsers.scm") -(include-from-path "mes/peg/simplify-tree.scm") diff --git a/mes/module/mes/peg/cache.scm b/mes/module/mes/peg/cache.scm deleted file mode 100644 index 27ce7564..00000000 --- a/mes/module/mes/peg/cache.scm +++ /dev/null @@ -1,47 +0,0 @@ -;;; GNU Mes --- Maxwell Equations of Software -;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc. -;;; -;;; This file is part of GNU Mes. -;;; -;;; GNU Mes is free software; you can redistribute it and/or modify it -;;; under the terms of the GNU General Public License as published by -;;; the Free Software Foundation; either version 3 of the License, or (at -;;; your option) any later version. -;;; -;;; GNU Mes is distributed in the hope that it will be useful, but -;;; WITHOUT ANY WARRANTY; without even the implied warranty of -;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;;; GNU General Public License for more details. -;;; -;;; You should have received a copy of the GNU General Public License -;;; along with GNU Mes. If not, see . - -;;; Taken from GNU Guile -;;; cache.scm --- cache the results of parsing - -(define-module (ice-9 peg cache) - #:export (cg-cached-parser)) - -;; The results of parsing using a nonterminal are cached. Think of it like a -;; hash with no conflict resolution. Process for deciding on the cache size -;; wasn't very scientific; just ran the benchmarks and stopped a little after -;; the point of diminishing returns on my box. -(define *cache-size* 512) - -(define (make-cache) - (make-vector *cache-size* #f)) - -;; given a syntax object which is a parser function, returns syntax -;; which, if evaluated, will become a parser function that uses a cache. -(define (cg-cached-parser parser) - #`(let ((cache (make-cache))) - (lambda (str strlen at) - (let* ((vref (vector-ref cache (modulo at *cache-size*)))) - ;; Check to see whether the value is cached. - (if (and vref (eq? (car vref) str) (= (cadr vref) at)) - (caddr vref);; If it is return it. - (let ((fres ;; Else calculate it and cache it. - (#,parser str strlen at))) - (vector-set! cache (modulo at *cache-size*) - (list str at fres)) - fres)))))) diff --git a/mes/module/mes/peg/codegen.scm b/mes/module/mes/peg/codegen.scm deleted file mode 100644 index e1040def..00000000 --- a/mes/module/mes/peg/codegen.scm +++ /dev/null @@ -1,361 +0,0 @@ -;;; GNU Mes --- Maxwell Equations of Software -;;; Copyright (C) 2011 Free Software Foundation, Inc. -;;; -;;; This file is part of GNU Mes. -;;; -;;; GNU Mes is free software; you can redistribute it and/or modify it -;;; under the terms of the GNU General Public License as published by -;;; the Free Software Foundation; either version 3 of the License, or (at -;;; your option) any later version. -;;; -;;; GNU Mes is distributed in the hope that it will be useful, but -;;; WITHOUT ANY WARRANTY; without even the implied warranty of -;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;;; GNU General Public License for more details. -;;; -;;; You should have received a copy of the GNU General Public License -;;; along with GNU Mes. If not, see . - -;;; Taken from GNU Guile - -;;; codegen.scm --- code generation for composable parsers - -(define-module (ice-9 peg codegen) - #:export (compile-peg-pattern wrap-parser-for-users add-peg-compiler!) - #:use-module (ice-9 pretty-print) - #:use-module (system base pmatch)) - -(define-syntax single? - (syntax-rules () - ;;"Return #t if X is a list of one element." - ((_ x) - (pmatch x - ((_) #t) - (else #f))))) - -(define-syntax single-filter - (syntax-rules () - ;;"If EXP is a list of one element, return the element. Otherwise return EXP." - ((_ exp) - (pmatch exp - ((,elt) elt) - (,elts elts))))) - -(define-syntax push-not-null! - (syntax-rules () - ;;"If OBJ is non-null, push it onto LST, otherwise do nothing." - ((_ lst obj) - (if (not (null? obj)) - (push! lst obj))))) - -(define-syntax push! - (syntax-rules () - ;;"Push an object onto a list." - ((_ lst obj) - (set! lst (cons obj lst))))) - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;; CODE GENERATORS -;; These functions generate scheme code for parsing PEGs. -;; Conventions: -;; accum: (all name body none) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Code we generate will have a certain return structure depending on how we're -;; accumulating (the ACCUM variable). -(define (cg-generic-ret accum name body-uneval at) - ;; name, body-uneval and at are syntax - #`(let ((body #,body-uneval)) - #,(cond - ((and (eq? accum 'all) name) - #`(list #,at - (cond - ((not (list? body)) (list '#,name body)) - ((null? body) '#,name) - ((symbol? (car body)) (list '#,name body)) - (else (cons '#,name body))))) - ((eq? accum 'name) - #`(list #,at '#,name)) - ((eq? accum 'body) - #`(list #,at - (cond - ((single? body) (car body)) - (else body)))) - ((eq? accum 'none) - #`(list #,at '())) - (else - (begin - (pretty-print `(cg-generic-ret-error ,accum ,name ,body-uneval ,at)) - (pretty-print "Defaulting to accum of none.\n") - #`(list #,at '())))))) - -;; The short name makes the formatting below much easier to read. -(define cggr cg-generic-ret) - -;; Generates code that matches a particular string. -;; E.g.: (cg-string syntax "abc" 'body) -(define (cg-string pat accum) - (let ((plen (string-length pat))) - #`(lambda (str len pos) - (let ((end (+ pos #,plen))) - (and (<= end len) - (string= str #,pat pos end) - #,(case accum - ((all) #`(list end (list 'cg-string #,pat))) - ((name) #`(list end 'cg-string)) - ((body) #`(list end #,pat)) - ((none) #`(list end '())) - (else (error "bad accum" accum)))))))) - -;; Generates code for matching any character. -;; E.g.: (cg-peg-any syntax 'body) -(define (cg-peg-any accum) - #`(lambda (str len pos) - (and (< pos len) - #,(case accum - ((all) #`(list (1+ pos) - (list 'cg-peg-any (substring str pos (1+ pos))))) - ((name) #`(list (1+ pos) 'cg-peg-any)) - ((body) #`(list (1+ pos) (substring str pos (1+ pos)))) - ((none) #`(list (1+ pos) '())) - (else (error "bad accum" accum)))))) - -;; Generates code for matching a range of characters between start and end. -;; E.g.: (cg-range syntax #\a #\z 'body) -(define (cg-range pat accum) - (syntax-case pat () - ((start end) - (if (not (and (char? (syntax->datum #'start)) - (char? (syntax->datum #'end)))) - (error "range PEG should have characters after it; instead got" - #'start #'end)) - #`(lambda (str len pos) - (and (< pos len) - (let ((c (string-ref str pos))) - (and (char>=? c start) - (char<=? c end) - #,(case accum - ((all) #`(list (1+ pos) (list 'cg-range (string c)))) - ((name) #`(list (1+ pos) 'cg-range)) - ((body) #`(list (1+ pos) (string c))) - ((none) #`(list (1+ pos) '())) - (else (error "bad accum" accum)))))))))) - -;; Generate code to match a pattern and do nothing with the result -(define (cg-ignore pat accum) - (syntax-case pat () - ((inner) - (compile-peg-pattern #'inner 'none)))) - -(define (cg-capture pat accum) - (syntax-case pat () - ((inner) - (compile-peg-pattern #'inner 'body)))) - -;; Filters the accum argument to compile-peg-pattern for buildings like string -;; literals (since we don't want to tag them with their name if we're doing an -;; "all" accum). -(define (builtin-accum-filter accum) - (cond - ((eq? accum 'all) 'body) - ((eq? accum 'name) 'name) - ((eq? accum 'body) 'body) - ((eq? accum 'none) 'none))) -(define baf builtin-accum-filter) - -;; Top-level function builder for AND. Reduces to a call to CG-AND-INT. -(define (cg-and clauses accum) - #`(lambda (str len pos) - (let ((body '())) - #,(cg-and-int clauses (baf accum) #'str #'len #'pos #'body)))) - -;; Internal function builder for AND (calls itself). -(define (cg-and-int clauses accum str strlen at body) - (syntax-case clauses () - (() - (cggr accum 'cg-and #`(reverse #,body) at)) - ((first rest ...) - #`(let ((res (#,(compile-peg-pattern #'first accum) #,str #,strlen #,at))) - (and res - ;; update AT and BODY then recurse - (let ((newat (car res)) - (newbody (cadr res))) - (set! #,at newat) - (push-not-null! #,body (single-filter newbody)) - #,(cg-and-int #'(rest ...) accum str strlen at body))))))) - -;; Top-level function builder for OR. Reduces to a call to CG-OR-INT. -(define (cg-or clauses accum) - #`(lambda (str len pos) - #,(cg-or-int clauses (baf accum) #'str #'len #'pos))) - -;; Internal function builder for OR (calls itself). -(define (cg-or-int clauses accum str strlen at) - (syntax-case clauses () - (() - #f) - ((first rest ...) - #`(or (#,(compile-peg-pattern #'first accum) #,str #,strlen #,at) - #,(cg-or-int #'(rest ...) accum str strlen at))))) - -(define (cg-* args accum) - (syntax-case args () - ((pat) - #`(lambda (str strlen at) - (let ((body '())) - (let lp ((end at) (count 0)) - (let* ((match (#,(compile-peg-pattern #'pat (baf accum)) - str strlen end)) - (new-end (if match (car match) end)) - (count (if (> new-end end) (1+ count) count))) - (if (> new-end end) - (push-not-null! body (single-filter (cadr match)))) - (if (and (> new-end end) - #,#t) - (lp new-end count) - (let ((success #,#t)) - #,#`(and success - #,(cggr (baf accum) 'cg-body - #'(reverse body) #'new-end))))))))))) - -(define (cg-+ args accum) - (syntax-case args () - ((pat) - #`(lambda (str strlen at) - (let ((body '())) - (let lp ((end at) (count 0)) - (let* ((match (#,(compile-peg-pattern #'pat (baf accum)) - str strlen end)) - (new-end (if match (car match) end)) - (count (if (> new-end end) (1+ count) count))) - (if (> new-end end) - (push-not-null! body (single-filter (cadr match)))) - (if (and (> new-end end) - #,#t) - (lp new-end count) - (let ((success #,#'(>= count 1))) - #,#`(and success - #,(cggr (baf accum) 'cg-body - #'(reverse body) #'new-end))))))))))) - -(define (cg-? args accum) - (syntax-case args () - ((pat) - #`(lambda (str strlen at) - (let ((body '())) - (let lp ((end at) (count 0)) - (let* ((match (#,(compile-peg-pattern #'pat (baf accum)) - str strlen end)) - (new-end (if match (car match) end)) - (count (if (> new-end end) (1+ count) count))) - (if (> new-end end) - (push-not-null! body (single-filter (cadr match)))) - (if (and (> new-end end) - #,#'(< count 1)) - (lp new-end count) - (let ((success #,#t)) - #,#`(and success - #,(cggr (baf accum) 'cg-body - #'(reverse body) #'new-end))))))))))) - -(define (cg-followed-by args accum) - (syntax-case args () - ((pat) - #`(lambda (str strlen at) - (let ((body '())) - (let lp ((end at) (count 0)) - (let* ((match (#,(compile-peg-pattern #'pat (baf accum)) - str strlen end)) - (new-end (if match (car match) end)) - (count (if (> new-end end) (1+ count) count))) - (if (> new-end end) - (push-not-null! body (single-filter (cadr match)))) - (if (and (> new-end end) - #,#'(< count 1)) - (lp new-end count) - (let ((success #,#'(= count 1))) - #,#`(and success - #,(cggr (baf accum) 'cg-body #''() #'at))))))))))) - -(define (cg-not-followed-by args accum) - (syntax-case args () - ((pat) - #`(lambda (str strlen at) - (let ((body '())) - (let lp ((end at) (count 0)) - (let* ((match (#,(compile-peg-pattern #'pat (baf accum)) - str strlen end)) - (new-end (if match (car match) end)) - (count (if (> new-end end) (1+ count) count))) - (if (> new-end end) - (push-not-null! body (single-filter (cadr match)))) - (if (and (> new-end end) - #,#'(< count 1)) - (lp new-end count) - (let ((success #,#'(= count 1))) - #,#`(if success - #f - #,(cggr (baf accum) 'cg-body #''() #'at))))))))))) - -;; Association list of functions to handle different expressions as PEGs -(define peg-compiler-alist '()) - -(define (add-peg-compiler! symbol function) - (set! peg-compiler-alist - (assq-set! peg-compiler-alist symbol function))) - -(add-peg-compiler! 'range cg-range) -(add-peg-compiler! 'ignore cg-ignore) -(add-peg-compiler! 'capture cg-capture) -(add-peg-compiler! 'and cg-and) -(add-peg-compiler! 'or cg-or) -(add-peg-compiler! '* cg-*) -(add-peg-compiler! '+ cg-+) -(add-peg-compiler! '? cg-?) -(add-peg-compiler! 'followed-by cg-followed-by) -(add-peg-compiler! 'not-followed-by cg-not-followed-by) - -;; Takes an arbitrary expressions and accumulation variable, then parses it. -;; E.g.: (compile-peg-pattern syntax '(and "abc" (or "-" (range #\a #\z))) 'all) -(define (compile-peg-pattern pat accum) - (syntax-case pat (peg-any) - (peg-any - (cg-peg-any (baf accum))) - (sym (identifier? #'sym) ;; nonterminal - #'sym) - (str (string? (syntax->datum #'str)) ;; literal string - (cg-string (syntax->datum #'str) (baf accum))) - ((name . args) (let* ((nm (syntax->datum #'name)) - (entry (assq-ref peg-compiler-alist nm))) - (if entry - (entry #'args accum) - (error "Bad peg form" nm #'args - "Not one of" (map car peg-compiler-alist))))))) - -;; Packages the results of a parser -(define (wrap-parser-for-users for-syntax parser accumsym s-syn) - #`(lambda (str strlen at) - (let ((res (#,parser str strlen at))) - ;; Try to match the nonterminal. - (if res - ;; If we matched, do some post-processing to figure out - ;; what data to propagate upward. - (let ((at (car res)) - (body (cadr res))) - #,(cond - ((eq? accumsym 'name) - #`(list at '#,s-syn)) - ((eq? accumsym 'all) - #`(list (car res) - (cond - ((not (list? body)) - (list '#,s-syn body)) - ((null? body) '#,s-syn) - ((symbol? (car body)) - (list '#,s-syn body)) - (else (cons '#,s-syn body))))) - ((eq? accumsym 'none) #`(list (car res) '())) - (else #`(begin res)))) - ;; If we didn't match, just return false. - #f)))) diff --git a/mes/module/mes/peg/simplify-tree.scm b/mes/module/mes/peg/simplify-tree.scm deleted file mode 100644 index 6dc00a63..00000000 --- a/mes/module/mes/peg/simplify-tree.scm +++ /dev/null @@ -1,100 +0,0 @@ -;;; GNU Mes --- Maxwell Equations of Software -;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc. -;;; -;;; This file is part of GNU Mes. -;;; -;;; GNU Mes is free software; you can redistribute it and/or modify it -;;; under the terms of the GNU General Public License as published by -;;; the Free Software Foundation; either version 3 of the License, or (at -;;; your option) any later version. -;;; -;;; GNU Mes is distributed in the hope that it will be useful, but -;;; WITHOUT ANY WARRANTY; without even the implied warranty of -;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;;; GNU General Public License for more details. -;;; -;;; You should have received a copy of the GNU General Public License -;;; along with GNU Mes. If not, see . - -;;; Taken from GNU Guile - -;;; simplify-tree.scm --- utility functions for the PEG parser - -(define-module (ice-9 peg simplify-tree) - #:export (keyword-flatten context-flatten string-collapse) - #:use-module (system base pmatch)) - -(define-syntax single? - (syntax-rules () - ;;"Return #t if X is a list of one element." - ((_ x) - (pmatch x - ((_) #t) - (else #f))))) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;; POST-PROCESSING FUNCTIONS (TO CANONICALIZE MATCH TREES) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Is everything in LST true? -(define (andlst lst) - (or (null? lst) - (and (car lst) (andlst (cdr lst))))) - -;; Is LST a list of strings? -(define (string-list? lst) - (and (list? lst) (not (null? lst)) - (andlst (map string? lst)))) - -;; Groups all strings that are next to each other in LST. Used in -;; STRING-COLLAPSE. -(define (string-group lst) - (if (not (list? lst)) - lst - (if (null? lst) - '() - (let ((next (string-group (cdr lst)))) - (if (not (string? (car lst))) - (cons (car lst) next) - (if (and (not (null? next)) - (list? (car next)) - (string? (caar next))) - (cons (cons (car lst) (car next)) (cdr next)) - (cons (list (car lst)) next))))))) - - -;; Collapses all the string in LST. -;; ("a" "b" (c d) "e" "f") -> ("ab" (c d) "ef") -(define (string-collapse lst) - (if (list? lst) - (let ((res (map (lambda (x) (if (string-list? x) - (apply string-append x) - x)) - (string-group (map string-collapse lst))))) - (if (single? res) (car res) res)) - lst)) - -;; If LST is an atom, return (list LST), else return LST. -(define (mklst lst) - (if (not (list? lst)) (list lst) lst)) - -;; Takes a list and "flattens" it, using the predicate TST to know when to stop -;; instead of terminating on atoms (see tutorial). -(define (context-flatten tst lst) - (if (or (not (list? lst)) (null? lst)) - lst - (if (tst lst) - (list lst) - (apply append - (map (lambda (x) (mklst (context-flatten tst x))) - lst))))) - -;; Takes a list and "flattens" it, using the list of keywords KEYWORD-LST to -;; know when to stop at (see tutorial). -(define (keyword-flatten keyword-lst lst) - (context-flatten - (lambda (x) - (if (or (not (list? x)) (null? x)) - #t - (member (car x) keyword-lst))) - lst)) diff --git a/mes/module/mes/peg/string-peg.scm b/mes/module/mes/peg/string-peg.scm deleted file mode 100644 index 3a11fed8..00000000 --- a/mes/module/mes/peg/string-peg.scm +++ /dev/null @@ -1,275 +0,0 @@ -;;; GNU Mes --- Maxwell Equations of Software -;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc. -;;; -;;; This file is part of GNU Mes. -;;; -;;; GNU Mes is free software; you can redistribute it and/or modify it -;;; under the terms of the GNU General Public License as published by -;;; the Free Software Foundation; either version 3 of the License, or (at -;;; your option) any later version. -;;; -;;; GNU Mes is distributed in the hope that it will be useful, but -;;; WITHOUT ANY WARRANTY; without even the implied warranty of -;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;;; GNU General Public License for more details. -;;; -;;; You should have received a copy of the GNU General Public License -;;; along with GNU Mes. If not, see . - -;;; Taken from GNU Guile - -;;; string-peg.scm --- representing PEG grammars as strings - -(define-module (ice-9 peg string-peg) - #:export (peg-as-peg - define-peg-string-patterns - peg-grammar) - #:use-module (ice-9 peg using-parsers) - #:use-module (ice-9 peg codegen) - #:use-module (ice-9 peg simplify-tree)) - -;; Gets the left-hand depth of a list. -(define (depth lst) - (if (or (not (list? lst)) (null? lst)) - 0 - (+ 1 (depth (car lst))))) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;; Parse string PEGs using sexp PEGs. -;; See the variable PEG-AS-PEG for an easier-to-read syntax. -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Grammar for PEGs in PEG grammar. -(define peg-as-peg -"grammar <-- (nonterminal ('<--' / '<-' / '<') sp pattern)+ -pattern <-- alternative (SLASH sp alternative)* -alternative <-- ([!&]? sp suffix)+ -suffix <-- primary ([*+?] sp)* -primary <-- '(' sp pattern ')' sp / '.' sp / literal / charclass / nonterminal !'<' -literal <-- ['] (!['] .)* ['] sp -charclass <-- LB (!']' (CCrange / CCsingle))* RB sp -CCrange <-- . '-' . -CCsingle <-- . -nonterminal <-- [a-zA-Z0-9-]+ sp -sp < [ \t\n]* -SLASH < '/' -LB < '[' -RB < ']' -") - -(define-syntax define-sexp-parser - (lambda (x) - (syntax-case x () - ((_ sym accum pat) - (let* ((matchf (compile-peg-pattern #'pat (syntax->datum #'accum))) - (accumsym (syntax->datum #'accum)) - (syn (wrap-parser-for-users x matchf accumsym #'sym))) - #`(define sym #,syn)))))) - -(define-sexp-parser peg-grammar all - (+ (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern))) -(define-sexp-parser peg-pattern all - (and peg-alternative - (* (and (ignore "/") peg-sp peg-alternative)))) -(define-sexp-parser peg-alternative all - (+ (and (? (or "!" "&")) peg-sp peg-suffix))) -(define-sexp-parser peg-suffix all - (and peg-primary (* (and (or "*" "+" "?") peg-sp)))) -(define-sexp-parser peg-primary all - (or (and "(" peg-sp peg-pattern ")" peg-sp) - (and "." peg-sp) - peg-literal - peg-charclass - (and peg-nonterminal (not-followed-by "<")))) -(define-sexp-parser peg-literal all - (and "'" (* (and (not-followed-by "'") peg-any)) "'" peg-sp)) -(define-sexp-parser peg-charclass all - (and (ignore "[") - (* (and (not-followed-by "]") - (or charclass-range charclass-single))) - (ignore "]") - peg-sp)) -(define-sexp-parser charclass-range all (and peg-any "-" peg-any)) -(define-sexp-parser charclass-single all peg-any) -(define-sexp-parser peg-nonterminal all - (and (+ (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-")) peg-sp)) -(define-sexp-parser peg-sp none - (* (or " " "\t" "\n"))) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;; PARSE STRING PEGS -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Takes a string representing a PEG grammar and returns syntax that -;; will define all of the nonterminals in the grammar with equivalent -;; PEG s-expressions. -(define (peg-parser str for-syntax) - (let ((parsed (match-pattern peg-grammar str))) - (if (not parsed) - (begin - ;; (display "Invalid PEG grammar!\n") - #f) - (let ((lst (peg:tree parsed))) - (cond - ((or (not (list? lst)) (null? lst)) - lst) - ((eq? (car lst) 'peg-grammar) - #`(begin - #,@(map (lambda (x) (peg-nonterm->defn x for-syntax)) - (context-flatten (lambda (lst) (<= (depth lst) 2)) - (cdr lst)))))))))) - -;; Macro wrapper for PEG-PARSER. Parses PEG grammars expressed as strings and -;; defines all the appropriate nonterminals. -(define-syntax define-peg-string-patterns - (lambda (x) - (syntax-case x () - ((_ str) - (peg-parser (syntax->datum #'str) x))))) - -;; lst has format (nonterm grabber pattern), where -;; nonterm is a symbol (the name of the nonterminal), -;; grabber is a string (either "<", "<-" or "<--"), and -;; pattern is the parse of a PEG pattern expressed as as string. -(define (peg-nonterm->defn lst for-syntax) - (let* ((nonterm (car lst)) - (grabber (cadr lst)) - (pattern (caddr lst)) - (nonterm-name (datum->syntax for-syntax - (string->symbol (cadr nonterm))))) - #`(define-peg-pattern #,nonterm-name - #,(cond - ((string=? grabber "<--") (datum->syntax for-syntax 'all)) - ((string=? grabber "<-") (datum->syntax for-syntax 'body)) - (else (datum->syntax for-syntax 'none))) - #,(compressor (peg-pattern->defn pattern for-syntax) for-syntax)))) - -;; lst has format ('peg-pattern ...). -;; After the context-flatten, (cdr lst) has format -;; (('peg-alternative ...) ...), where the outer list is a collection -;; of elements from a '/' alternative. -(define (peg-pattern->defn lst for-syntax) - #`(or #,@(map (lambda (x) (peg-alternative->defn x for-syntax)) - (context-flatten (lambda (x) (eq? (car x) 'peg-alternative)) - (cdr lst))))) - -;; lst has format ('peg-alternative ...). -;; After the context-flatten, (cdr lst) has the format -;; (item ...), where each item has format either ("!" ...), ("&" ...), -;; or ('peg-suffix ...). -(define (peg-alternative->defn lst for-syntax) - #`(and #,@(map (lambda (x) (peg-body->defn x for-syntax)) - (context-flatten (lambda (x) (or (string? (car x)) - (eq? (car x) 'peg-suffix))) - (cdr lst))))) - -;; lst has the format either -;; ("!" ('peg-suffix ...)), ("&" ('peg-suffix ...)), or -;; ('peg-suffix ...). -(define (peg-body->defn lst for-syntax) - (cond - ((equal? (car lst) "&") - #`(followed-by #,(peg-suffix->defn (cadr lst) for-syntax))) - ((equal? (car lst) "!") - #`(not-followed-by #,(peg-suffix->defn (cadr lst) for-syntax))) - ((eq? (car lst) 'peg-suffix) - (peg-suffix->defn lst for-syntax)) - (else `(peg-parse-body-fail ,lst)))) - -;; lst has format ('peg-suffix (? (/ "*" "?" "+"))) -(define (peg-suffix->defn lst for-syntax) - (let ((inner-defn (peg-primary->defn (cadr lst) for-syntax))) - (cond - ((null? (cddr lst)) - inner-defn) - ((equal? (caddr lst) "*") - #`(* #,inner-defn)) - ((equal? (caddr lst) "?") - #`(? #,inner-defn)) - ((equal? (caddr lst) "+") - #`(+ #,inner-defn))))) - -;; Parse a primary. -(define (peg-primary->defn lst for-syntax) - (let ((el (cadr lst))) - (cond - ((list? el) - (cond - ((eq? (car el) 'peg-literal) - (peg-literal->defn el for-syntax)) - ((eq? (car el) 'peg-charclass) - (peg-charclass->defn el for-syntax)) - ((eq? (car el) 'peg-nonterminal) - (datum->syntax for-syntax (string->symbol (cadr el)))))) - ((string? el) - (cond - ((equal? el "(") - (peg-pattern->defn (caddr lst) for-syntax)) - ((equal? el ".") - (datum->syntax for-syntax 'peg-any)) - (else (datum->syntax for-syntax - `(peg-parse-any unknown-string ,lst))))) - (else (datum->syntax for-syntax - `(peg-parse-any unknown-el ,lst)))))) - -;; Trims characters off the front and end of STR. -;; (trim-1chars "'ab'") -> "ab" -(define (trim-1chars str) (substring str 1 (- (string-length str) 1))) - -;; Parses a literal. -(define (peg-literal->defn lst for-syntax) - (datum->syntax for-syntax (trim-1chars (cadr lst)))) - -;; Parses a charclass. -(define (peg-charclass->defn lst for-syntax) - #`(or - #,@(map - (lambda (cc) - (cond - ((eq? (car cc) 'charclass-range) - #`(range #,(datum->syntax - for-syntax - (string-ref (cadr cc) 0)) - #,(datum->syntax - for-syntax - (string-ref (cadr cc) 2)))) - ((eq? (car cc) 'charclass-single) - (datum->syntax for-syntax (cadr cc))))) - (context-flatten - (lambda (x) (or (eq? (car x) 'charclass-range) - (eq? (car x) 'charclass-single))) - (cdr lst))))) - -;; Compresses a list to save the optimizer work. -;; e.g. (or (and a)) -> a -(define (compressor-core lst) - (if (or (not (list? lst)) (null? lst)) - lst - (cond - ((and (or (eq? (car lst) 'or) (eq? (car lst) 'and)) - (null? (cddr lst))) - (compressor-core (cadr lst))) - ((and (eq? (car lst) 'body) - (eq? (cadr lst) 'lit) - (eq? (cadddr lst) 1)) - (compressor-core (caddr lst))) - (else (map compressor-core lst))))) - -(define (compressor syn for-syntax) - (datum->syntax for-syntax - (compressor-core (syntax->datum syn)))) - -;; Builds a lambda-expressions for the pattern STR using accum. -(define (peg-string-compile args accum) - (syntax-case args () - ((str-stx) (string? (syntax->datum #'str-stx)) - (let ((string (syntax->datum #'str-stx))) - (compile-peg-pattern - (compressor - (peg-pattern->defn - (peg:tree (match-pattern peg-pattern string)) #'str-stx) - #'str-stx) - (if (eq? accum 'all) 'body accum)))) - (else (error "Bad embedded PEG string" args)))) - -(add-peg-compiler! 'peg peg-string-compile) diff --git a/mes/module/mes/peg/using-parsers.scm b/mes/module/mes/peg/using-parsers.scm deleted file mode 100644 index 8a8f6af6..00000000 --- a/mes/module/mes/peg/using-parsers.scm +++ /dev/null @@ -1,118 +0,0 @@ -;;; GNU Mes --- Maxwell Equations of Software -;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc. -;;; -;;; This file is part of GNU Mes. -;;; -;;; GNU Mes is free software; you can redistribute it and/or modify it -;;; under the terms of the GNU General Public License as published by -;;; the Free Software Foundation; either version 3 of the License, or (at -;;; your option) any later version. -;;; -;;; GNU Mes is distributed in the hope that it will be useful, but -;;; WITHOUT ANY WARRANTY; without even the implied warranty of -;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;;; GNU General Public License for more details. -;;; -;;; You should have received a copy of the GNU General Public License -;;; along with GNU Mes. If not, see . - -;;; Taken from GNU Guile - -;;; using-parsers.scm --- utilities to make using parsers easier - -(define-module (ice-9 peg using-parsers) - #:use-module (ice-9 peg simplify-tree) - #:use-module (ice-9 peg codegen) - #:use-module (ice-9 peg cache) - #:export (match-pattern define-peg-pattern search-for-pattern - prec make-prec peg:start peg:end peg:string - peg:tree peg:substring peg-record?)) - -;;; -;;; Helper Macros -;;; - -(define-syntax until - (syntax-rules () - ;;"Evaluate TEST. If it is true, return its value. Otherwise,execute the STMTs and try again." - ((_ test stmt stmt* ...) - (let lp () - (or test - (begin stmt stmt* ... (lp))))))) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;; FOR DEFINING AND USING NONTERMINALS -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Parses STRING using NONTERM -(define (match-pattern nonterm string) - ;; We copy the string before using it because it might have been modified - ;; in-place since the last time it was parsed, which would invalidate the - ;; cache. Guile uses copy-on-write for strings, so this is fast. - (let ((res (nonterm (string-copy string) (string-length string) 0))) - (if (not res) - #f - (make-prec 0 (car res) string (string-collapse (cadr res)))))) - -;; Defines a new nonterminal symbol accumulating with ACCUM. -(define-syntax define-peg-pattern - (lambda (x) - (syntax-case x () - ((_ sym accum pat) - (let ((matchf (compile-peg-pattern #'pat (syntax->datum #'accum))) - (accumsym (syntax->datum #'accum))) - ;; CODE is the code to parse the string if the result isn't cached. - (let ((syn (wrap-parser-for-users x matchf accumsym #'sym))) - #`(define sym #,(cg-cached-parser syn)))))))) - -(define (peg-like->peg pat) - (syntax-case pat () - (str (string? (syntax->datum #'str)) #'(peg str)) - (else pat))) - -;; Searches through STRING for something that parses to PEG-MATCHER. Think -;; regexp search. -(define-syntax search-for-pattern - (lambda (x) - (syntax-case x () - ((_ pattern string-uncopied) - (let ((pmsym (syntax->datum #'pattern))) - (let ((matcher (compile-peg-pattern (peg-like->peg #'pattern) 'body))) - ;; We copy the string before using it because it might have been - ;; modified in-place since the last time it was parsed, which would - ;; invalidate the cache. Guile uses copy-on-write for strings, so - ;; this is fast. - #`(let ((string (string-copy string-uncopied)) - (strlen (string-length string-uncopied)) - (at 0)) - (let ((ret (until (or (>= at strlen) - (#,matcher string strlen at)) - (set! at (+ at 1))))) - (if (eq? ret #t) ;; (>= at strlen) succeeded - #f - (let ((end (car ret)) - (match (cadr ret))) - (make-prec - at end string - (string-collapse match)))))))))))) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;; PMATCH STRUCTURE MUNGING -;; Pretty self-explanatory. -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define prec - (make-record-type "peg" '(start end string tree))) -(define make-prec - (record-constructor prec '(start end string tree))) -(define (peg:start pm) - (if pm ((record-accessor prec 'start) pm) #f)) -(define (peg:end pm) - (if pm ((record-accessor prec 'end) pm) #f)) -(define (peg:string pm) - (if pm ((record-accessor prec 'string) pm) #f)) -(define (peg:tree pm) - (if pm ((record-accessor prec 'tree) pm) #f)) -(define (peg:substring pm) - (if pm (substring (peg:string pm) (peg:start pm) (peg:end pm)) #f)) -(define peg-record? (record-predicate prec)) diff --git a/tests/peg.test b/tests/peg.test deleted file mode 100755 index 830bea32..00000000 --- a/tests/peg.test +++ /dev/null @@ -1,71 +0,0 @@ -#! /bin/sh -# -*-scheme-*- -exec ${MES-bin/mes} --no-auto-compile -L ${0%/*} -L module -C module -e '(tests peg)' -s "$0" "$@" -!# - -;;; -*-scheme-*- - -;;; GNU Mes --- Maxwell Equations of Software -;;; Copyright © 2016 Jan (janneke) Nieuwenhuizen -;;; -;;; This file is part of GNU Mes. -;;; -;;; GNU Mes is free software; you can redistribute it and/or modify it -;;; under the terms of the GNU General Public License as published by -;;; the Free Software Foundation; either version 3 of the License, or (at -;;; your option) any later version. -;;; -;;; GNU Mes is distributed in the hope that it will be useful, but -;;; WITHOUT ANY WARRANTY; without even the implied warranty of -;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;;; GNU General Public License for more details. -;;; -;;; You should have received a copy of the GNU General Public License -;;; along with GNU Mes. If not, see . - -(define-module (tests peg) - #:use-module (mes mes-0) - #:use-module (mes test)) - -(cond-expand - (mes - (mes-use-module (mes peg)) - (mes-use-module (mes test))) - (guile-2.2 - (use-modules (ice-9 peg))) - (guile - (use-modules (ice-9 syncase)) - (display "guile 2.0: no PEG\n" (current-error-port)) - (exit 0))) - -(pass-if "first dummy" #t) -(pass-if-not "second dummy" #f) - -(define *etc-passwd* - "root:x:0:0:root:/root:/bin/bash -daemon:x:1:1:daemon:/usr/sbin:/bin/sh -bin:x:2:2:bin:/bin:/bin/sh -sys:x:3:3:sys:/dev:/bin/sh -nobody:x:65534:65534:nobody:/nonexistent:/bin/sh -messagebus:x:103:107::/var/run/dbus:/bin/false") - -(define-peg-string-patterns - "string-passwd <- entry* !. -entry <-- (! NL .)* NL* -NL < '\n'") - -(pass-if-equal "peg-tree" - (map (lambda (x) (list 'entry x)) (string-split *etc-passwd* #\newline)) - (peg:tree (match-pattern string-passwd *etc-passwd*))) - -(define-peg-pattern passwd body (and (* entry) (not-followed-by peg-any))) -(define-peg-pattern entry all (and (* (and (not-followed-by NL) peg-any)) - (* NL))) -(define-peg-pattern NL none "\n") -(define-peg-pattern passwd body (peg "entry* !.")) - -(pass-if-equal "peg-tree" - (map (lambda (x) (list 'entry x)) (string-split *etc-passwd* #\newline)) - (peg:tree (match-pattern passwd *etc-passwd*))) - -(result 'report)