;;; Gash -- Guile As SHell ;;; Copyright 2017, 2018 Timothy Sample ;;; ;;; This file is part of Gash. ;;; ;;; Gash is free software: you can redistribute it and/or modify ;;; it under the terms of the GNU General Public License as published by ;;; the Free Software Foundation, either version 3 of the License, or ;;; (at your option) any later version. ;;; ;;; Gash is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with Gash. If not, see . (define-module (test-lexer) #:use-module (gash lexer) #:use-module (ice-9 match) #:use-module (srfi srfi-64) #:use-module (system base lalr) #:use-module (tests unit automake)) ;;; Commentary: ;;; ;;; Tests for the lexer module. ;;; ;;; Code: (define (tokenize str) "Covert the string @var{str} into a list of tokens." (define (tokenize-port port) (let loop ((token (get-token port)) (acc '())) (match token ('*eoi* (reverse! acc)) ((? lexical-token?) (let* ((category (lexical-token-category token)) (source (lexical-token-source token)) (value (lexical-token-value token)) (offset (source-location-offset source)) (length (source-location-length source))) (loop (get-token port) (cons `(,category (,offset . ,length) ,value) acc))))))) (call-with-input-string str tokenize-port)) (test-begin "lexer") ;;; ;;; Basic words and operators. ;;; (test-equal "Lexes one word" '((NAME (0 . 3) "foo")) (tokenize "foo")) (test-equal "Ignores escaped newlines (line-joining)" '((NAME (0 . 8) "foobar")) (tokenize "foo\\\nbar")) (test-equal "Handles spaces around escaped newlines" '((NAME (0 . 3) "foo") (NAME (7 . 3) "bar")) (tokenize "foo \\\n bar")) (test-equal "Splits tokens on a space" '((NAME (0 . 3) "foo") (NAME (4 . 3) "bar")) (tokenize "foo bar")) (test-equal "Splits tokens on a newline" '((NAME (0 . 3) "foo") (NEWLINE (3 . 1) #\newline) (NAME (4 . 3) "bar")) (tokenize "foo\nbar")) (test-equal "Splits tokens on an operator" '((NAME (0 . 3) "foo") (PIPE (3 . 1) "|") (NAME (4 . 3) "bar")) (tokenize "foo|bar")) (test-equal "Recognizes a reserved word" '((While (0 . 5) "while")) (tokenize "while")) (test-equal "Recognizes a simple assignment" '((ASSIGNMENT-WORD (0 . 7) "foo=bar")) (tokenize "foo=bar")) (test-equal "Recognizes a complex assignment" '((ASSIGNMENT-WORD (0 . 11) ("foo=bar" ( "baz")))) (tokenize "foo=bar$baz")) ;;; ;;; Comments. ;;; (test-equal "Ignores comments followed by a newline" '((NAME (0 . 3) "foo") (NEWLINE (8 . 1) #\newline) (NAME (9 . 3) "bar")) (tokenize "foo #baz\nbar")) (test-equal "Ignores comments followed by end-of-input" '((NAME (0 . 3) "foo")) (tokenize "foo #baz")) ;;; ;;; IO numbers. ;;; (test-equal "Recogizes an output IO number" '((NAME (0 . 3) "foo") (IO-NUMBER (4 . 1) "2") (GREAT (5 . 1) ">") (WORD (6 . 9) "/dev/null")) (tokenize "foo 2>/dev/null")) (test-equal "Recogizes an input IO number" '((NAME (0 . 3) "foo") (IO-NUMBER (4 . 1) "0") (LESS (5 . 1) "<") (WORD (6 . 12) "/dev/urandom")) (tokenize "foo 0 "foo"))) (tokenize "$foo")) (test-equal "Treats dollar sign normally if not before a name" '((WORD (0 . 3) "$][")) (tokenize "$][")) (test-equal "Delimits unbraced, interspersed parameter names" '((WORD (0 . 12) ("foo-" ( "baz") "-bar"))) (tokenize "foo-$baz-bar")) (test-equal "Delimits braced, interspersed parameter names" '((WORD (0 . 12) ("foo" ( "baz") "bar"))) (tokenize "foo${baz}bar")) (test-equal "Recognizes the \"length\" parameter operator" '((WORD (0 . 7) ( "foo"))) (tokenize "${#foo}")) (for-each (match-lambda ((operator . symbol) (test-equal (string-append "Recognizes a parameter expansion " "with the \"" operator "\" operator") `((WORD (0 . ,(+ (string-length operator) 9)) (,symbol "foo" "bar"))) (tokenize (string-append "${foo" operator "bar}"))))) '(("-" . ) (":-" . ) ("=" . ) (":=" . ) ("?" . ) (":?" . ) ("+" . ) (":+" . ) ("%" . ) ("%%" . ) ("#" . ) ("##" . ))) (test-equal "Recognizes a parameter expansion operator without default" '((WORD (0 . 7) ( "foo" #f))) (tokenize "${foo-}")) (test-equal "Splits multidigit parameter name without braces" '((WORD (0 . 3) (( "1") "2"))) (tokenize "$12")) (test-equal "Preserves multidigit parameter name with braces" '((WORD (0 . 5) ( "12"))) (tokenize "${12}")) (for-each (lambda (special) (test-equal (string-append "Recognizes the \"" special "\" parameter") `((WORD (0 . 2) ( ,special))) (tokenize (string-append "$" special)))) '("@" "*" "#" "?" "-" "$" "!" "0")) (test-equal "Allows brace-nesting in parameter expansions" '((WORD (0 . 11) ( "foo" "b{}r"))) (tokenize "${foo-b{}r}")) (test-equal "Respects escapes in parameter expansions" '((WORD (0 . 11) ( "foo" ("b" ( "}") "r")))) (tokenize "${foo-b\\}r}")) (test-equal "Respects single quotations in parameter expressions" '((WORD (0 . 12) ( "foo" ("b" ( "}") "r")))) (tokenize "${foo-b'}'r}")) (test-equal "Respects double quotations in parameter expressions" '((WORD (0 . 12) ( "foo" ("b" ( "}") "r")))) (tokenize "${foo-b\"}\"r}")) (test-equal "Recognizes nested parameter expansions" '((WORD (0 . 13) ( "foo" ( "bar")))) (tokenize "${foo-${bar}}")) ;;; ;;; Single quotations. ;;; (test-equal "Lexes a single quotation" '((WORD (0 . 5) ( "foo"))) (tokenize "'foo'")) (test-equal "Lexes a single quotation in a word" '((WORD (0 . 5) ("f" ( "o") "o"))) (tokenize "f'o'o")) (test-equal "Ignores special characters in a single quotation" '((WORD (0 . 12) ( "foo\n#\\`$<\""))) (tokenize "'foo\n#\\`$<\"'")) (test-equal "Lexes an empty single quotation" '((WORD (0 . 2) ( ""))) (tokenize "''")) ;;; ;;; Double quotations. ;;; (test-equal "Recognizes a double quotation" '((WORD (0 . 5) ( "foo"))) (tokenize "\"foo\"")) (test-equal "Recognizes a double quotation in a word" '((WORD (0 . 5) ("f" ( "o") "o"))) (tokenize "f\"o\"o")) (test-equal "Ignores special characters in double quotations" '((WORD (0 . 9) ( "foo\n#<'"))) (tokenize "\"foo\n#<'\"")) (test-equal "Recognizes an empty double quotation" '((WORD (0 . 2) ( ""))) (tokenize "\"\"")) (test-equal "Respects escapes for special characters in double quotations" '((WORD (0 . 10) ( ("foo" ( "\"") "bar")))) (tokenize "\"foo\\\"bar\"")) (test-equal "Ignores escapes for normal characters in double quotations" '((WORD (0 . 9) ( "foo\\bar"))) (tokenize "\"foo\\bar\"")) (test-equal "Ignores escaped newlines (line-joining) in double quotations" '((WORD (0 . 10) ( "foobar"))) (tokenize "\"foo\\\nbar\"")) (test-equal "Recognizes expansions in double quotations" '((WORD (0 . 6) ( ( "foo")))) (tokenize "\"$foo\"")) ;;; ;;; Here-documents. ;;; (define (get-here-end* str) (let* ((token (call-with-input-string str get-here-end)) (category (lexical-token-category token)) (source (lexical-token-source token)) (value (lexical-token-value token)) (offset (source-location-offset source)) (length (source-location-length source))) `(,category (,offset . ,length) ,value))) (test-equal "Ignores expansions in here-end" '(WORD (0 . 2) "$x") (get-here-end* "$x")) (define* (get-here-doc* end str #:key (trim-tabs? #f) (quoted? #f)) (call-with-input-string str (lambda (port) (let* ((token (get-here-doc end port #:trim-tabs? trim-tabs? #:quoted? quoted?)) (category (lexical-token-category token)) (source (lexical-token-source token)) (value (lexical-token-value token)) (offset (source-location-offset source)) (length (source-location-length source))) `(,category (,offset . ,length) ,value))))) (test-equal "Lexes a here-document" '(HERE-DOC (0 . 8) ( "foo\n")) (get-here-doc* "eof" "foo\neof")) (test-equal "Lexes a here-document with an expansion" '(HERE-DOC (0 . 7) ( (( "x") "\n"))) (get-here-doc* "eof" "$x\neof")) (test-equal "Lexes a quoted here-document with an expansion" '(HERE-DOC (0 . 7) ( "$x\n")) (get-here-doc* "eof" "$x\neof" #:quoted? #t)) (test-equal "Lexes a multi-line quoted here-document" '(HERE-DOC (0 . 8) ( "a\nb\n")) (get-here-doc* "eof" "a\nb\neof" #:quoted? #t)) (test-equal "Lexes a here-document with tab trimming" '(HERE-DOC (0 . 15) ( "foo\nbar\n")) (get-here-doc* "eof" "\tfoo\n\tbar\n\teof" #:trim-tabs? #t)) (test-equal "Stops lexing a here-document at the end" '(HERE-DOC (0 . 8) ( "foo\n")) (get-here-doc* "eof" "foo\neof\nnbar")) (test-equal "Lexes a here-document containing here-end with prefix" '(HERE-DOC (0 . 13) ( "foo\n eof\n")) (get-here-doc* "eof" "foo\n eof\neof")) (test-equal "Lexes a here-document containing here-end with suffix" '(HERE-DOC (0 . 13) ( "foo\neof \n")) (get-here-doc* "eof" "foo\neof \neof")) (test-equal "Lexes a here-document with repeated here-end" '(HERE-DOC (0 . 15) ( "foo\neofeof\n")) (get-here-doc* "eof" "foo\neofeof\neof")) (test-equal "Lexes a here-document with here-end after an expansion" '(HERE-DOC (0 . 12) ( (( "x") "eof\n"))) (get-here-doc* "eof" "${x}eof\neof")) (test-equal "Lexes a here-document with here-end after an escape" '(HERE-DOC (0 . 9) ( "\\eof\n")) (get-here-doc* "eof" "\\eof\neof")) ;;; ;;; Bracketed commands. ;;; (test-equal "Recognizes a bracketed command substition" '((WORD (0 . 6) ( ( "foo")))) (parameterize ((read-bracketed-command (lambda (port) (string-for-each (lambda _ (read-char port)) "foo") '(( "foo"))))) (tokenize "$(foo)"))) ;;; ;;; Backquoted-commands. ;;; (test-equal "Recognizes a backquoted command substition" '((WORD (0 . 5) ( ( "foo")))) (parameterize ((read-backquoted-command (lambda (port) (string-for-each (lambda _ (read-char port)) "foo") '(( "foo"))))) (tokenize "`foo`"))) (test-end)