PEG shell grammar.

This commit is contained in:
Rutger van Beusekom 2016-05-06 23:53:03 +02:00
parent 7752b5f686
commit a431a84161
4 changed files with 392 additions and 0 deletions

7
README Normal file
View File

@ -0,0 +1,7 @@
ANGUISH: that which one might experience when their shell lacks a programming language
or
ANother GUIle SHell
This project aims to produce, at a minimum, a POSIX-compliant sh
replacement. On top of that, it also intends to make Scheme available
for both interactive use and scripting.

205
sh.bnf Normal file
View File

@ -0,0 +1,205 @@
/* -------------------------------------------------------
The grammar symbols
------------------------------------------------------- */
%token WORD
%token ASSIGNMENT_WORD
%token NAME
%token NEWLINE
%token IO_NUMBER
/* The following are the operators mentioned above. */
%token AND_IF OR_IF DSEMI
/* '&&' '||' ';;' */
%token DLESS DGREAT LESSAND GREATAND LESSGREAT DLESSDASH
/* '<<' '>>' '<&' '>&' '<>' '<<-' */
%token CLOBBER
/* '>|' */
/* The following are the reserved words. */
%token If Then Else Elif Fi Do Done
/* 'if' 'then' 'else' 'elif' 'fi' 'do' 'done' */
%token Case Esac While Until For
/* 'case' 'esac' 'while' 'until' 'for' */
/* These are reserved words, not operator tokens, and are
recognized when reserved words are recognized. */
%token Lbrace Rbrace Bang
/* '{' '}' '!' */
%token In
/* 'in' */
/* -------------------------------------------------------
The Grammar
------------------------------------------------------- */
%start complete_command
%%
/* Start symbol: a list, optionally followed by a trailing separator. */
complete_command : list separator
| list
;
/* One or more and_or chains joined by '&' or ';' (left-recursive). */
list : list separator_op and_or
| and_or
;
/* Pipelines chained with '&&' (AND_IF) or '||' (OR_IF); newlines are
allowed after the operator. */
and_or : pipeline
| and_or AND_IF linebreak pipeline
| and_or OR_IF linebreak pipeline
;
/* A pipe sequence, optionally prefixed by the reserved word '!'. */
pipeline : pipe_sequence
| Bang pipe_sequence
;
/* Commands joined by '|'; newlines are allowed after the '|'. */
pipe_sequence : command
| pipe_sequence '|' linebreak command
;
/* Any single command; only compound commands take a trailing
redirect_list at this level. */
command : simple_command
| compound_command
| compound_command redirect_list
| function_definition
;
/* The structured command forms. */
compound_command : brace_group
| subshell
| for_clause
| case_clause
| if_clause
| while_clause
| until_clause
;
/* A compound_list enclosed in parentheses. */
subshell : '(' compound_list ')'
;
/* A term with optional leading newlines and optional trailing separator. */
compound_list : term
| newline_list term
| term separator
| newline_list term separator
;
/* Like list, but joined by the more general separator (which also
admits newlines) instead of separator_op. */
term : term separator and_or
| and_or
;
/* NOTE(review): the "Apply rule N" remarks refer to numbered grammar rules
that are not part of this file - presumably those of the POSIX Shell
Command Language specification; confirm. */
/* Three forms of the for loop: no 'in' part, 'in' with an empty word
list, and 'in' with an explicit word list. */
for_clause : For name linebreak do_group
| For name linebreak in sequential_sep do_group
| For name linebreak in wordlist sequential_sep do_group
;
name : NAME /* Apply rule 5 */
;
in : In /* Apply rule 6 */
;
/* One or more WORDs (left-recursive). */
wordlist : wordlist WORD
| WORD
;
/* case statement: the body is a case_list, a case_list_ns, or empty. */
case_clause : Case WORD linebreak in linebreak case_list Esac
| Case WORD linebreak in linebreak case_list_ns Esac
| Case WORD linebreak in linebreak Esac
;
/* Zero or more DSEMI-terminated items followed by exactly one final item
that has no DSEMI. */
case_list_ns : case_list case_item_ns
| case_item_ns
;
/* One or more DSEMI-terminated items (left-recursive). */
case_list : case_list case_item
| case_item
;
/* A case item without the terminating DSEMI; the opening '(' before the
pattern is optional and the body may be empty. */
case_item_ns : pattern ')' linebreak
| pattern ')' compound_list linebreak
| '(' pattern ')' linebreak
| '(' pattern ')' compound_list linebreak
;
/* A case item terminated by DSEMI; the opening '(' before the pattern is
optional and the body may be empty. */
case_item : pattern ')' linebreak DSEMI linebreak
| pattern ')' compound_list DSEMI linebreak
| '(' pattern ')' linebreak DSEMI linebreak
| '(' pattern ')' compound_list DSEMI linebreak
;
/* One or more WORDs separated by '|'. */
pattern : WORD /* Apply rule 4 */
| pattern '|' WORD /* Do not apply rule 4 */
;
/* if statement, with or without an else_part. */
if_clause : If compound_list Then compound_list else_part Fi
| If compound_list Then compound_list Fi
;
/* Any number of elif branches (right-recursive), optionally ended by a
single else branch. */
else_part : Elif compound_list Then compound_list
| Elif compound_list Then compound_list else_part
| Else compound_list
;
/* while and until share the same shape: a condition list and a do_group. */
while_clause : While compound_list do_group
;
until_clause : Until compound_list do_group
;
/* fname '(' ')' followed by optional newlines and the function body. */
function_definition : fname '(' ')' linebreak function_body
;
/* The body must be a compound command, optionally with redirections. */
function_body : compound_command /* Apply rule 9 */
| compound_command redirect_list /* Apply rule 9 */
;
fname : NAME /* Apply rule 8 */
;
/* A compound_list enclosed in the reserved words '{' and '}'. */
brace_group : Lbrace compound_list Rbrace
;
/* The 'do' ... 'done' body used by for, while and until. */
do_group : Do compound_list Done /* Apply rule 6 */
;
/* A simple command either starts with a prefix (assignments and/or
redirections), in which case the command word is optional, or starts
directly with the command name. */
simple_command : cmd_prefix cmd_word cmd_suffix
| cmd_prefix cmd_word
| cmd_prefix
| cmd_name cmd_suffix
| cmd_name
;
/* cmd_name and cmd_word are both a single WORD; they differ only in the
rule referenced in their comments. */
cmd_name : WORD /* Apply rule 7a */
;
cmd_word : WORD /* Apply rule 7b */
;
/* One or more redirections and ASSIGNMENT_WORDs, in any order. */
cmd_prefix : io_redirect
| cmd_prefix io_redirect
| ASSIGNMENT_WORD
| cmd_prefix ASSIGNMENT_WORD
;
/* One or more redirections and WORDs, in any order. */
cmd_suffix : io_redirect
| cmd_suffix io_redirect
| WORD
| cmd_suffix WORD
;
/* One or more redirections (left-recursive). */
redirect_list : io_redirect
| redirect_list io_redirect
;
/* A file or here-document redirection, optionally preceded by an
IO_NUMBER. */
io_redirect : io_file
| IO_NUMBER io_file
| io_here
| IO_NUMBER io_here
;
/* A redirection operator followed by its target; the operator spellings
are listed with the %token declarations. */
io_file : '<' filename
| LESSAND filename
| '>' filename
| GREATAND filename
| DGREAT filename
| LESSGREAT filename
| CLOBBER filename
;
filename : WORD /* Apply rule 2 */
;
/* Here-document operator ('<<' or '<<-') followed by its end marker. */
io_here : DLESS here_end
| DLESSDASH here_end
;
here_end : WORD /* Apply rule 3 */
;
/* One or more NEWLINE tokens. */
newline_list : NEWLINE
| newline_list NEWLINE
;
/* Zero or more NEWLINE tokens. */
linebreak : newline_list
| /* empty */
;
separator_op : '&'
| ';'
;
/* Either a separator_op followed by optional newlines, or newlines alone. */
separator : separator_op linebreak
| newline_list
;
/* Like separator, but only ';' is accepted as the operator. */
sequential_sep : ';' linebreak
| newline_list
;

106
sh.peg.scm Normal file
View File

@ -0,0 +1,106 @@
(use-modules (ice-9 peg))
(use-modules (ice-9 peg codegen))
(use-modules (ice-9 pretty-print))
(use-modules (ice-9 rdelim))
(use-modules (ice-9 match))
(define (remove-shell-comments s)
  ;; Blank out shell comments in S, one line at a time.  The comment text
  ;; (from the '#' up to the end of the line) is overwritten with spaces
  ;; rather than deleted, so every remaining character keeps its offset
  ;; and the newlines are preserved.
  ;;
  ;; A '#' only opens a comment when it begins a word, i.e. when it is the
  ;; first character of the line or directly follows whitespace.  This
  ;; keeps `$#', `${#var}' and `foo#bar' intact.
  ;;
  ;; Known limitation: quoting is not tracked, so a word-initial '#'
  ;; inside a quoted string is still treated as a comment.
  (define (comment-start line)
    ;; Index of the first '#' in LINE that begins a word, or #f.
    (let loop ((from 0))
      (let ((n (string-index line #\# from)))
        (cond
         ((not n) #f)
         ((or (zero? n)
              (char-whitespace? (string-ref line (- n 1))))
          n)
         (else (loop (+ n 1)))))))
  (define (blank-comment line)
    (let ((n (comment-start line)))
      (if n
          ;; Keep LINE[0, n) and pad with spaces to the original length.
          (string-pad-right line (string-length line) #\space 0 n)
          line)))
  (string-join (map blank-comment (string-split s #\newline)) "\n"))
(define (flatten lst)
  ;; Return the non-list leaves of the (arbitrarily nested) list LST as a
  ;; single flat list, in left-to-right order.  Nested empty lists
  ;; contribute nothing.
  (apply append
         (map (lambda (item)
                (if (list? item)
                    (flatten item)
                    (list item)))
              lst)))
(define (sh-exec ast)
  ;; Collect the payloads of all (name X) and (word X) nodes of the parse
  ;; tree AST, in order, and run them as one external command via
  ;; `system*'.  Returns the value of `system*', or 0 when the tree
  ;; yields no command words at all (nothing to run).
  (define (sh-exec- ast)
    (match ast
      (('name o) o)
      (('word o) o)
      (('command o ...) (map sh-exec- o))
      ;; Any other non-empty list: visit every element.  The leading tag
      ;; symbol falls through to the catch-all below and becomes #f, which
      ;; is filtered out afterwards.
      ((head tail ...) (map sh-exec- (cons head tail)))
      ;;(('list o ...) (map sh-exec o))
      (_ #f)))
  (let* ((walked (sh-exec- ast))
         ;; A bare (word X) / (name X) tree, or a non-list AST, makes the
         ;; walk return an atom; wrap it so `flatten' always gets a list.
         (cmd (filter identity
                      (flatten (if (list? walked) walked (list walked))))))
    (if (null? cmd)
        0 ; `system*' cannot be called without a program name
        (apply system* cmd))))
;; insert / error at convenient location to short circuit backtracking
(define (parse input)
  ;; Parse INPUT, a string of shell source, with the PEG grammar below
  ;; (start symbol `script').
  ;;
  ;; Returns the parse tree when the whole of INPUT is consumed.  When
  ;; only a prefix matches, the partial tree and the offset where matching
  ;; stopped are pretty-printed to the current output port, "parse error"
  ;; is written to the current error port, and #f is returned.
  ;;
  ;; PEG choice is ordered: within an alternation the first alternative
  ;; that matches wins.  An operator must therefore be listed before any
  ;; operator that is a prefix of it ('>|' before '>', '<<-' before '<<',
  ;; 'done' before 'do'), otherwise the longer one can never match.
  (define-peg-string-patterns
   "script <-- (sp / linebreak)* (term (separator term)* separator?)?
term <-- pipeline (sp* (and / or) (sp / linebreak)* pipeline)*
and <-- '&&'
or <-- '||'
pipeline <-- '!'? sp* command (sp* pipe (sp / linebreak)* command)*
pipe <-- '|'
command <-- simple-command / (compound-command (sp+ io-redirect)*) / function-def
compound-command <-- brace-group / subshell / for-clause / case-clause / if-clause / while-clause / until-clause
subshell <-- '(' sp* compound-list sp* ')'
compound-list <-- (sp / linebreak)* term (separator term)* separator?
case-clause <-- 'case' sp+ word (sp / linebreak)* 'in' (sp / linebreak)* (case-item sp)* 'esac'
case-item <-- '('? sp* pattern sp* ')' (((sp / linebreak) ';;' (sp / linebreak)) / ((compound-list sp* ';;'?)? (sp / linebreak)))
pattern <-- word (sp* '|' sp* word)*
for-clause <-- 'for' sp+ identifier (sp / linebreak)+ ('in' (sp+ word)* sp* sequential-sep)? do-group
do-group <-- 'do' compound-list 'done'
if-clause <-- 'if' compound-list 'then' compound-list else-part? 'fi'
else-part <-- ('elif' compound-list 'then' compound-list else-part?) / ('else' compound-list)
while-clause <-- 'while' compound-list do-group
until-clause <-- 'until' compound-list do-group
function-def <-- name sp* '(' sp* ')' (sp / linebreak)* function-body
function-body <-- compound-command io-redirect*
brace-group <-- '{' sp* compound-list sp* '}'
simple-command <-- (io-redirect sp+)* !reserved word (sp+ (io-redirect / (!reserved word)))*
xsimple-command <-- !reserved ((cmd-prefix (sp+ cmd-suffix)?) / (word (sp+ cmd-suffix)?))
reserved < ('if' / 'then' / 'else' / 'elif' / 'fi' / 'for' / 'done' / 'do' / 'until' / 'while') (sp / linebreak)
cmd-prefix <-- (io-redirect (sp* io-redirect)*) / (word (sp+ word)*)
cmd-suffix <-- (io-redirect (sp* io-redirect)*) / (word (sp+ word)*)
io-redirect <-- [0-9]* sp* (io-here / io-file)
io-file <-- ('<&' / '>&' / '>>' / '>|' / '>' / '<>' / '<') sp* ([0-9]+ / filename)
filename <-- word
io-here <-- ('<<-' / '<<') sp* word
name <-- identifier
identifier <-- [_a-zA-Z][_a-zA-Z0-9]*
word <-- test / substitution / assignment / literal
test <-- ltest (!' ]' .)* rtest
ltest < '[ '
rtest < ' ]'
substitution <-- ('$' '(' script ')') / ('`' word (sp+ word)* '`')
assignment <-- name assign word?
assign < '='
literal <- (subst / delim / (![0-9] (!sp !linebreak ![;&|$()=] .)+) / ([0-9]+ &separator)) literal*
subst <- '$' ('$' / '*' / '@' / [0-9] / identifier / ([{] (![}] .)+ [}]))
delim <- (['] (!['] .)* [']) / ([\"] (![\"] .)* [\"]) / ([`] (![`] .)* [`])
separator <-- (sp* break (sp / linebreak)*) / (sp / linebreak)+
break <-- '&' / ';'
sequential-sep <-- (semi (sp / linebreak)*) / (sp / linebreak)+
semi < ';'
linebreak < [\r\n]
sp < [\t ]")
  ;; The result is deliberately not named `match', which would shadow the
  ;; (ice-9 match) macro inside this body.
  (let ((result (match-pattern script input)))
    (cond
     ;; Defensive: `match-pattern' yields #f when nothing matches at all.
     ((not result)
      (pretty-print "parse error" (current-error-port))
      #f)
     ;; Success only if the match covers the entire input.
     ((= (string-length input) (peg:end result))
      (peg:tree result))
     (else
      (pretty-print (peg:tree result))
      (pretty-print "parse error" (current-error-port))
      (pretty-print (peg:end result))
      #f))))
;; (let* ((input (read-string (open-input-file (cadr (command-line)))))
;; (input (remove-shell-comments input))
;; (ast (parse input)))
;; (sh-exec ast))
;; Entry point: read the shell script named by the first command-line
;; argument, blank out its comments, parse it and pretty-print the result.
(let ((args (command-line)))
  (if (< (length args) 2)
      (begin
        ;; Without this check a missing argument surfaces as an opaque
        ;; wrong-type error from `cadr'.
        (display "usage: " (current-error-port))
        (display (car args) (current-error-port))
        (display " FILE" (current-error-port))
        (newline (current-error-port))
        (exit 1))
      ;; `call-with-input-file' closes the port once the file is read.
      (pretty-print
       (parse (remove-shell-comments
               (call-with-input-file (cadr args) read-string))))))

74
test.sh Normal file
View File

@ -0,0 +1,74 @@
# Sample shell input covering a range of constructs.
# NOTE(review): presumably fed to sh.peg.scm as parser test input - confirm.

# for loop over a command substitution, with a multi-line test expression
for file in $(find * -type f)
do
if [ "${file}" != "generator.log"\
-a "${file}" != "gaiag.log"\
-a "${file}" != "${basename}.scm"\
-a "`basename ${file} .dzn`.dzn" != "${file}" ]
then
filecount=$((filecount+1))
#files[${filecount}]=${file}
fi
done
# redirection, plain arguments, subshell, simple if, one-line for
echo foo 2>&1
ls -l / -1
(echo; echo)
if true
then
echo
fi
for f in foo; do echo; done
for file in *.im
do
${bin}/asd -l gen2 ${file} 2>&1 codegenerator.log || error "codegenerator gen2 failure: ${file}" codegenerator.log
done
# and/or chains, assignments and nested command substitution
cat foo || echo ok && echo nok
foo=$*
foo=$@
foo=$(dirname $(dirname $@))
foo || bar && baz
${bin}/generate -p componentfile.dzn > pretty.dzn 2> pretty.err && cat pretty.dzn || cat componentfile.dzn
filecount=-1
# the multi-line test expression again, this time at top level
if [ "${file}" != "generator.log"\
-a "${file}" != "gaiag.log"\
-a "${file}" != "${basename}.scm"\
-a "`basename ${file} .dzn`.dzn" != "${file}" ]
then
echo
filecount=$((filecount+1))
#files[${filecount}]=${file}
fi
for file in $(find * -type f)
do
echo
done
for file in $(find * -type f)
do
echo $file
done
# background '&' and ';' separators inside one-line compound commands
if ls& ls; then echo foo& echo bar || echo foo; echo barf; fi
for f in foo bar; do echo; done
ls
# assignment from a positional parameter and an empty assignment
model=$1
model=
if [ "${model}" = "" ]
then
echo
fi