;;; -*- Mode: Irken -*- (include "lib/core.scm") (include "lib/pair.scm") (include "lib/string.scm") (include "lib/frb.scm") (include "lib/symbol.scm") (include "lib/io.scm") (datatype token ;; (:t symbol string) ) (define eof-token (token:t 'eof "eof")) (define (lex producer consumer) ;; producer gives us characters ;; consumer takes tokens (let ((action 'not-final) (state 0)) (define (final? action) (not (eq? action 'not-final))) ;; defines the function (DFA) from the lexer generator (include "parse/lexstep.scm") (let loop ((ch (producer)) (last 'not-final) (current (list:nil))) (cond ((char=? ch #\eof) (consumer (token:t '<$> "<$>")) #t) (else (set! state (step ch state)) (set! action finals[state]) (cond ((and (not (final? last)) (final? action)) ;; we've entered a new final state (loop (producer) action (list:cons ch current))) ((and (final? last) (not (final? action))) ;; we've left a final state - longest match - emit token (consumer (token:t last (list->string (reverse current)))) (set! state 0) (loop ch 'not-final (list:nil))) (else ;; accumulate this character (loop (producer) action (list:cons ch current))))))) )) (define (make-lex-generator file) (define (producer) (file:read-char file)) (make-generator (lambda (consumer) (lex producer consumer) (let forever () (consumer eof-token) (forever)) ))) ;; parser tables (datatype action (:shift int) (:reduce int int)) (datatype action-list (:nil) (:cons int (action) (action-list))) (datatype goto-list (:nil) (:cons int int (goto-list))) (include "parse/t1.scm") ;; stack = (:elem item state stack) | (:empty) ;; item = (:nt kind (list (item 'a))) | (:t symbol) ;; args = (:cons item args) | (:nil) ;; state = int ;; kind = symbol (datatype item (:nt symbol (list (item))) (:t symbol string) ) (datatype stack (:empty) (:elem (item) int (stack)) ) (define (parse path) (let ((file (file:open-read path)) (token-gen (make-lex-generator file)) (paren-stack (list:nil)) (indentation 0) (start-of-line #t) (held-token eof-token) (tok eof-token) ) (define get-indent ;; XXX handle or disallow tabs (token:t 'whitespace str) -> (string-length str) ;; non-whitespace at the front of a line (token:t _ _) -> 0) (define (next-token) ;; process (i.e., filter/synthesize) the token stream (let loop () (cond ((not (eq? held-token eof-token)) (set! tok held-token) (set! held-token eof-token)) (else (set! tok (token-gen)))) (if start-of-line ;; in this state we might emit INDENT/DEDENT (let ((this-indent (get-indent tok))) (set! start-of-line #f) (set! held-token tok) (cond ((> this-indent indentation) (set! indentation this-indent) (token:t 'indent "")) ((< this-indent indentation) (set! indentation this-indent) (token:t 'dedent "")) (else (loop)))) ;; in the middle of a line somewhere (match tok with (token:t 'newline _) -> (match paren-stack with () -> (begin (set! start-of-line #t) (token:t 'newline "")) _ -> (loop)) (token:t 'whitespace _) -> (loop) (token:t 'comment _ ) -> (loop) (token:t _ _) -> tok )) )) (let ((stack (stack:empty))) (define (get-state) (match stack with (stack:empty) -> 0 (stack:elem _ state _) -> state )) (define (lookup-action state kind) (let loop ((l actions[state])) (vcase action-list l ((:nil) (error "missing action?")) ((:cons tkind action tl) (if (eq? terminals[tkind] kind) action (loop tl)))))) (define (lookup-goto state nt) (let loop ((l goto[state])) (vcase goto-list l ((:nil) (error "missing goto?")) ((:cons nt0 new-state tl) (if (eq? nt0 nt) new-state (loop tl)))))) (define (pop-n n) (let loop ((n n) (result (list:nil))) (if (= n 0) result (loop (- n 1) (list:cons (pop) result))))) (define (push item state) (set! stack (stack:elem item state stack))) (define (pop) (match stack with (stack:elem item _ rest) -> (begin (set! stack rest) item) (stack:empty) -> (error "stack underflow"))) ;; (let loop ((tok (next-token))) ;; (match tok with ;; (token:t 'eof _) ;; -> (begin (pop) (pop)) ;; (token:t kind val) ;; -> (match (lookup-action (get-state) kind) with ;; (action:shift state) ;; -> (begin (push (item:t kind val) state) (loop (next-token))) ;; (action:reduce plen nt) ;; -> (let ((args (pop-n plen)) ;; (next-state (lookup-goto (get-state) nt))) ;; (push (item:nt non-terminals[nt] args) next-state) ;; (loop tok))))) (let loop ((tok (next-token))) (cond ((eq? tok eof-token) (pop) (pop)) (else (vcase token tok ((:t kind val) (let ((a (lookup-action (get-state) kind))) (vcase action a ((:shift state) (push (item:t kind val) state) (loop (next-token))) ((:reduce plen nt) (let ((args (pop-n plen)) (next-state (lookup-goto (get-state) nt))) (push (item:nt non-terminals[nt] args) next-state)) (loop tok))))))))) ))) (define (print-parse-tree t) (define (indent n) (let loop ((n n)) (cond ((= n 0) #t) (else (print-string " ") (loop (- n 1)))))) (let loop0 ((d 0) (t t)) (indent d) (vcase item t ((:t sym str) (print sym) (print-string " ") (printn str)) ((:nt sym items) (printn sym) (let loop1 ((l items)) (vcase list l ((:nil) #u) ((:cons item tail) (loop0 (+ d 1) item) (loop1 tail))))))) ) ;; print a parse tree out in a way that facilitates writing patterns for it. (define ppt (item:nt sym items) -> (begin (print-string "(item:nt ") (print sym) (print-string " ") (ppt-list items) (print-string ")")) (item:t sym str) -> (begin (print-string "(item:t ") (print sym) (print-string " \"") (print-string str) (print-string "\")")) ) (define (ppt-list l) (print-string "(") (ppt-list2 l)) (define ppt-list2 () -> (print-string ")") (hd . tl) -> (begin (ppt hd) (print-string " ") (ppt-list2 tl)) ) ;; AST (datatype atom (:name string) (:number int) (:string string) ) (datatype expr (:pred string (list (expr))) (:atom (atom)) ) ;; parse-tree->AST (define p-atom (item:t 'NAME x) -> (atom:name x) (item:t 'NUMBER x) -> (atom:number (string->int x)) (item:t 'STRING x) -> (atom:string x) _ -> (error "p-atom") ) (define p-list (item:nt _ ((item:t 'comma _) expr recur)) -> (list:cons (p-expr expr) (p-list recur)) (item:nt _ ()) -> (list:nil) _ -> (error "p-list2") ) (define p-args (item:nt _ ((item:nt 'list (expr rest)))) -> (list:cons (p-expr expr) (p-list rest)) (item:nt _ ()) -> (list:nil) _ -> (error "p-args") ) (define p-expr2 (item:nt 'predicate ((item:t 'NAME name) _ args _)) -> (let ((r {t='pred})) (set! r.args (p-args args)) (set! r.pos 33) r ) (item:nt 'atom (x)) -> (let ((r {t='atom})) (set! r.val (p-atom x)) (set! r.pos 34) r) _ -> (error "p-expr2") ) (define p-expr (item:nt 'expr (x)) -> (p-expr2 x) _ -> (error "p-expr") ) ;; an unparser ;; (define print-expr ;; (expr:pred name args) -> (begin (print-string name) (print-string "(") (print-args args) (print-string ")") #u) ;; (expr:atom atom) -> (print-atom atom)) ;; (define print-args ;; (arg0) -> (print-expr arg0) ;; (arg0 . rest) -> (begin (print-expr arg0) (print-string ", ") (print-args rest)) ;; () -> (begin (print-string "") #u) ;; ) ;; (define print-atom ;; (atom:string s) -> (begin (print-string s) #u) ;; (atom:number n) -> (begin (print-string (int->string n)) #u) ;; (atom:name s) -> (begin (print-string s) #u) ;; ) (let ((t (if (> (sys:argc) 1) (parse sys:argv[1]) (parse "tests/parse_0.py")))) (printn t) (print-parse-tree t) (ppt t) (terpri) (let ((parsed (p-expr t))) (printn parsed) ; (print-expr parsed) ) )