;;; -*- Mode: Irken -*-
(include "lib/core.scm")
(include "lib/pair.scm")
(include "lib/string.scm")
(include "lib/frb.scm")
(include "lib/symbol.scm")
(include "lib/io.scm")
(datatype token
;; <kind> <value>
(:t symbol string)
)
(define eof-token (token:t 'eof "eof"))
(define (lex producer consumer)
;; producer gives us characters
;; consumer takes tokens
(let ((action 'not-final)
(state 0))
(define (final? action)
(not (eq? action 'not-final)))
;; defines the <step> function (DFA) from the lexer generator
(include "parse/lexstep.scm")
(let loop ((ch (producer))
(last 'not-final)
(current (list:nil)))
(cond ((char=? ch #\eof) (consumer (token:t '<$> "<$>")) #t)
(else
(set! state (step ch state))
(set! action finals[state])
(cond ((and (not (final? last)) (final? action))
;; we've entered a new final state
(loop (producer) action (list:cons ch current)))
((and (final? last) (not (final? action)))
;; we've left a final state - longest match - emit token
(consumer (token:t last (list->string (reverse current))))
(set! state 0)
(loop ch 'not-final (list:nil)))
(else
;; accumulate this character
(loop (producer) action (list:cons ch current)))))))
))
(define (make-lex-generator file)
(define (producer)
(file:read-char file))
(make-generator
(lambda (consumer)
(lex producer consumer)
(let forever ()
(consumer eof-token)
(forever))
)))
;; parser tables
(datatype action
(:shift int)
(:reduce int int))
(datatype action-list
(:nil)
(:cons int (action) (action-list)))
(datatype goto-list
(:nil)
(:cons int int (goto-list)))
(include "parse/t1.scm")
;; stack = (:elem item state stack) | (:empty)
;; item = (:nt kind (list (item 'a))) | (:t symbol)
;; args = (:cons item args) | (:nil)
;; state = int
;; kind = symbol
(datatype item
(:nt symbol (list (item)))
(:t symbol string)
)
(datatype stack
(:empty)
(:elem (item) int (stack))
)
(define (parse path)
(let ((file (file:open-read path))
(token-gen (make-lex-generator file))
(paren-stack (list:nil))
(indentation 0)
(start-of-line #t)
(held-token eof-token)
(tok eof-token)
)
(define get-indent
;; XXX handle or disallow tabs
(token:t 'whitespace str) -> (string-length str)
;; non-whitespace at the front of a line
(token:t _ _) -> 0)
(define (next-token)
;; process (i.e., filter/synthesize) the token stream
(let loop ()
(cond ((not (eq? held-token eof-token))
(set! tok held-token)
(set! held-token eof-token))
(else
(set! tok (token-gen))))
(if start-of-line
;; in this state we might emit INDENT/DEDENT
(let ((this-indent (get-indent tok)))
(set! start-of-line #f)
(set! held-token tok)
(cond ((> this-indent indentation)
(set! indentation this-indent)
(token:t 'indent ""))
((< this-indent indentation)
(set! indentation this-indent)
(token:t 'dedent ""))
(else
(loop))))
;; in the middle of a line somewhere
(match tok with
(token:t 'newline _)
-> (match paren-stack with
() -> (begin (set! start-of-line #t) (token:t 'newline ""))
_ -> (loop))
(token:t 'whitespace _) -> (loop)
(token:t 'comment _ ) -> (loop)
(token:t _ _) -> tok
))
))
(let ((stack (stack:empty)))
(define (get-state)
(match stack with
(stack:empty) -> 0
(stack:elem _ state _) -> state
))
(define (lookup-action state kind)
(let loop ((l actions[state]))
(vcase action-list l
((:nil) (error "missing action?"))
((:cons tkind action tl)
(if (eq? terminals[tkind] kind)
action
(loop tl))))))
(define (lookup-goto state nt)
(let loop ((l goto[state]))
(vcase goto-list l
((:nil) (error "missing goto?"))
((:cons nt0 new-state tl)
(if (eq? nt0 nt)
new-state
(loop tl))))))
(define (pop-n n)
(let loop ((n n) (result (list:nil)))
(if (= n 0)
result
(loop (- n 1) (list:cons (pop) result)))))
(define (push item state)
(set! stack (stack:elem item state stack)))
(define (pop)
(match stack with
(stack:elem item _ rest) -> (begin (set! stack rest) item)
(stack:empty) -> (error "stack underflow")))
;; (let loop ((tok (next-token)))
;; (match tok with
;; (token:t 'eof _)
;; -> (begin (pop) (pop))
;; (token:t kind val)
;; -> (match (lookup-action (get-state) kind) with
;; (action:shift state)
;; -> (begin (push (item:t kind val) state) (loop (next-token)))
;; (action:reduce plen nt)
;; -> (let ((args (pop-n plen))
;; (next-state (lookup-goto (get-state) nt)))
;; (push (item:nt non-terminals[nt] args) next-state)
;; (loop tok)))))
(let loop ((tok (next-token)))
(cond ((eq? tok eof-token) (pop) (pop))
(else
(vcase token tok
((:t kind val)
(let ((a (lookup-action (get-state) kind)))
(vcase action a
((:shift state)
(push (item:t kind val) state)
(loop (next-token)))
((:reduce plen nt)
(let ((args (pop-n plen))
(next-state (lookup-goto (get-state) nt)))
(push (item:nt non-terminals[nt] args) next-state))
(loop tok)))))))))
)))
(define (print-parse-tree t)
(define (indent n)
(let loop ((n n))
(cond ((= n 0) #t)
(else
(print-string " ")
(loop (- n 1))))))
(let loop0 ((d 0)
(t t))
(indent d)
(vcase item t
((:t sym str)
(print sym) (print-string " ") (printn str))
((:nt sym items)
(printn sym)
(let loop1 ((l items))
(vcase list l
((:nil) #u)
((:cons item tail)
(loop0 (+ d 1) item)
(loop1 tail)))))))
)
;; print a parse tree out in a way that facilitates writing patterns for it.
(define ppt
(item:nt sym items) -> (begin (print-string "(item:nt ") (print sym) (print-string " ") (ppt-list items) (print-string ")"))
(item:t sym str) -> (begin (print-string "(item:t ") (print sym) (print-string " \"") (print-string str) (print-string "\")"))
)
(define (ppt-list l)
(print-string "(")
(ppt-list2 l))
(define ppt-list2
() -> (print-string ")")
(hd . tl) -> (begin (ppt hd) (print-string " ") (ppt-list2 tl))
)
;; AST
(datatype atom
(:name string)
(:number int)
(:string string)
)
(datatype expr
(:pred string (list (expr)))
(:atom (atom))
)
;; parse-tree->AST
(define p-atom
(item:t 'NAME x) -> (atom:name x)
(item:t 'NUMBER x) -> (atom:number (string->int x))
(item:t 'STRING x) -> (atom:string x)
_ -> (error "p-atom")
)
(define p-list
(item:nt _ ((item:t 'comma _) expr recur)) -> (list:cons (p-expr expr) (p-list recur))
(item:nt _ ()) -> (list:nil)
_ -> (error "p-list2")
)
(define p-args
(item:nt _ ((item:nt 'list (expr rest)))) -> (list:cons (p-expr expr) (p-list rest))
(item:nt _ ()) -> (list:nil)
_ -> (error "p-args")
)
(define p-expr2
(item:nt 'predicate ((item:t 'NAME name) _ args _)) -> (let ((r {t='pred}))
(set! r.args (p-args args))
(set! r.pos 33)
r
)
(item:nt 'atom (x)) -> (let ((r {t='atom}))
(set! r.val (p-atom x))
(set! r.pos 34)
r)
_ -> (error "p-expr2")
)
(define p-expr
(item:nt 'expr (x)) -> (p-expr2 x)
_ -> (error "p-expr")
)
;; an unparser
;; (define print-expr
;; (expr:pred name args) -> (begin (print-string name) (print-string "(") (print-args args) (print-string ")") #u)
;; (expr:atom atom) -> (print-atom atom))
;; (define print-args
;; (arg0) -> (print-expr arg0)
;; (arg0 . rest) -> (begin (print-expr arg0) (print-string ", ") (print-args rest))
;; () -> (begin (print-string "") #u)
;; )
;; (define print-atom
;; (atom:string s) -> (begin (print-string s) #u)
;; (atom:number n) -> (begin (print-string (int->string n)) #u)
;; (atom:name s) -> (begin (print-string s) #u)
;; )
(let ((t (if (> (sys:argc) 1) (parse sys:argv[1]) (parse "tests/parse_0.py"))))
(printn t)
(print-parse-tree t)
(ppt t)
(terpri)
(let ((parsed (p-expr t)))
(printn parsed)
; (print-expr parsed)
)
)