(include "lib/core.scm") (include "lib/pair.scm") (include "lib/alist.scm") (include "lib/string.scm") (include "lib/frb.scm") (include "lib/symbol.scm") (include "lib/io.scm") (include "parse/lexstep.scm") (include "lib/lexer.scm") ;; parser tables (datatype action (:shift int) (:reduce int int)) (datatype action-list (:nil) (:cons int (action) (action-list))) (datatype goto-list (:nil) (:cons int int (goto-list))) (include "parse/t2.scm") (datatype item (:nt symbol (list (item))) (:t symbol string (range)) ) (datatype stack (:empty) (:elem (item) int (stack)) ) ;; this isn't very modular. yet. I'd like to get a generator-based parse going on here. ;; might even obviate the need for tracking position in the AST. [since lexer position ;; can propagate to the current parse error]. (define (parse path) (let ((file (file:open-read path)) (token-gen (make-lex-generator file)) (paren-stack (list:nil)) (indents (list:cons 0 (list:nil))) (start-of-line #t) (held-token eof-token) (tok eof-token) ) (define get-indent ;; XXX handle or disallow tabs (token:t 'whitespace str _) -> (string-length str) ;; non-whitespace at the front of a line (token:t _ _ _ _) -> 0) (define (get-top-indent) (match indents with () -> 0 (indent . _) -> indent)) (define (next-token0) ;; process (i.e., filter/synthesize) the token stream (let loop () (cond ((not (eq? held-token eof-token)) (set! tok held-token) (set! held-token eof-token)) (else (set! tok (token-gen)) ;;(print "token-gen: ") (printn tok) )) ;;(print "next-token loop ") (printn start-of-line) (if start-of-line ;; in this state we might emit INDENT/DEDENT (match tok with (token:t sym val range) -> (let ((this-indent (get-indent tok)) (top-indent (get-top-indent))) (set! start-of-line #f) (set! held-token tok) (cond ((> this-indent top-indent) (set! indents (list:cons this-indent indents)) (token:t 'INDENT "" range)) ((< this-indent top-indent) (set! indents (cdr indents)) ;; go around again, might be more DEDENT (set! start-of-line #t) (token:t 'DEDENT "" range)) (else (loop))))) ;; in the middle of a line somewhere (match tok with (token:t 'NEWLINE _ _) -> (match paren-stack with () -> (begin (set! start-of-line #t) tok) _ -> (loop)) (token:t 'whitespace _ _) -> (loop) (token:t 'comment _ _) -> (loop) (token:t _ _ _) -> tok )) )) (define (next-token) (let ((t (next-token0))) (print-string "next-token: ") (printn t) t)) (let ((stack (stack:empty))) (define (get-state) (match stack with (stack:empty) -> 0 (stack:elem _ state _) -> state )) (define (lookup-action state kind) (let loop ((l actions[state])) (match l with (action-list:nil) -> (error "missing action?") (action-list:cons tkind action tl) -> (if (eq? terminals[tkind] kind) action (loop tl))))) (define (lookup-goto state nt) (let loop ((l goto[state])) (match l with (goto-list:nil) -> (error "missing goto?") (goto-list:cons nt0 new-state tl) -> (if (eq? nt0 nt) new-state (loop tl))))) (define (pop-n n) (let loop ((n n) (result (list:nil))) (if (= n 0) result (loop (- n 1) (list:cons (pop) result))))) (define (push item state) (set! stack (stack:elem item state stack))) (define (pop) (match stack with (stack:elem item _ rest) -> (begin (set! stack rest) item) (stack:empty) -> (error "stack underflow"))) (let loop ((tok (next-token))) (cond ((eq? tok eof-token) (pop) (pop)) (else (print-string "token: ") (printn tok) ;;(print-string "state: ") (printn (get-state)) ;;(print "indentation: ") (printn indentation) (vcase token tok ((:t kind val range) (let ((a (lookup-action (get-state) kind))) (vcase action a ((:shift state) (push (item:t kind val range) state) (loop (next-token))) ((:reduce plen nt) (let ((args (pop-n plen)) (next-state (lookup-goto (get-state) nt))) (push (item:nt non-terminals[nt] args) next-state)) (loop tok))) ))) ))) ))) (define (indent n) (let loop ((n n)) (cond ((= n 0) #t) (else (print-string " ") (loop (- n 1)))))) (define (print-parse-tree t) (let loop0 ((d 0) (t t)) (indent d) (match t with (item:t sym str range) -> (begin (print range) (print-string " ") (print sym) (print-string " ") (printn str)) (item:nt sym items) -> (begin (printn sym) (let loop1 ((l items)) (match l with () -> #u (hd . tl) -> (begin (loop0 (+ d 1) hd) (loop1 tl))))) ))) ;; print a parse tree out in a way that facilitates writing patterns for it. (define ppt (item:nt sym items) -> (begin (print-string "(item:nt ") (print sym) (print-string " ") (ppt-list items) (print-string ")")) (item:t sym str range) -> (begin (print-string "(item:t ") (print sym) (print-string " \"") (print-string str) (print-string "\")")) ) (define (ppt-list l) (print-string "(") (ppt-list2 l)) (define ppt-list2 () -> (print-string ")") (hd . tl) -> (begin (ppt hd) (print-string " ") (ppt-list2 tl)) ) ;; (datatype formal ;; (:var string) ;; ;;(:var-with-default string (expr)) ;; ) ;; (datatype ifclause ;; (:case (expr) (expr)) ;; ) ;; (datatype expr ;; (:int int) ;; (:string string) ;; (:varref string) ;; (:binary string (expr) (expr)) ;; (:unary string (expr)) ;; (:funcall (expr) (list (expr))) ;; (:getitem (expr) (expr)) ;; (:getattr (expr) string) ;; (:lambda (list (formal)) (expr)) ;; (:sequence (list (expr))) ;; (:function string (list (formal)) (expr)) ;; (:if (list (ifclause)) (expr)) ;; (:while (expr) (expr) (expr)) ;; (:for (expr) (expr) (expr) (expr)) ;; (:break) ;; (:continue) ;; (:pass) ;; (:raise (expr)) ;; (:return (expr)) ;; (:unparsed symbol (list (expr))) ;; ) (define (perror where x) (print-string "decode error in ") (print-string where) (print-string ": ") (printn x) (error "decode error")) (define p-expr (let ((l (alist/new))) ;; store the parsing functions in an alist keyed by production rule. (define (A key val) (set! l (alist/add l key val))) (A 'atom p-atom) (lambda (x) (match x with (item:t _ _ _) -> (perror "p-expr" x) (item:nt kind val) -> (let ((probe (alist/lookup l kind))) (match probe with (maybe:no) -> (expr:unparsed kind (p-list val)) (maybe:yes fun) -> (fun val) )))))) (let ((t (if (> (sys:argc) 1) (parse sys:argv[1]) (parse "tests/parse_2.py")))) (printn t) (print-parse-tree t) (ppt t) (terpri) (let ((exp (p-expr t))) (ppt-expr 0 exp) (print-string "\n") exp ) )