(include "lib/core.scm")
(include "lib/pair.scm")
(include "lib/string.scm")
(include "lib/frb.scm")
(include "lib/symbol.scm")
(include "lib/io.scm")

;; A token produced by the lexer.
(datatype token
  ;; (:t kind text)
  ;;   kind - the action symbol of the final DFA state that matched
  ;;   text - the characters that made up the match
  (:t symbol string)
  )

;; Sentinel token sent to the consumer once the input is exhausted.
;; Callers compare against this exact object with eq?.
(define eof-token (token:t 'eof "eof"))

;; Run the generated DFA over a character stream, using longest-match:
;; characters are accumulated for as long as the DFA stays in (or is
;; still heading towards) a final state, and a token is emitted when a
;; character takes it out of a final state.
;;
;;   producer - thunk returning the next character; #\eof at end of input
;;   consumer - procedure called with each token, then with eof-token
;;
;; Returns #t once eof-token has been delivered.
(define (lex producer consumer)

  (let ((action 'not-final)
        (state 0))

    ;; An action of 'not-final marks a non-accepting DFA state.
    (define (final? action)
      (not (eq? action 'not-final)))

    ;; <chars> is accumulated newest-first, so reverse it before
    ;; building the token text.
    (define (emit kind chars)
      (consumer (token:t kind (list->string (reverse chars)))))

    ;; defines the <step> function (DFA) from the lexer generator
    ;; (presumably also the <finals> table indexed below - confirm
    ;; against the generated file)
    (include "parse/lexstep.scm")

    (let loop ((ch (producer))
               (last 'not-final)
               (current (list:nil)))
      (cond ((char=? ch #\eof)
             ;; Input ended while sitting in a final state: the
             ;; accumulated characters are a complete token, so flush
             ;; it rather than dropping it.  Characters that never
             ;; reached a final state are still discarded.
             (cond ((final? last)
                    (emit last current)
                    #t)
                   (else #t))
             (consumer eof-token)
             #t)
            (else
             (set! state (step ch state))
             (set! action finals[state])
             (cond ((and (not (final? last)) (final? action))
                    ;; we've entered a new final state
                    (loop (producer) action (list:cons ch current)))
                   ((and (final? last) (not (final? action)))
                    ;; we've left a final state - longest match - emit token
                    (emit last current)
                    ;; restart the DFA and re-examine <ch> as the first
                    ;; character of the next token (no new read here)
                    (set! state 0)
                    (loop ch 'not-final (list:nil)))
                   (else
                    ;; accumulate this character
                    (loop (producer) action (list:cons ch current)))))))
    ))

;; Wrap <lex> in a generator that reads its characters from <file>.
;; Each call to the result yields the next token.  After <lex> returns,
;; the generator keeps yielding eof-token forever, so callers may poll
;; past the end of input.
(define (make-lex-generator file)

  (define (producer)
    (file/read-char file))

  (make-generator
   (lambda (consumer)
     (lex producer consumer)
     (let forever ()
       (consumer eof-token)
       (forever))
     )))

;; Driver: lex "nodes.py" and print each token until eof-token arrives.
;; NOTE(review): <f> is never closed - harmless if the program exits
;; right after this, but worth confirming.
(let ((f (file/open-read "nodes.py")))
  (define g (make-lex-generator f))
  (let loop ((tok (g)))
    (cond ((eq? tok eof-token) "done")
          (else
           (printn tok)
           (loop (g))))))