;; -*- Mode: Irken -*-
(include "lib/core.scm")
(include "lib/pair.scm")
(include "lib/string.scm")
(include "lib/io.scm")
(include "lib/os.scm")
(include "lib/alist.scm")
(include "lib/aa_map.scm")
(include "lib/symbol.scm")
;; XXX consider rewriting with more experience
(datatype field
(:t symbol sexp)
)
;; an s-expression datatype.
(datatype sexp
(:list (list sexp))
(:symbol symbol)
(:string string)
(:char char)
(:bool bool)
(:int int)
(:undef)
(:vector (list sexp))
(:record (list field))
(:cons symbol symbol) ;; constructor ':' syntax
(:attr sexp symbol) ;; attribute '.' syntax
)
;; In retrospect, I think it may have been a mistake to embed 'list'
;; into sexp. It forces all sexp-handling code to cover two cases,
;; often triggering the need for an auxiliary function. Might be
;; cleaner to just have (sexp:nil) and (sexp:cons)...
;; idea: how about a set of macros, similar to the format macro,
;; to make sexps easier to build? worth it?
;; similar to the list macro. think of this as the 'list' function
;; for s-expressions.
(defmacro sexp
(sexp) -> (sexp:list '())
(sexp x ...) -> (sexp:list (LIST x ...))
)
(define (char-class char-list)
(let ((v (make-vector 256 #f)))
(define (in-class? ch)
v[(char->ascii ch)])
(let loop ((l char-list))
(match l with
() -> in-class?
(hd . tl) -> (begin (set! v[(char->ascii hd)] #t) (loop tl))
))))
(define hex-map
(literal
(alist/make
(#\0 0) (#\1 1) (#\2 2) (#\3 3) (#\4 4) (#\5 5) (#\6 6) (#\7 7) (#\8 8) (#\9 9)
(#\a 10) (#\b 11) (#\c 12) (#\d 13) (#\e 14) (#\f 15)
(#\A 10) (#\B 11) (#\C 12) (#\D 13) (#\E 14) (#\F 15)
)))
(define dec-map
(literal
(alist/make
(#\0 0) (#\1 1) (#\2 2) (#\3 3) (#\4 4) (#\5 5) (#\6 6) (#\7 7) (#\8 8) (#\9 9)
)))
(define oct-map
(literal
(alist/make
(#\0 0) (#\1 1) (#\2 2) (#\3 3) (#\4 4) (#\5 5) (#\6 6) (#\7 7)
)))
(define whitespace '(#\space #\tab #\newline #\return))
(define delimiters (string->list "()[]{}:"))
(define letters (string->list "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"))
(define all-delimiters (append whitespace delimiters))
(define digits (string->list "0123456789"))
(define whitespace? (char-class '(#\space #\tab #\newline #\return)))
(define delim? (char-class all-delimiters))
(define digit? (char-class digits))
(define letter? (char-class letters))
(define field? (char-class (cons #\- (append letters digits))))
(define (reader read-char)
(let ((char #\eof)) ;; one-character buffer
(define (peek)
(if (eq? char #\eof)
(set! char (read-char))
#u)
char)
(define (next)
(let ((result char))
;;(print result)
(set! char (read-char))
result))
(define (skip-peek)
(next)
(peek)
)
(define (skip-comment)
(let loop ((ch (next)))
(match ch with
#\return -> #u
#\newline -> #u
_ -> (loop (skip-peek)))))
(define (skip-whitespace)
(let loop ((ch (peek)))
(cond ((eq? ch #\eof) #u)
((eq? ch #\;) (skip-comment) (loop (peek)))
((whitespace? ch) (loop (skip-peek)))
(else #u))))
;; very tricky, using a state machine
(define (read-atom)
(let ((state 0)
(dot-count 0)
(ch #\0))
(let loop ((result '()))
(set! ch (peek))
(if (eq? ch #\.)
(set! dot-count (+ dot-count 1)))
(set! state
(match state with
0 -> (cond ((eq? ch #\eof) 4)
((eq? ch #\-) 1)
((digit? ch) 2)
((delim? ch) 7)
(else 3))
1 -> (cond ((eq? ch #\eof) 5)
((delim? ch) 5)
((digit? ch) 2)
(else 3))
2 -> (cond ((eq? ch #\eof) 6)
((delim? ch) 6)
((digit? ch) 2)
(else 3))
3 -> (cond ((eq? ch #\eof) 5)
((delim? ch) 5)
(else 3))
_ -> (impossible)))
(cond ((< state 4) (loop (list:cons (next) result))) ;; non-final
((= state 4) (error "unexpected end-of-file")) ;; error final
(else ;; all other finals: 5,6,7
;; single-character - for #\A
(if (= state 7) (set! result (list:cons (next) result)))
(:atom (list->string (reverse result)) ;; result string
(= state 6) ;; number?
(length result) ;; #chars
dot-count)))))) ;; #dots
(define (dotted-symbol s n)
;; handle dots in a symbol
;; as a special case, allow all-dots symbols (like '...) through unscathed
(if (= n (string-length s))
(sexp:symbol (string->symbol s))
(let loop ((parts (reverse (string-split s #\.))))
;; a.b.c => (get (get a b) c)
(match parts with
;; c b a
(base) -> (sexp:symbol (string->symbol base))
(attr . rest) -> (sexp:attr (loop rest) (string->symbol attr))
() -> (impossible)
))))
(define (read-symbol)
(match (read-atom) with
(:atom sym #t _ _) -> (error1 "expected symbol" sym)
(:atom sym #f _ 0) -> (string->symbol sym)
(:atom sym #f _ _) -> (error1 "no dots allowed in constructor names" sym)
))
(define (read1)
(skip-whitespace)
(let ((ch (peek)))
(match ch with
#\eof -> (error "unexpected end of file")
#\( -> (sexp:list (read-list))
#\{ -> (read-record)
#\" -> (read-string)
#\' -> (begin (next) (sexp (sexp:symbol 'quote) (read)))
#\, -> (begin (next) (sexp (sexp:symbol 'comma) (read)))
#\: -> (begin (next) (sexp:cons 'nil (read-symbol)))
#\# -> (begin
(next)
(set! ch (peek))
(match ch with
#\\ -> (begin
(next) ;; skip backslash
(match (read-atom) with
(:atom atom _ 1 _) -> (sexp:char (string-ref atom 0))
(:atom "newline" _ _ _) -> (sexp:char #\newline)
(:atom "space" _ _ _) -> (sexp:char #\space)
(:atom "return" _ _ _) -> (sexp:char #\return)
(:atom "tab" _ _ _) -> (sexp:char #\tab)
(:atom "eof" _ _ _) -> (sexp:char #\eof)
(:atom "nul" _ _ _) -> (sexp:char #\nul)
x -> (error1 "bad character constant" x)
))
;; Bb
#\X -> (begin (next) (sexp:int (read-hex-int)))
#\x -> (begin (next) (sexp:int (read-hex-int)))
#\O -> (begin (next) (sexp:int (read-oct-int)))
#\o -> (begin (next) (sexp:int (read-oct-int)))
#\T -> (begin (next) (sexp:bool #t))
#\t -> (begin (next) (sexp:bool #t))
#\F -> (begin (next) (sexp:bool #f))
#\f -> (begin (next) (sexp:bool #f))
#\U -> (begin (next) (sexp:undef))
#\u -> (begin (next) (sexp:undef))
#\( -> (sexp:vector (read-list))
x -> (error1 "syntax error" x)
))
#\) -> (error "unexpected close-paren")
_ -> (match (read-atom) with
(:atom chars #t n _) -> (sexp:int (read-int chars n))
(:atom chars #f _ 0) -> (sexp:symbol (string->symbol chars))
(:atom chars #f _ n) -> (dotted-symbol chars n)
)
)
)
)
(define (read)
(let ((result (read1)))
(skip-whitespace)
(let ((ch (peek)))
(match ch with
;; postfix array-reference syntax
#\[ -> (let ((index (read-array-index)))
;; primops take a parameter---------V
(sexp (sexp:symbol '%array-ref) (sexp:bool #f) result index))
;; infix colon syntax
#\: -> (begin
(next)
(match result (read) with
(sexp:symbol dt) (sexp:symbol alt) -> (sexp:cons dt alt)
;; not forcing (sexp:symbol) on <ob> might allow 'builtin method calls'...
;;ob (sexp:cons 'nil method) -> (sexp:attr (sexp:attr ob 'o) method)
ob (sexp:cons 'nil method) -> (sexp (sexp:symbol '%method) (sexp:symbol method) ob)
;; object : type syntax
ob type -> (sexp (sexp:symbol '%typed) ob type)))
;;x y -> (error1 "colon syntax" (:pair x y))))
;; infix 'get' syntax (i.e., attribute access)
;; XXX this is disabled because it breaks symbols like '...
;; so we'll probably need to do the same hack as the python version
;;#\. -> (begin (next) (sexp:attr result (read-symbol)))
_ -> result
))))
(define (read-array-index)
(next) ;; skip open-left-bracket
(let ((exp (read)))
(skip-whitespace)
(if (eq? (peek) #\])
(begin (next) exp)
(error "expected closing ]/} character"))))
(define (read-hex-digit ch)
(match (alist/lookup hex-map ch) with
(maybe:no) -> (error "bad hex digit")
(maybe:yes num) -> num))
(define (read-hex-code)
(let ((n0 (read-hex-digit (next)))
(n1 (read-hex-digit (next))))
(ascii->char (+ (<< n0 8) n1))))
(define (read-string)
(next) ;; throw away the opening quote
(let loop ((ch (peek))
(result '()))
(match ch with
#\" -> (begin
(next) ;; throw away the close-quote
(sexp:string (list->string (reverse result))))
#\\ -> (begin
;; ignore this backslash, read the next char
(next)
(set! ch (next))
(match ch with
#\x -> (loop (peek) (list:cons (read-hex-code) result))
#\X -> (loop (peek) (list:cons (read-hex-code) result))
#\r -> (loop (peek) (list:cons #\return result))
#\n -> (loop (peek) (list:cons #\newline result))
#\t -> (loop (peek) (list:cons #\tab result))
#\" -> (loop (peek) (list:cons #\" result))
#\\ -> (loop (peek) (list:cons #\\ result))
_ -> (error1 "bad backslash escape in string" result)
))
_ -> (loop (skip-peek) (list:cons ch result))
)))
(define (read-list)
;; throw away the open paren
(next)
(let loop ((result '()))
(skip-whitespace)
(let ((ch (peek)))
(if (eq? ch #\))
;; throw away the paren
(begin (next) (reverse result))
(let ((exp (read)))
;; XXX should I check for <include> here?
(loop (list:cons exp result)))
))))
(define (read-record)
;; { label=value label=value ...}
(next)
(let loop ((result '()))
(skip-whitespace)
(let ((p (peek)))
(if (eq? p #\})
(begin (next) (sexp:record (reverse result)))
(let ((name (read-name)))
(cond ((eq? name '...)
(loop (list:cons (field:t name (sexp:bool #f)) result)))
(else
(skip-whitespace)
(if (not (eq? (peek) #\=))
(error1 "expected '=' in record literal" result)
(begin
(next)
(let ((val (read)))
(loop (list:cons (field:t name val) result))))))))
))))
(define (read-name)
(let loop ((result '())
(ch (peek))
(dots #f))
(cond ((or (field? ch) (eq? ch #\.))
(loop (list:cons ch result) (skip-peek) #f))
(else
(string->symbol (list->string (reverse result)))))))
(define (read-int s n)
(let ((neg? (eq? (string-ref s 0) #\-))
(start (if neg? 1 0)))
(let loop ((i start)
(r 0))
(if (= i n)
(if neg? (- 0 r) r)
(match (alist/lookup dec-map (string-ref s i)) with
(maybe:no) -> (error1 "bad decimal digit?" s)
(maybe:yes digit) -> (loop (+ i 1) (+ (* r 10) digit)))
))))
(define (read-hex-int)
(let ((neg? (eq? (peek) #\-)))
(if neg? (begin (next) #u))
(let loop ((r 0) (ch (peek)))
(match (alist/lookup hex-map ch) with
(maybe:yes digit) -> (loop (+ (* r 16) digit) (skip-peek))
(maybe:no) -> (if neg? (- 0 r) r)))))
(define (read-oct-int)
(let ((neg? (eq? (peek) #\-)))
(if neg? (begin (next) #u))
(let loop ((r 0) (ch (peek)))
(match (alist/lookup oct-map ch) with
(maybe:yes digit) -> (loop (+ (* r 8) digit) (skip-peek))
(maybe:no) -> (if neg? (- 0 r) r)))))
(define (read-include path result)
;; cons the forms from this file onto result, in reverse order...
(append (reverse (read-file path)) result))
(define (read-all)
(let loop ((result '()))
(skip-whitespace)
(if (eq? (peek) #\eof)
(reverse result)
(let ((form (read)))
(match form with
(sexp:list ((sexp:symbol 'include) (sexp:string path))) -> (loop (read-include path result))
_ -> (loop (list:cons form result)))))))
(read-all)
))
(define (read-file path)
(print-string "reading file ") (printn path)
(let ((file (file/open-read path)))
(reader (lambda () (file/read-char file)))))
(define (read-string s)
(reader (string-reader s)))
(define sexp->symbol
(sexp:symbol s) -> s
x -> (error1 "sexp->symbol" x))
(define sexp->int
(sexp:int n) -> n
x -> (error1 "sexp->int" x))
;; utility functions
(define field=?
(field:t sa va) (field:t sb vb)
-> (and (eq? sa sb) (sexp=? va vb)))
;; XXX consider eq? shortcut
(define sexp=?
(sexp:undef) (sexp:undef) -> #t
(sexp:symbol a) (sexp:symbol b) -> (eq? a b)
(sexp:bool a) (sexp:bool b) -> (eq? a b)
(sexp:int a) (sexp:int b) -> (= a b)
(sexp:string a) (sexp:string b) -> (string=? a b)
(sexp:char a) (sexp:char b) -> (char=? a b)
(sexp:list l0) (sexp:list l1) -> (every2? sexp=? l0 l1)
(sexp:vector a) (sexp:vector b) -> (every2? sexp=? a b)
(sexp:record a) (sexp:record b) -> (every2? field=? a b)
(sexp:cons a0 a1) (sexp:cons b0 b1) -> (and (eq? a0 b0) (eq? a1 b1))
(sexp:attr a0 a1) (sexp:attr b0 b1) -> (and (sexp=? a0 b0) (eq? a1 b1))
_ _ -> #f
)
(define (sexp1 sym rest)
;; build an s-expression with <sym> at the front followed by <rest>
(sexp:list (list:cons (sexp:symbol sym) rest)))
(define repr-field
(field:t '... _) -> "..."
(field:t name val) -> (format (sym name) "=" (p repr val)))
(define repr
(sexp:list ((sexp:symbol 'quote) x)) -> (format "'" (repr x))
(sexp:list l) -> (format "(" (join repr " " l) ")")
(sexp:symbol s) -> (format (sym s))
(sexp:string s) -> (format "\"" s "\"") ;; XXX escape backslashes...
(sexp:char ch) -> (format "#\\" (char ch))
(sexp:bool #t) -> "#t"
(sexp:bool #f) -> "#f"
(sexp:int n) -> (format (int n))
(sexp:undef) -> "#u"
(sexp:vector v) -> (format "#(" (join repr " " v) ")")
(sexp:record fl) -> (format "{" (join repr-field " " fl) "}")
(sexp:cons dt c) -> (format (if (eq? dt 'nil) "" (symbol->string dt)) ":" (sym c))
(sexp:attr lhs a) -> (format (p repr lhs) "." (sym a))
)
(define indent
0 -> #t
n -> (begin (print-string " ") (indent (- n 1))))
(define pp-size-field
(field:t name val) -> (+ (+ (string-length (symbol->string name))
1) (pp-size val)))
(define pp-size
(sexp:list l) -> (foldr + (+ 1 (length l)) (map pp-size l))
(sexp:symbol s) -> (string-length (symbol->string s))
(sexp:string s) -> (+ 2 (string-length s)) ;; escaped backslashes!
(sexp:char ch) -> (string-length (repr (sexp:char ch)))
(sexp:bool _) -> 2
(sexp:int n) -> (string-length (int->string n))
(sexp:undef) -> 2
(sexp:vector v) -> (foldr + (+ 2 (length v)) (map pp-size v))
(sexp:record fl) -> (foldr + (+ (length fl) 1) (map pp-size-field fl))
(sexp:cons dt c) -> (+ 1 (+ (string-length (symbol->string dt)) (string-length (symbol->string c))))
(sexp:attr lhs a) -> (+ 1 (+ (pp-size lhs) (string-length (symbol->string a))))
)
(define (pp d exp)
(let ((size (pp-size exp)))
(if (< size 80)
(print-string (repr exp))
(match exp with
(sexp:list ()) -> (print-string "()")
(sexp:list (hd . tl)) -> (begin (print-string "(")
(pp d hd)
(for-each
(lambda (x)
(newline)
(indent (+ d 1))
(pp (+ d 1) x)) tl)
(print-string ")"))
;; XXX complete for vector & record.
_ -> (print-string (repr exp))))))
(define (test-file)
(let ((t (read-file
(if (> sys.argc 1)
sys.argv[1]
"lib/core.scm"))))
;; (printn t)
;;(for-each (lambda (x) (printn x) (pp 0 x) (newline)) t)
(printn t)
(for-each (lambda (x) (pp 0 x) (newline)) t)
#u
))
;(test-file)