# -*- Mode: Python; tab-width: 4 -*-
import string
import sys
is_a = isinstance
# XXX The <atom> class is used for every kind of atom except symbols,
# which are represented as plain, unadulterated strings.
# This inconsistency should be fixed.
class atom:
    """A tagged literal value.

    kind  -- a tag naming the sort of literal (for example 'int' or 'string').
    value -- the payload that goes with that tag.
    """

    def __init__ (self, kind, value):
        # Store both fields exactly as given; nothing is validated or copied.
        self.value = value
        self.kind = kind

    def __repr__ (self):
        # Debug form: the tag printed with %s, the payload with %r.
        fields = (self.kind, self.value)
        return '<A %s %r>' % fields
class reader:
    """Character-at-a-time reader for a Scheme-like s-expression syntax.

    Symbols come back as plain strings, lists as Python lists, and every
    other literal (ints, chars, strings, bools, vectors, undefined) as an
    <atom>.  <file> is any object with a read(n) method that returns text;
    read(1) must return '' at end of input.

    The code deliberately sticks to constructs that are valid on both
    Python 2 and Python 3.
    """

    # single-character escapes recognised after a backslash in a string
    special = {'n':'\n','t':'\t'}
    # characters that end a bare atom (besides whitespace and end of input)
    _delimiters = '()[]{}'
    _digits = '0123456789'
    # '#x', '#o', '#b' integer prefixes and the base each one selects
    _radix = {'x': 16, 'o': 8, 'b': 2}
    # '#t', '#f', '#u' literals and the (kind, value) each one produces
    _constants = {
        't': ('bool', 'true'),
        'f': ('bool', 'false'),
        'u': ('undefined', 'undefined'),
        }

    def __init__ (self, file):
        self.file = file
        # one character of lookahead; None until the first peek()
        self.char = None
        # current line number, 1-based, advanced by next()
        self.line = 1

    def peek (self):
        """Return the next character without consuming it ('' at EOF)."""
        if self.char is None:
            self.char = self.file.read (1)
        return self.char

    def next (self):
        """Consume and return the next character ('' at EOF)."""
        # Go through peek() so that a call made before any lookahead has
        # been loaded returns the first character rather than None.
        result = self.peek()
        self.char = self.file.read (1)
        if result == '\n':
            self.line += 1
        return result

    def skip_whitespace (self):
        """Skip whitespace and ';' comments up to the next significant char."""
        while 1:
            ch = self.peek()
            if not ch:
                break
            elif ch == ';':
                # a comment runs through the end of the line (or the input)
                while 1:
                    ch = self.next()
                    if ch == '' or ch in '\r\n':
                        break
            elif ch in string.whitespace:
                self.next()
            else:
                break

    def read (self):
        """Read and return one datum.

        Raises EOFError when the input ends before or inside the datum and
        SyntaxError for an unknown '#' escape or a malformed array index.
        """
        self.skip_whitespace()
        ch = self.peek()
        if ch == '':
            raise EOFError ("Unexpected end of file")
        elif ch == '(':
            result = self.read_list()
        elif ch == '"':
            result = self.read_string()
        elif ch == "'":
            self.next()
            result = ['quote', self.read()]
        elif ch == ",":
            self.next()
            result = ['comma', self.read()]
            # unquote, etc.. can be found in old lumberjack code if needed.
        elif ch == '#':
            self.next()
            result = self._read_hash()
        elif ch in '-0123456789':
            result = self._read_number_or_symbol()
        else:
            result = self.read_atom()
        # hack to support postfix array-reference syntax
        self.skip_whitespace()
        ch = self.peek()
        if ch != '' and ch in '[{':
            index = self.read_array_index()
            if ch == '[':
                return ['%%array-ref', result, index]
            else:
                return ['%%product-ref', result, index]
        else:
            return result

    def _read_hash (self):
        # Read the literal that follows an already-consumed '#'.
        ch = self.peek()
        if ch == '':
            # ('' in 'Xx') is true, so EOF has to be ruled out explicitly
            raise EOFError ("Unexpected end of file")
        key = ch.lower()
        if ch == '\\':
            self.next()
            probe = self.read_atom()
            if probe == '':
                raise EOFError ("Unexpected end of file")
            elif probe == 'newline':
                ch = '\n'
            elif probe == 'space':
                ch = ' '
            else:
                ch = probe[0]
            return atom ('char', ch)
        elif key in self._radix:
            self.next()
            return atom ('int', int (self.read_atom(), self._radix[key]))
        elif key in self._constants:
            self.next()
            kind, value = self._constants[key]
            return atom (kind, value)
        elif ch == '(':
            return atom ('vector', self.read_list())
        else:
            raise SyntaxError ('Illegal #-escape character: "%s"' % ch)

    def _read_number_or_symbol (self):
        # Read a token that starts with '-' or a digit: an optional '-'
        # followed only by digits is an int atom, anything else a symbol.
        a = self.read_atom()
        if a[0] == '-':
            body = a[1:]
        else:
            body = a
        # A lone '-' has an empty body and so stays a symbol; tokens such
        # as '1-2' are symbols too instead of blowing up in int().
        if body and all (ch in self._digits for ch in body):
            return atom ('int', int (a))
        else:
            return a

    def read_atom (self):
        """Read a bare token, returned as a string.

        At least one character is always consumed, even a delimiter; the
        token then runs up to the next whitespace, delimiter or EOF.
        """
        result = self.next()
        while 1:
            ch = self.peek()
            if ch == '' or ch in string.whitespace or ch in self._delimiters:
                return result
            else:
                result = result + self.next()

    def read_string (self):
        """Read a double-quoted string and return it as a 'string' atom.

        Raises EOFError if the input ends before the closing quote.
        """
        line = self.line
        parts = []
        # throw away the quote.
        self.next()
        while 1:
            ch = self.next()
            if ch == '':
                # without this check an unterminated string loops forever
                raise EOFError (
                    "Unexpected end of file in string starting on line %d" % line
                    )
            elif ch == '"':
                # that was the close-quote
                return atom ('string', ''.join (parts))
            elif ch == '\\':
                # ignore this backslash, read the next char
                ch = self.next()
                if ch == '':
                    raise EOFError (
                        "Unexpected end of file in string starting on line %d" % line
                        )
                elif ch in 'xX':
                    # ascii escapes introduced only R6RS, *however*, theirs
                    # is terminated by a semicolon and can be more than two hex
                    # digits.
                    hex0 = self.next()
                    hex1 = self.next()
                    if hex0 == '' or hex1 == '':
                        raise EOFError (
                            "Unexpected end of file in string starting on line %d" % line
                            )
                    parts.append (chr (int (hex0 + hex1, 16)))
                else:
                    parts.append (self.special.get (ch, ch))
            else:
                parts.append (ch)

    def read_list (self):
        """Read a parenthesised list; the lookahead must be its '('.

        An element of the form (include <filename>) is replaced in place by
        the forms read from that file.
        """
        result = []
        # throw away the paren
        self.next()
        while 1:
            self.skip_whitespace()
            if self.peek() == ')':
                # throw away the paren
                self.next()
                return result
            else:
                exp = self.read()
                if isinstance (exp, list) and exp and exp[0] == 'include':
                    self.read_include (exp, result)
                else:
                    result.append (exp)

    def read_array_index (self):
        """Read the '[index]' / '{index}' suffix and return the index datum.

        Raises SyntaxError when the index is not followed by ']' or '}'.
        """
        # throw away open bracket
        self.next()
        exp = self.read()
        closer = self.read()
        # Compare by equality: <closer> may be an atom or a list, and a
        # substring test against ']}' would raise TypeError for those.
        if closer not in (']', '}'):
            raise SyntaxError ("expected closing ']/}' character")
        return exp

    def read_all (self):
        """Read every remaining top-level form and return them as a list.

        EOFError from read() is what ends the loop, so a final datum that is
        cut short by end of input is dropped rather than reported.
        """
        forms = []
        try:
            while 1:
                form = self.read()
                # <form> may be an empty list, so test it before indexing
                if isinstance (form, list) and form and form[0] == 'include':
                    self.read_include (form, forms)
                else:
                    forms.append (form)
        except EOFError:
            return forms

    # XXX I'm not happy with this here, but if I put it in the transformer, it
    # will require an extra pass *before* the transformer, because expand_body()
    # will not recognize things hidden in an include (e.g., 'define' forms).
    def read_include (self, exp, result):
        """Append to <result> every form read from the file named by <exp>.

        <exp> is an (include <filename>) form whose second element carries
        the file name in its .value attribute.
        """
        filename = exp[1].value
        # Text mode, so that read(1) yields str on Python 3 as well; the
        # with-block closes the file once it has been read.
        with open (filename, 'r') as included:
            result.extend (reader (included).read_all())
if __name__ == '__main__':
    # Command-line driver: parse the file named by the first argument (or
    # standard input when no argument is given) and pretty-print the forms.
    import pprint
    import sys
    if len (sys.argv) < 2:
        forms = reader (sys.stdin).read_all()
    else:
        # the with-block closes the input file once it has been parsed
        with open (sys.argv[1], 'r') as source:
            forms = reader (source).read_all()
    pprint.pprint (forms)