Monday, April 12, 2010

I had never taken a serious look at the Python programming language, and I really only knew it for its usage of indentation for scoping. So I decided to try using it to implement an interpreter for my 01_ programming language, which I had implemented in Java, C, and Haskell earlier. It took a few days of my spare time.

Python is dynamically typed, and I don't like dynamic typing. The static typing of Haskell, and even Java, helps catch stupid little errors.

Python classes can't be used to create abstract data types, and only naming conventions and the double-underscore name-mangling hack are used to indicate (unenforcibly) private data.

I neither like nor dislike the syntactically significant indentation. It's just another syntax. However, if I had to work with large blocks of code, I'd prefer braces, because editors can match distant pairs of braces. But maybe people using Python would avoid writing large blocks of code. People using Java certainly do write large blocks of code, though.

Python has lots of nifty syntactical sugar enabling concise code, such as with statements, list comprehensions, and iterators. The generator coroutines for writing iterators is also nice for making readable code.

The Python library seems pretty comprehensive. I don't think it matches the Java library, but I haven't really looked at it.

In any case, I've demonstrated to myself that I can pick up a new programming language. Though, in the case of Python, I didn't learn any new programming concepts, certainly nothing mind-bending like call-with-current-continuation.

In line count, the 01_ interpreter in Python is about twice the size of the interpreter in Haskell, but about half the size of the interpreter in Java, and about a fourth the size of the interpreter in C.

import os
import sys

def tokens(chars):
lookahead = None
while True:
if lookahead == None:
char = chars.__next__()
else:
char = lookahead
lookahead = None
if char == '0' or char == '1' or char == '.' or char == '_':
yield char
elif char == '=':
try:
lookahead = chars.__next__()
except StopIteration:
yield '='
raise StopIteration
if lookahead != '=':
yield char
else:
lookahead = None
while chars.__next__() != '\n':
pass
elif char != ' ' and char != '\t' and char != '\r' and char != '\n':
symbol = char
while True:
try:
char = chars.__next__()
except StopIteration:
yield symbol
raise StopIteration
if (char == '0' or char == '1' or char == '.' or char == '='
or char == '_' or char == ' ' or char == '\t'
or char == '\r' or char == '\n'):
yield symbol
lookahead = char
break
else:
symbol = symbol + char

class LiteralExpr:
def __init__(self, bits):
self.bits = bits

class BoundExpr:
def __init__(self, index):
self.index = index

class ConcatExpr:
def __init__(self, head, tail):
self.head = head
self.tail = tail

class CallExpr:
def __init__(self, fn, args):
self.fn = fn
self.args = args

class ParseError(BaseException):
pass

class Pattern:
def __init__(self, match, binding, index):
self.match = match
self.binding = binding
self.index = index
def is_wild(self):
return self.binding == '.'
def is_literal(self):
return self.binding == '_'

class Defn:
def __init__(self, tokens):
self.name = tokens.__next__()
if self.name == '.' or self.name == '=' or self.name == '_' \
or self.name == '0' or self.name == '1':
raise ParseError
self.patterns = self._parse_patterns(tokens)
self.body = self._collect_body(tokens)

def _parse_patterns(self, tokens):
patterns = []
match = []
index = 0
while True:
token = tokens.__next__()
if token == '=':
if match != []:
patterns.append(Pattern(match, ".", None))
return patterns
elif token == '0':
match.append(False)
elif token == '1':
match.append(True)
elif token == '.' or token == '_':
patterns.append(Pattern(match, token, None))
match = []
else:
patterns.append(Pattern(match, token, index))
match = []
index = index + 1

def _collect_body(self, tokens):
body = []
while True:
token = tokens.__next__()
if token == '.':
return body
body.append(token)

def _arity(self, fns, name):
return len(fns[name][0].patterns) if name in fns else None

def _binding(self, patterns, name):
for pattern in patterns:
if name == pattern.binding:
return pattern.index
return None

def parse_body(self, fns):
if len(self.body) == 0:
self.expr = LiteralExpr([])
else:
self.expr = self._parse_exprs(self.body, 0, self.patterns, fns)
del self.body

def _parse_exprs(self, tokens, index, patterns, fns):
exp, index = self._parse_expr(tokens, index, patterns, fns)
if index >= len(tokens):
return exp
return ConcatExpr(exp, self._parse_exprs(tokens, index, patterns, fns))

def _parse_expr(self, tokens, index, patterns, fns):
token = tokens[index]
if token == '0' or token == '1' or token == '_':
return self._parse_literal(tokens, index)
binding = self._binding(patterns, token)
if binding != None:
return BoundExpr(binding), index + 1
arity = self._arity(fns, token)
if arity == None:
raise ParseError
args = []
index = index + 1
while len(args) < arity:
if index >= len(tokens):
raise ParseError
arg, index = self._parse_expr(tokens, index, patterns, fns)
args.append(arg)
return CallExpr(token, args), index

def _parse_literal(self, tokens, index):
bits = []
while True:
if index >= len(tokens):
return LiteralExpr(bits), index
elif tokens[index] == '0':
bits.append(False)
elif tokens[index] == '1':
bits.append(True)
elif tokens[index] == '_':
return LiteralExpr(bits), index + 1
else:
return LiteralExpr(bits), index
index = index + 1

def collect_defns(fns, tokens):
def defns_list():
while True:
yield Defn(tokens)
for defn in defns_list():
if defn.name in fns:
if len(fns[defn.name][0].patterns) != len(defn.patterns):
raise ParseError
fns[defn.name].append(defn)
else:
fns[defn.name] = [defn]

def parse_file(fns, file):
def chars():
for line in file:
for char in line:
yield char
collect_defns(fns, tokens(chars()))

def parse_bodies(fns):
for name, defns in fns.items():
for defn in defns:
defn.parse_body(fns)

class UndefinedFunctionError(BaseException):
pass

class NilVal:
def value():
return None

def next():
assert False

class LiteralVal:
def __init__(self, bits, index):
self._bits = bits
self._index = index
self._next = None

def value(self):
if self._index >= len(self._bits):
return None
else:
return self._bits[self._index]

def next(self):
if self._next == None:
assert self._index < len(self._bits)
self._next = LiteralVal(self._bits, self._index + 1)
return self._next

class FileVal:
def __init__(self, file, index = 7, byte = 0):
self._file = file
self._index = index
self._byte = byte
self._next = None

def value(self):
if self._byte == None:
assert self._index == 7
bytes = self._file.read(1)
if len(bytes) == 0:
self._byte = -1
self._file.close()
else:
self._byte = bytes[0]
if self._byte < 0:
return None
return (self._byte & (1 << self._index)) != 0

def next(self):
if self._next == None:
self.value()
assert self.value() != None
if self._index > 0:
self._next = FileVal(self._file, self._index - 1, self._byte)
else:
self._next = FileVal(self._file, 7, None)
return self._next

class ConcatVal:
def __init__(self, head, tail):
self._head = head
self._tail = tail
self._value = None
self._next = None

def value(self):
if self._head != None:
bit = self._head.value()
if bit != None:
self._value = bit
self._next = ConcatVal(self._head.next(), self._tail)
else:
self._value = self._tail.value()
if self._value != None:
self._next = self._tail.next()
self._head = None
self._tail = None
return self._value

def next(self):
if self._head != None:
self.value()
assert self._value != None
return self._next

class ExprVal:
def __init__(self, fns, expr, bindings):
self._fns = fns
self._expr = expr
self._bindings = bindings
self._value = None
self._next = None

def value(self):
if self._expr != None:
if self._expr.__class__ is LiteralExpr:
self._value, self._next = self._eval_literal()
elif self._expr.__class__ is BoundExpr:
self._value, self._next = self._eval_bound()
elif self._expr.__class__ is ConcatExpr:
self._value, self._next = self._eval_concat()
elif self._expr.__class__ is CallExpr:
self._value, self._next = self._eval_call()
else:
assert False
self._expr = None
return self._value

def next(self):
if self._expr != None:
self.value()
assert self._value != None
return self._next

def _eval_literal(self):
if self._expr.bits == []:
return None, None
else:
return self._expr.bits[0], LiteralVal(self._expr.bits, 1)

def _eval_bound(self):
val = self._bindings[self._expr.index]
bit = val.value()
return bit, val.next() if bit != None else None

def _eval_concat(self):
head = ExprVal(self._fns, self._expr.head, self._bindings)
tail = ExprVal(self._fns, self._expr.tail, self._bindings)
bit = head.value()
if bit != None:
return bit, ConcatVal(head.next(), tail)
else:
bit = tail.value()
return bit, tail.next() if bit != None else None

def _eval_call(self):
val = apply(self._fns, self._expr.fn,
[ExprVal(self._fns, arg, self._bindings)
for arg in self._expr.args])
bit = val.value()
return bit, val.next() if bit != None else None

def apply(fns, fn, args):
def match(pattern, val):
for bit in pattern.match:
b = val.value()
if b == None or b != bit:
return False
val = val.next()
if pattern.is_literal():
return val.value() == None
elif pattern.is_wild():
return True
else:
return val

for defn in fns[fn]:
bindings = []
for pattern, arg in zip(defn.patterns, args):
binding = match(pattern, arg)
if binding == False:
break
elif binding == True:
continue
else:
bindings.append(binding)
else:
return ExprVal(fns, defn.expr, bindings)
raise UndefinedFunctionError

def write_value(value, stream):
b = 0
bit = 128
while value.value() != None:
if value.value():
b = b | bit
if bit == 1:
stream.write(bytes([b]))
bit = 128
b = 0
else:
bit = bit >> 1
value = value.next()

def parse_sources(files):
fns = {}
for file in files:
with open(file) as f:
parse_file(fns, f)
parse_bodies(fns)
return fns

def interpreter():
source_files = []
fn = None
args = None
stdin = FileVal(sys.stdin.buffer)
for arg in sys.argv[1:]:
if fn == None:
if arg != '-':
source_files.append(arg)
else:
fn = os.path.basename(source_files[0])
if fn.find('.') > 0:
fn = fn[0:fn.find('.')]
elif args == None:
args = []
fn = arg
elif arg == '-':
args.append(stdin)
else:
args.append(FileVal(open(arg, "rb")))
if fn == None:
fn = os.path.basename(source_files[-1])
if fn.find('.') > 0:
fn = fn[0:fn.find('.')]
if args == None:
args = []
fns = parse_sources(source_files)
arity = len(fns[fn][0].patterns)
if len(args) < arity:
args.append(stdin)
while len(args) < arity:
args.append(NilVal())
write_value(apply(fns, fn, args[0:arity]), sys.stdout.buffer)

interpreter()

No comments:

Post a Comment