Friday, June 19, 2009

The 01_ programming language

The 01_ programming language is an esoteric programming language in which the only data type is a list of bits. 01_ programs consist of function definitions. Functions are made of function calls, literal lists of bits, and list concatenation. There are no variables or side-effects. Pattern matching is used for conditionals. Evaluation is lazy.

Syntax and grammar

Whitespace separates tokens and is otherwise ignored. The characters 0, 1, ., _, and = are special tokens. The character sequence == introduces a comment, where the remaining characters in the line are ignored. Comments also separate tokens. Uninterrupted sequences of all non-whitespace characters other than the special tokens form symbol tokens.

Program = Definition*

Definition = Symbol Pattern* '=' Expression* '.'

Expression = Bit* '_' | Symbol | Symbol Expression*

Pattern = Bit* ( Symbol | '.' | '_' )

Bit = '0' | '1'
In other words, a program consists of a list of definitions. A definition defines a function for a given pattern to evaluate to a list of expressions, which are concatenated as the result of function's evaluation. An expression can be a literal list of bits, a symbol bound in the pattern, or a function call. Functions are right-associative.

A pattern starts with 0 or more literal bits which must be matched, followed by a symbol, which matches any remaining bits and binds the symbol to the remaining bits for the body of the function, or by ., which matches any remaining bits that will be ignored and is equivalent to binding a symbol that is not referenced in the body, or by _, which matches nil, the empty list, which also ends all finite lists.

As shorthand, if the final pattern before the = is at least one literal bit followed by ., the . may be omitted. Likewise, the final _ of an expression of at least one literal bit may be omitted if the expression is followed by a symbol or ., i.e. it is not followed by another literal list of bits.

Functions

A function consists of one or more definitions of a symbol with the same number of arguments. It is an error to add a definition of a symbol with a number of arguments other than the number it was first defined with.

When a function is evaluated, its arguments are matched against the patterns of its definitions in the order that the definitions were provided. The body of the first definition for which all the patterns are matched by their arguments is result. It is an error if none of the definitions match.

Interpreter implementation

Here is an implementation of a 01_ interpreter:

import java.io.FileInputStream;
import java.io.FileReader;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;

/**
* Interpreter for the 01_ language.
* Usage:
* java in01_ [-0 MAXBITS] [-1 INPUTFILE1] [-2 INPUTFILE2] ...
* SRCFILE ... FUNCTION
* Evaluates the specified FUNCTION after reading the specified
* SRCFILEs. The result is written to stdout. If MAXBITS is
* specified, the result is written as a string of 0s and 1s with a
* maximum length of MAXBITS. The first argument, if used, is stdin.
* The second argument, if used, is the file INPUTFILE1 if specified,
* and nil otherwise. The 3rd argument, if used, is the file
* INPUTFILE2, etc.
*/
public class in01_ {
public static void main(String[] args) throws Exception {
HashMap<Integer,String> inputfiles = new HashMap<Integer,String>();
int index = 0;
int maxbits = 0;
if (args.length >= 2 && "-0".equals(args[0])) {
index += 2;
maxbits = Integer.parseInt(args[1]);
}
while (index < args.length - 2) {
int inputfile;
try {
inputfile = Integer.parseInt(args[index]);
} catch (NumberFormatException e) {
break;
}
if (inputfile >= 0)
break;
inputfiles.put(-inputfile, args[index + 1]);
index += 2;
}
HashMap<String,Function> functions = new HashMap<String,Function>();
while (index < args.length - 1) {
readFile(functions, new FileReader(args[index]));
index++;
}
for (Function function : functions.values())
function.parseBodies(functions);
Function function = functions.get(args[args.length - 1]);
if (function == null)
throw new RuntimeException("Undefined symbol: " + args[args.length - 1]);
ArrayList<Value> funargs = new ArrayList<Value>();
if (function.getArity() > 0)
funargs.add(new StreamValue(System.in));
for (int i = 1; i < function.getArity(); i++) {
String inputfile = inputfiles.get(i);
if (inputfile == null)
funargs.add(Value.NIL);
else
funargs.add(new StreamValue(new FileInputStream(inputfile)));
}
if (maxbits > 0)
writeBits(maxbits, function.eval(funargs));
else
writeValue(System.out, function.eval(funargs));
System.out.flush();
}

private static void readFile(HashMap<String,Function> functions, Reader reader) {
DefReader defReader = new DefReader(reader);
for (Def def = defReader.nextDef(); def != null; def = defReader.nextDef()) {
Function function = functions.get(def.getName());
if (function == null)
functions.put(def.getName(), new Function(def));
else
function.addDef(def);
}
}

public static void writeBits(int maxbits, Value value) {
value = forceValue(value);
while (maxbits-- > 0 && value.head() != HeadValue.NIL) {
System.out.print(value.head() == HeadValue.ONE ? '1' : '0');
value = forceValue(value.tail());
}
}

public static void writeValue(OutputStream out, Value value) throws Exception {
int output = 0;
int bit = 7;
value = forceValue(value);
while (value.head() != HeadValue.NIL) {
if (value.head() == HeadValue.ONE)
output |= 1<<bit;
if (bit > 0) {
bit--;
} else {
out.write(output);
bit = 7;
output = 0;
}
value = forceValue(value.tail());
}
}

public static Value forceValue(Value value) {
while (value.head() == HeadValue.DELAYED)
value = value.force();
return value;
}

public static enum HeadValue {
ZERO, ONE, NIL, DELAYED;
}

public static abstract class Value {
public static final Value NIL = new Value() {
public HeadValue head() {
return HeadValue.NIL;
}

public Value tail() {
throw new RuntimeException();
}

public Value force() {
return this;
}
};

public abstract HeadValue head();
public abstract Value tail();
public abstract Value force();
public void makeForced() {
}
}

public static class StreamValue extends Value {
private InputStream in;
private HeadValue head;
private StreamValue tail;
private int bit;
private int inputByte;

private StreamValue(InputStream in, HeadValue head, int bit, int inputByte) {
assert head != HeadValue.NIL;
assert head != HeadValue.DELAYED || bit == 7;
this.in = in;
this.head = head;
this.tail = null;
this.bit = bit;
this.inputByte = inputByte;
}

public StreamValue(InputStream in) {
this(in, HeadValue.DELAYED, 7, 0);
}

public HeadValue head() {
return head;
}

public Value tail() {
assert head != HeadValue.DELAYED && head != HeadValue.NIL;
if (tail != null)
return tail;
if (bit == 0)
tail = new StreamValue(in);
else
tail = new StreamValue(in, bitValue(bit - 1), bit - 1, inputByte);
return tail;
}

private HeadValue bitValue(int bit) {
return (inputByte & (1 << bit)) != 0 ? HeadValue.ONE : HeadValue.ZERO;
}

public Value force() {
if (head != HeadValue.DELAYED)
return this;
try {
inputByte = in.read();
} catch (Exception e) {
throw new RuntimeException(e);
}
if (inputByte < 0)
head = HeadValue.NIL;
else
head = bitValue(bit);
return this;
}
}

public static class LiteralValue extends Value {
private HeadValue head;
private Value tail;

public LiteralValue(Token token, Value tail) {
assert token.getTokenType() == TokenType.ONE || token.getTokenType() == TokenType.ZERO;
head = token.getTokenType() == TokenType.ONE ? HeadValue.ONE : HeadValue.ZERO;
this.tail = tail;
}

public HeadValue head() {
return head;
}

public Value tail() {
return tail;
}

public Value force() {
return this;
}
}

public static class ConcatValue extends Value {
private Value headList;
private Value tailList;
private Value tail = null;

public ConcatValue(Value headList, Value tailList) {
assert headList.head() != HeadValue.NIL;
this.headList = headList;
this.tailList = tailList;
}

public HeadValue head() {
return headList.head() != HeadValue.NIL ? headList.head() : tailList.head();
}

public Value tail() {
assert head() != HeadValue.DELAYED;
if (tail == null) {
if (headList.head() == HeadValue.NIL) {
assert tailList.head() != HeadValue.DELAYED;
return tailList.tail();
} else if (headList.tail().head() == HeadValue.NIL) {
tail = tailList;
} else {
tail = new ConcatValue(headList.tail(), tailList);
}
}
return tail;
}

public Value force() {
headList = headList.force();
if (headList.head() == HeadValue.NIL)
return tailList;
if (!(headList instanceof ConcatValue))
return this;
ConcatValue head = (ConcatValue) headList;
return new ConcatValue(head.headList, new ConcatValue(head.tailList, tailList));
}
}

public static class FuncallValue extends Value {
private Function function;
private ArrayList<Value> args;
private Value forced = null;

public FuncallValue(Function function, ArrayList<Value> args) {
this.function = function;
this.args = args;
}

public HeadValue head() {
if (forced != null)
return forced.head();
return HeadValue.DELAYED;
}

public Value tail() {
if (forced != null)
return forced.tail();
assert false;
throw new RuntimeException();
}

public Value force() {
if (forced != null)
return forced;
return function.eval(args);
}

public void makeForced() {
if (forced == null)
forced = forceValue(this);
}
}

public static abstract class Expr {
public static final Expr NIL = new Expr() {
public Value eval(HashMap<String,Value> bindings) {
return Value.NIL;
}
};

public abstract Value eval(HashMap<String,Value> bindings);
}

public static class LiteralExpr extends Expr {
private Value value;

public LiteralExpr(ArrayList<Token> tokens, int start, int end) {
value = getValue(tokens, start, end);
}

public Value eval(HashMap<String,Value> bindings) {
return value;
}

private Value getValue(ArrayList<Token> tokens, int start, int end) {
if (start >= end)
return Value.NIL;
return new LiteralValue(tokens.get(start), getValue(tokens, start + 1, end));
}
}

public static class ConcatExpr extends Expr {
private Expr head;
private Expr tail;

public ConcatExpr(Expr head, Expr tail) {
this.head = head;
this.tail = tail;
}

public Value eval(HashMap<String,Value> bindings) {
Value headVal = head.eval(bindings);
if (headVal.head() == HeadValue.NIL)
return tail.eval(bindings);
return new ConcatValue(headVal, tail.eval(bindings));
}
}

public static class BoundExpr extends Expr {
private String name;

public BoundExpr(String name) {
this.name = name;
}

public Value eval(HashMap<String,Value> bindings) {
return bindings.get(name);
}
}

public static class FuncallExpr extends Expr {
private Function function;
private ArrayList<Expr> args;

public FuncallExpr(Function function, ArrayList<Expr> args) {
this.function = function;
this.args = args;
}

public Value eval(HashMap<String,Value> bindings) {
ArrayList<Value> argValues = new ArrayList<Value>(args.size());
for (Expr arg : args)
argValues.add(arg.eval(bindings));
return new FuncallValue(function, argValues);
}
}

public static class Function {
private ArrayList<Def> defs = new ArrayList<Def>();

public Function(Def def) {
defs.add(def);
}

public int getArity() {
return defs.get(0).getArity();
}

public void addDef(Def def) {
assert def.getName().equals(defs.get(0).getName());
if (def.getArity() != defs.get(0).getArity())
throw new RuntimeException(def.getName() + " defined as " + def.getArity() + "-ary, originally " + defs.get(0).getArity() + "-ary");
defs.add(def);
}

public void parseBodies(HashMap<String,Function> functions) {
for (Def def : defs)
def.parseBody(functions);
}

public Value eval(ArrayList<Value> args) {
for (Def def : defs) {
Value value = def.eval(args);
if (value != null)
return value;
}
throw new RuntimeException("No matching pattern for " + defs.get(0).getName());
}
}

public static class Def {
private String name;
private ArrayList<Pattern> patterns;
private ArrayList<Token> bodyTokens;
private Expr body = null;

public Def(String name, ArrayList<Pattern> patterns, ArrayList<Token> bodyTokens) {
this.name = name;
this.patterns = patterns;
this.bodyTokens = bodyTokens;
}

public String getName() {
return name;
}

public int getArity() {
return patterns.size();
}

public void parseBody(HashMap<String,Function> functions) {
HashSet<String> bindings = new HashSet<String>();
for (Pattern pattern : patterns)
if (pattern.getBinding().getTokenType() == TokenType.SYMBOL)
bindings.add(pattern.getBinding().getName());
body = new Parser(functions, bindings).getExpr();
if (body == null)
body = Expr.NIL;
}

private class Parser {
private int index = 0;
private HashMap<String,Function> functions;
private HashSet<String> bindings;

public Parser(HashMap<String,Function> functions, HashSet<String> bindings) {
this.functions = functions;
this.bindings = bindings;
}

public Expr getExpr() {
Expr expr = nextExpr();
if (index < bodyTokens.size())
return new ConcatExpr(expr, getExpr());
return expr;
}

public Expr nextExpr() {
if (index >= bodyTokens.size())
return null;
Token token = bodyTokens.get(index);
switch (token.getTokenType()) {
case ONE: case ZERO:
return literalExpr();
case NIL:
index++;
return Expr.NIL;
case SYMBOL:
index++;
if (bindings.contains(token.getName()))
return new BoundExpr(token.getName());
return funcallExpr(token);
default:
assert false;
throw new RuntimeException();
}
}

public LiteralExpr literalExpr() {
int start = index;
while (++index < bodyTokens.size()) {
switch (bodyTokens.get(index).getTokenType()) {
case ZERO: case ONE:
break;
case NIL:
return new LiteralExpr(bodyTokens, start, index++);
default:
return new LiteralExpr(bodyTokens, start, index);
}
}
return new LiteralExpr(bodyTokens, start, bodyTokens.size());
}

public FuncallExpr funcallExpr(Token token) {
Function function = functions.get(token.getName());
if (function == null)
throw new RuntimeException("Undefined symbol: " + token.getName() + " in body of " + name);
ArrayList<Expr> args = new ArrayList<Expr>(function.getArity());
for (int i = 0; i < function.getArity(); i++) {
Expr arg = nextExpr();
if (arg == null)
throw new RuntimeException("Not enough arguments for " + token.getName() + " in body of " + name + ": needs " + function.getArity());
args.add(arg);
}
return new FuncallExpr(function, args);
}
}

public Value eval(ArrayList<Value> args) {
HashMap<String,Value> bindings = new HashMap<String,Value>();
for (int i = 0; i < args.size(); i++) {
Pattern pattern = patterns.get(i);
Value match = pattern.tryMatch(args.get(i));
if (match == null)
return null;
if (pattern.getBinding().getTokenType() == TokenType.SYMBOL)
bindings.put(pattern.getBinding().getName(), match);
}
return body.eval(bindings);
}
}

public static class Pattern {
private Token binding;
private ArrayList<Boolean> bits;

public Pattern(Token binding, ArrayList<Boolean> bits) {
this.binding = binding;
this.bits = bits;
}

public Token getBinding() {
return binding;
}

public Value tryMatch(Value value) {
for (boolean bit : bits) {
value.makeForced();
value = forceValue(value);
switch (value.head()) {
case ZERO:
if (bit)
return null;
break;
case ONE:
if (!bit)
return null;
break;
case DELAYED:
assert false;
default:
return null;
}
value = value.tail();
}
if (binding.getTokenType() == TokenType.NIL) {
value.makeForced();
value = forceValue(value);
return value.head() == HeadValue.NIL ? value : null;
}
return value;
}
}

public static class DefReader {
private Tokenizer tokenizer;

public DefReader(Reader reader) {
this.tokenizer = new Tokenizer(reader);
}

public Def nextDef() {
Token name = tokenizer.nextToken();
if (name == null)
return null;
if (name.getTokenType() != TokenType.SYMBOL)
throw new RuntimeException("Expected symbol, got " + name.getName());
ArrayList<Pattern> patterns = getPatterns();
ArrayList<Token> body = getBody();
return new Def(name.getName(), patterns, body);
}

private ArrayList<Pattern> getPatterns() {
ArrayList<Pattern> patterns = new ArrayList<Pattern>();
ArrayList<Boolean> pattern = new ArrayList<Boolean>();
for (;;) {
Token token = tokenizer.nextToken();
if (token == null)
throw new RuntimeException("Expected =, got EOF");
switch (token.getTokenType()) {
case ZERO:
pattern.add(false);
break;
case ONE:
pattern.add(true);
break;
case SYMBOL:
for (Pattern p : patterns)
if (token.getName().equals(p.getBinding().getName()))
throw new RuntimeException("Multiple patterns bind " + token.getName());
/*FALLTHROUGH*/
case DOT:
case NIL:
patterns.add(new Pattern(token, pattern));
pattern = new ArrayList<Boolean>();
break;
case EQ:
if (pattern.size() > 0)
patterns.add(new Pattern(Token.DOT, pattern));
return patterns;
}
}
}

private ArrayList<Token> getBody() {
ArrayList<Token> body = new ArrayList<Token>();
for (;;) {
Token token = tokenizer.nextToken();
if (token == null)
throw new RuntimeException("Got unexpected EOF");
switch (token.getTokenType()) {
case EQ:
throw new RuntimeException("Got unexpected =");
case DOT:
return body;
default:
body.add(token);
break;
}
}
}
}

public static enum TokenType {
ZERO, ONE, DOT, NIL, EQ, SYMBOL;
}

public static class Token {
public static Token ZERO = new Token(TokenType.ZERO, "0");
public static Token ONE = new Token(TokenType.ONE, "1");
public static Token DOT = new Token(TokenType.DOT, ".");
public static Token NIL = new Token(TokenType.NIL, "_");
public static Token EQ = new Token(TokenType.EQ, "=");

private TokenType tokenType;
private String name;

private Token(TokenType tokenType, String name) {
this.tokenType = tokenType;
this.name = name;
}

public Token(String name) {
this(TokenType.SYMBOL, name);
}

public TokenType getTokenType() {
return tokenType;
}

public String getName() {
return name;
}
}

public static class Tokenizer {
private Reader reader;
private int nextChar = -1;

public Tokenizer(Reader reader) {
this.reader = reader;
}

private int getNextChar() {
if (nextChar < 0)
try {
return reader.read();
} catch (Exception e) {
throw new RuntimeException(e);
}
int result = nextChar;
nextChar = -1;
return result;
}

private void pushback(int pushedChar) {
assert nextChar < 0;
nextChar = pushedChar;
}

public Token nextToken() {
for (;;) {
int ch = getNextChar();
if (ch < 0)
return null;
switch (ch) {
case ' ': case '\t': case '\r': case '\n':
break;
case '0':
return Token.ZERO;
case '1':
return Token.ONE;
case '.':
return Token.DOT;
case '_':
return Token.NIL;
case '=':
ch = getNextChar();
if (ch != '=') {
pushback(ch);
return Token.EQ;
}
discardComment();
break;
default:
pushback(ch);
return readSymbol();
}
}
}

private void discardComment() {
for (;;) {
int ch = getNextChar();
if (ch < 0 || ch == '\n')
return;
}
}

private Token readSymbol() {
StringBuilder sb = new StringBuilder();
for (;;) {
int ch = getNextChar();
if (ch < 0)
return new Token(sb.toString());
switch (ch) {
case ' ': case '\t': case '\r': case '\n':
case '0': case '1': case '.': case '_': case '=':
pushback(ch);
return new Token(sb.toString());
default:
sb.append((char) ch);
break;
}
}
}
}
}

No comments:

Post a Comment