Monday, February 22, 2010

Here is an interpreter for 01_ that I wrote in 01_. It compiles the 01_ source code into a bit code, and then interprets that bit code.

One limitation is that the top-level function evaluated is the first function defined in the source, instead of the function specified on the command line or inferred from the filename. This could easily be coded to be, in a fashion, specified from the command line. This would be somewhat awkward, since the 01_ code does not get the actual command line data, but rather, the contents of the file named in the command line, so the name of the function to be evaluated would have to be put into a file or piped into stdin to be received by the interpreter.

Another limitation is that it can only pass in a maximum of 9 arguments to the top level function. (Any additional arguments will be given nil.) This limit can be easily changed by changing the arglist function (and the interp and bitcode functions) to take a different number of arguments.
Bit code format

Here is the bit code format used by my 01_ interpreter.

Parenthesis indicates a list. Ellipses indicates zero or more of the
preceding item. A vertical bar indicates alternatives.

program = (name ones names functions)
names = (name name ...)
functions = (definitions definitions ...)
definitions = (definition definition ...)
definition = (patterns body)
patterns = (pattern ...)
pattern = bound-pattern | literal-pattern | wild-pattern
bound-pattern = 0 bits
literal-pattern = 1 0 bits
wild-pattern = 1 1 bits
bits = bit ...
bit = 0 | 1
ones = 1 ...
body = op op ...
op = push | concat
push = 0 value
concat = 1 value
value = literal | binding | funcall
binding = 0 ones
literal = 1 0 bits
funcall = 1 1 (bits ones)

The program has the name of the function to apply, the number of arguments that function takes, a pair of lists forming an association list of names to functions.

A push op pushes its value onto the local stack to be used as an argument to a function call. A concat op concatenates its result to the value that is returned by the current function.

A binding value has an index to the bound parameters of the function. Let n = the number of bound parameters, then 0 is the last parameter, and n - 1 is the first parameter.

A funcall value has the name of the function and the number of arguments that function takes.

All the counts, the binding index and the number of arguments taken by a function, are in base 1.
Library functions


== library

== force a pattern match failure
error 0 = error _.

concat a b = a b.

== lists

== first item in the list
head 0. = _.
head 11rest = 1 head rest.
head 10rest = 0 head rest.
head _ = _.

== the list with the first item removed
tail 0rest = rest.
tail 11rest = tail rest.
tail 10rest = tail rest.
tail _ = _.

list-encode _ = 0.
list-encode 1x = 11 list-encode x.
list-encode 0x = 10 list-encode x.

== association lists

== association list lookup
alist-lookup key klist dlist =
if-equal key head klist
head dlist
alist-lookup key tail klist tail dlist.
alist-lookup . _ . = _.

== association list update
alist-update key data klist dlist =
if-equal key head klist
concat list-encode data
tail dlist
concat list-encode head dlist
alist-update key data
tail klist
tail dlist.

alist-append key data klist dlist =
if-equal key head klist
concat list-encode concat head dlist
list-encode data
tail dlist
concat list-encode head dlist
alist-append key data
tail klist
tail dlist.

== stacks

== new empty stack
empty-stack = 0_.

== number of items in the stack (in base 1)
stack-count 0stack = _.
stack-count 1stack = 1 stack-count stack.

drop-stack-count 0stack = stack.
drop-stack-count 1stack = drop-stack-count stack.

== top of the stack
top 0. = _. == stack underflow
top stack = top' decrement-count stack-count stack drop-stack-count stack.

top' count 0stack-data = _.
top' count 10stack-data = 0 top' count drop-stack-bit count stack-data.
top' count 11stack-data = 1 top' count drop-stack-bit count stack-data.

decrement-count 1count = count.

drop-stack-bit _ stack-data = stack-data.
drop-stack-bit 1count 0stack-data = drop-stack-bit count stack-data.
drop-stack-bit 1count 10stack-data = drop-stack-bit count stack-data.
drop-stack-bit 1count 11stack-data = drop-stack-bit count stack-data.

== returns stack with data pushed on top
push data stack =
1 stack-count stack 0 push' data stack-count stack drop-stack-count stack.

push' _ count stack-data =
0 take-stack-bit count stack-data
push' _ count drop-stack-bit count stack-data.
push' 0data count stack-data =
10 take-stack-bit count stack-data
push' data count drop-stack-bit count stack-data.
push' 1data count stack-data =
11 take-stack-bit count stack-data
push' data count drop-stack-bit count stack-data.

take-stack-bit _ . = _.
take-stack-bit 1count 0stack-data = 0 take-stack-bit count stack-data.
take-stack-bit 1count 10stack-data = 10 take-stack-bit count stack-data.
take-stack-bit 1count 11stack-data = 11 take-stack-bit count stack-data.

== returns stack with the top item popped
pop 0. = 0_. == stack underflow
pop 10. = 0_. == 1 item optimization
pop 1stack = stack-count stack 0 pop' stack-count stack drop-stack-count stack.

pop' count 0stack-data =
take-stack-bit count stack-data
pop' count drop-stack-bit count stack-data.
pop' count 10stack-data =
take-stack-bit count stack-data
pop' count drop-stack-bit count stack-data.
pop' count 11stack-data =
take-stack-bit count stack-data
pop' count drop-stack-bit count stack-data.

== conditional
if-equal 0x 0y true false = if-equal x y true false.
if-equal 1x 1y true false = if-equal x y true false.
if-equal _ _ true . = true.
if-equal . . . false = false.

==
take-bits _ . = _.
take-bits . _ = _.
take-bits 0bits 0data = 0 take-bits bits data.
take-bits 1bits 0data = 0 take-bits bits data.
take-bits 0bits 1data = 1 take-bits bits data.
take-bits 1bits 1data = 1 take-bits bits data.

drop-bits _ data = data.
drop-bits . _ = _.
drop-bits 0bits 0data = drop-bits bits data.
drop-bits 1bits 0data = drop-bits bits data.
drop-bits 0bits 1data = drop-bits bits data.
drop-bits 1bits 1data = drop-bits bits data.

Tokenizer


== 01_ tokenizer
== token tokenized
== 0 0
== 1 1
== _ 00
== . 01
== = 10

== input stream to list of tokens
tokenize in = tokenize' in _.

tokenize' _ . = _.

== comment
tokenize' 00111101 00111101in flag = end-symbol flag
drop-comment in.

== 0
tokenize' 00110000in flag = end-symbol flag
100 tokenize' in _.

== 1
tokenize' 00110001in flag = end-symbol flag
110 tokenize' in _.

== _
tokenize' 01011111in flag = end-symbol flag
10100 tokenize' in _.

== .
tokenize' 00101110in flag = end-symbol flag
10110 tokenize' in _.

== =
tokenize' 00111101in flag = end-symbol flag
11100 tokenize' in _.

== whitespace
tokenize' 00100000in flag = end-symbol flag tokenize' in _. == SPC
tokenize' 00001001in flag = end-symbol flag tokenize' in _. == TAB
tokenize' 00001010in flag = end-symbol flag tokenize' in _. == LF
tokenize' 00001101in flag = end-symbol flag tokenize' in _. == CR

== symbol
tokenize' in . = take-symbol 00000000 in.

end-symbol _ = _.
end-symbol . = 0.

take-symbol _ in = tokenize' in 0.
take-symbol 0bits 0in = 10 take-symbol bits in.
take-symbol 0bits 1in = 11 take-symbol bits in.

== comment
drop-comment _ = _.
drop-comment 00001010in = tokenize' in _.
drop-comment in = drop-comment drop-bits 00000000 in.


==
test-tokenize in = test-tokenize-out tokenize in _.

test-tokenize-out _ . = _.
test-tokenize-out list flag = test-tokenize-out-one head list tail list flag.

test-tokenize-out-one 0_ list . = 00110000 test-tokenize-out list _.
test-tokenize-out-one 1_ list . = 00110001 test-tokenize-out list _.
test-tokenize-out-one 00_ list . = 01011111 test-tokenize-out list _.
test-tokenize-out-one 01_ list . = 00101110 test-tokenize-out list _.
test-tokenize-out-one 10_ list . = 00111101 test-tokenize-out list _.

test-tokenize-out-one one list flag = test-tokenize-out-sep flag one test-tokenize-out list 0.

test-tokenize-out-sep _ = _.
test-tokenize-out-sep . = 00100000.

Parser


== parse a list of 01_ tokens into a list of function names

parse-names tokens = parse-names' head tokens tail tokens _.

parse-names' _ . list = list.
parse-names' 0_ . . = error _. == expecting symbol
parse-names' 1_ . . = error _. == expecting symbol
parse-names' 00_ . . = error _. == expecting symbol
parse-names' 01_ . . = error _. == expecting symbol
parse-names' 10_ . . = error _. == expecting symbol
parse-names' name tokens list =
parse-names' head parse-drop-def head tokens tail tokens
tail parse-drop-def head tokens tail tokens
add-unique name list.

add-unique item _ = list-encode item.
add-unique item list = list-encode head list
if-equal item head list
tail list
add-unique item
tail list.

parse-drop-def 10_ tokens = parse-drop-def-body head tokens tail tokens.
parse-drop-def . tokens = parse-drop-def head tokens tail tokens.

parse-drop-def-body 01_ tokens = tokens.
parse-drop-def-body . tokens = parse-drop-def-body head tokens tail tokens.

test-parse-names in = test-tokenize-out parse-names tokenize in _.

== given a list of items, return a list of empty lists of equal length
== this can be used to initialize association lists
== keys cannot be nil
alist-empty-values keys = alist-empty-values' head keys tail keys.

alist-empty-values' _ . = _.
alist-empty-values' . keys = 0 alist-empty-values' head keys tail keys.

== collect arities from list of 01_ tokens
parse-arities names tokens = parse-arities' names
alist-empty-values names
head tokens
tail tokens.

parse-arities' names arities _ . = arities.
parse-arities' names arities name tokens =
parse-arities-count-args names arities name _
head tokens
tail tokens
_.

parse-arities-count-args names arities name count _ tokens flag = error _.
== unexpected EOF

parse-arities-count-args names arities name count 10_ tokens _ =
== got = without any bits
parse-arities' names
alist-update name count names arities
head parse-drop-def-body head tokens tail tokens
tail parse-drop-def-body head tokens tail tokens.

parse-arities-count-args names arities name count 10_ tokens . =
== got = with leading bits
parse-arities' names
alist-update name
concat 1 count
names arities
head parse-drop-def-body head tokens tail tokens
tail parse-drop-def-body head tokens tail tokens.

parse-arities-count-args names arities name count 0_ tokens . =
parse-arities-count-args names arities name count
head tokens
tail tokens
0.

parse-arities-count-args names arities name count 1_ tokens . =
parse-arities-count-args names arities name count
head tokens
tail tokens
0.

parse-arities-count-args names arities name count . tokens . =
parse-arities-count-args names arities name
concat 1 count
head tokens
tail tokens
_.

test-parse-arities in = test-parse-arities' parse-names tokenize in
tokenize in.
test-parse-arities' names tokens =
test-alist-out names parse-arities names tokens.

test-alist-out _ . = _.
test-alist-out klist dlist =
head klist
00111101
test-bits-out head dlist
00001010
test-alist-out tail klist tail dlist.

test-bits-out _ = _.
test-bits-out 0b = 00110000 test-bits-out b.
test-bits-out 1b = 00110001 test-bits-out b.

== parse
parse tokens = parse' parse-names tokens tokens.
parse' names tokens = parse'' names parse-arities names tokens tokens.
parse'' names arities tokens =
list-encode head names
list-encode head arities
list-encode names
list-encode parse-functions names arities
alist-empty-values names
tokens.

parse-functions names arities functions _ = functions.

parse-functions names arities functions tokens =
parse-functions names arities
alist-append head tokens
parse-def-args names arities _ _ _
head tail tokens
tail tail tokens
names
functions
parse-drop-def head tokens tail tokens.

parse-def-args names arities bindings bits patterns 10_ tokens = == =
if-equal bits _
list-encode patterns
list-encode concat patterns
list-encode concat 10 bits == literal pattern
parse-def-body names arities bindings _ _ _ head tokens tail tokens.

parse-def-args names arities bindings bits patterns 0_ tokens = == 0
parse-def-args names arities bindings
concat bits 0
patterns
head tokens
tail tokens.

parse-def-args names arities bindings bits patterns 1_ tokens = == 1
parse-def-args names arities bindings
concat bits 1
patterns
head tokens
tail tokens.

parse-def-args names arities bindings bits patterns 00_ tokens = == _
== literal pattern
parse-def-args names arities bindings
_
concat patterns
list-encode concat 10 bits
head tokens
tail tokens.

parse-def-args names arities bindings bits patterns 01_ tokens = == .
== wild pattern
parse-def-args names arities bindings
_
concat patterns
list-encode concat 11 bits
head tokens
tail tokens.

parse-def-args names arities bindings bits patterns arg tokens =
== bound pattern
parse-def-args names arities
concat list-encode arg
bindings
_
concat patterns
list-encode concat 0 bits
head tokens
tail tokens.

parse-def-body names arities bindings argcounts funcalls bits _ . = error _.

parse-def-body names arities bindings argcounts funcalls bits 0_ tokens =
parse-def-body names arities bindings
argcounts funcalls
concat bits 0
head tokens
tail tokens.

parse-def-body names arities bindings argcounts funcalls bits 1_ tokens =
parse-def-body names arities bindings
argcounts funcalls
concat bits 1
head tokens
tail tokens.

parse-def-body names arities bindings argcounts funcalls _ 01_ . = _.
== . end of definition

parse-def-body names arities bindings argcounts funcalls _ 00_ tokens =
== _ nil constant
list-encode if-equal argcounts _
110_ == concat nil
010 == push nil
check-argcounts argcounts funcalls
head argcounts
parse-def-body names arities bindings
decrement-argcounts argcounts head argcounts
decrement-funcalls argcounts funcalls head argcounts
_
head tokens
tail tokens.

parse-def-body names arities bindings argcounts funcalls _ token tokens =
== symbol: either binding or funcall
parse-def-try-binding names arities bindings argcounts funcalls tokens
token bindings _
alist-lookup token names arities.

parse-def-body names arities bindings argcounts funcalls _ token tokens =
== symbol: either binding or funcall
parse-def-try-binding names arities bindings argcounts funcalls
tokens
token bindings _
alist-lookup token names arities.

parse-def-body names arities bindings argcounts funcalls bits 00_ tokens =
== _ terminated constant
list-encode if-equal argcounts _
concat 110 bits == concat nil
concat 010 bits == push nil
check-argcounts argcounts funcalls
head argcounts
parse-def-body names arities bindings
decrement-argcounts argcounts head argcounts
decrement-funcalls argcounts funcalls head argcounts
_
head tokens
tail tokens.

parse-def-body names arities bindings argcounts funcalls bits token tokens =
== end of constant
list-encode if-equal argcounts _
concat 110 bits == concat nil
concat 010 bits == push nil
check-argcounts argcounts funcalls
head argcounts
parse-def-body names arities bindings
decrement-argcounts argcounts head argcounts
decrement-funcalls argcounts funcalls head argcounts
_
token
tokens.

parse-def-try-binding names arities bindings argcounts funcalls tokens
name _ . _ =
== not in bindings, nullary funcall
list-encode if-equal _ argcounts
concat 111 concat list-encode name 0
concat 011 concat list-encode name 0
check-argcounts argcounts funcalls
head argcounts
parse-def-body names arities bindings
decrement-argcounts argcounts head argcounts
decrement-funcalls argcounts funcalls head argcounts
_
head tokens
tail tokens.

parse-def-try-binding names arities bindings argcounts funcalls tokens
name _ . arity =
== not in bindings, push funcall
parse-def-body names arities bindings
concat list-encode arity
argcounts
concat list-encode concat list-encode name
list-encode arity
funcalls
_
head tokens
tail tokens.

parse-def-try-binding names arities bindings argcounts funcalls tokens
name bind-list bind-index arity =
if-equal name head bind-list
parse-def-emit-bound names arities bindings
argcounts funcalls tokens
bind-index
parse-def-try-binding names arities bindings
argcounts funcalls tokens
name
tail bind-list
concat 1 bind-index
arity.

parse-def-emit-bound names arities bindings argcounts funcalls tokens
bind-index =
list-encode if-equal _ argcounts
concat 10 bind-index == concat
concat 00 bind-index == push
check-argcounts argcounts funcalls
head argcounts
parse-def-body names arities bindings
decrement-argcounts argcounts head argcounts
decrement-funcalls argcounts funcalls head argcounts
_
head tokens
tail tokens.

end-constant argcounts _ = _.
end-constant argcounts bits =
list-encode if-equal argcounts _
concat 110 bits == concat constant
concat 010 bits. == push constant

check-argcounts _ . . = _.
check-argcounts argcounts funcalls _ =
list-encode concat if-equal _ tail argcounts
111_ == concat funcall
011 == push funcall
head funcalls
check-argcounts tail argcounts tail funcalls
head tail argcounts.
check-argcounts argcounts funcalls 1_ =
list-encode concat if-equal _ tail argcounts
111_ == concat funcall
011 == push funcall
head funcalls
check-argcounts tail argcounts tail funcalls
head tail argcounts.
check-argcounts . . . = _.

decrement-argcounts _ . = _.
decrement-argcounts argcounts _ =
decrement-argcounts tail argcounts head tail argcounts.
decrement-argcounts argcounts 1_ =
decrement-argcounts tail argcounts head tail argcounts.
decrement-argcounts argcounts 1argcount =
list-encode argcount
tail argcounts.

decrement-funcalls _ . . = _.
decrement-funcalls argcounts funcalls _ =
decrement-funcalls tail argcounts tail funcalls head tail argcounts.
decrement-funcalls argcounts funcalls 1_ =
decrement-funcalls tail argcounts tail funcalls head tail argcounts.
decrement-funcalls . funcalls . = funcalls.

Bit code interpreter


== 01_ bitcode interpreter

bitcode bitcode arg arg2 arg3 arg4 arg5 arg6 arg7 arg8 arg9 =
interpret-bitcode bitcode
arglist arg arg2 arg3 arg4 arg5 arg6 arg7 arg8 arg9.

interpret-bitcode bitcode args =
apply head tail tail bitcode == list of function names
head tail tail tail bitcode == list of functions
alist-lookup head bitcode == main function name
head tail tail bitcode == list of function names
head tail tail tail bitcode == list of functions
push-arguments head tail bitcode == main function arity
args
empty-stack.

arglist arg arg2 arg3 arg4 arg5 arg6 arg7 arg8 arg9 =
list-encode arg list-encode arg2 list-encode arg3
list-encode arg4 list-encode arg5 list-encode arg6
list-encode arg7 list-encode arg8 list-encode arg9.

push-arguments _ . stack = stack.
push-arguments 1count args stack =
push-arguments count
tail args
push head args
stack.

apply fnames functions function args =
match fnames functions args
head function
tail function.

match fnames functions args _ defs = error _. == pattern match failed

match fnames functions args def defs =
match2 fnames functions args defs
head head def == first pattern
tail head def == remaining patterns
tail def == body
empty-stack == bindings
args.

match2 fnames functions saved-args defs
_ patterns body bindings args
= eval fnames functions bindings
head body
tail body
empty-stack.

== binding
match2 fnames functions saved-args defs
0pattern-bits patterns body bindings args =
if-match pattern-bits
top args
match2 fnames functions saved-args defs
head patterns
tail patterns
body
push drop-head pattern-bits
top args
bindings
pop args
match fnames functions saved-args
head defs
tail defs.

== literal
match2 fnames functions saved-args defs
10pattern-bits patterns body bindings args =
if-equal pattern-bits
top args
match2 fnames functions saved-args defs
head patterns
tail patterns
body
bindings
pop args
match fnames functions saved-args
head defs
tail defs.

== wild
match2 fnames functions saved-args defs
11pattern-bits patterns body bindings args =
if-match pattern-bits
top args
match2 fnames functions saved-args defs
head patterns
tail patterns
body
bindings
pop args
match fnames functions saved-args
head defs
tail defs.

eval fnames functions bindings _ ops stack = _.

== push bound expr
eval fnames functions bindings
00index ops stack =
eval fnames functions bindings
head ops
tail ops
push get-binding index bindings
stack.

== concat bound expr
eval fnames functions bindings
10index ops 0. =
get-binding index bindings
eval fnames functions bindings
head ops
tail ops
empty-stack.

get-binding _ bindings = top bindings.
get-binding 1index bindings = get-binding index pop bindings.

== push literal expr
eval fnames functions bindings
010bits ops stack =
eval fnames functions bindings
head ops
tail ops
push bits stack.

== concat literal expr
eval fnames functions bindings
110bits ops 0. =
bits
eval fnames functions bindings
head ops
tail ops
empty-stack.

== push funcall expr
eval fnames functions bindings
011funcall ops stack =
eval fnames functions bindings
head ops
tail ops
push apply fnames functions
alist-lookup head funcall
fnames functions
pull-args head tail funcall
stack empty-stack
pop-args head tail funcall
stack.

== concat funcall expr
eval fnames functions bindings
111funcall ops stack =
apply fnames functions
alist-lookup head funcall
fnames functions
pull-args head tail funcall
stack empty-stack
eval fnames functions bindings
head ops
tail ops
pop-args head tail funcall
stack.

pull-args _ . args = args.
pull-args 1count stack args =
pull-args count
pop stack
push top stack
args.

pop-args _ stack = stack.
pop-args 1count stack =
pop-args count
pop stack.

if-match 0x 0y true false = if-match x y true false.
if-match 1x 1y true false = if-match x y true false.
if-match _ . true . = true.
if-match . . . false = false.

drop-head 0x 0y = drop-head x y.
drop-head 1x 1y = drop-head x y.
drop-head _ y = y.

Top-level


interp in arg arg2 arg3 arg4 arg5 arg6 arg7 arg8 arg9 =
interpret-bitcode parse tokenize in
arglist arg arg2 arg3 arg4 arg5 arg6 arg7 arg8 arg9.

compile in = parse tokenize in 0000000. == pad to at least an even byte

Code size comparisons

Since I've implemented multiple 01_ interpreters, here's a comparison of the source code sizes:
  • Haskell - 216 lines, 9k
  • Python - 424 lines, 12k
  • 01_ - 746 lines, 25k
  • Java (slow original version) - 782 lines, 25k
  • Java (fast new version including compiler to Java) - 1188 lines, 34k
  • C - 1527 lines, 37k

Monday, February 15, 2010

I had the crazy notion of implementing an 01_ interpreter in 01_. One first step would be to come up with some bit-code data structure and write an interpreter for that in 01_, and, to start with, write a compiler from 01_ source code to the bit-code in a sane language like Haskell or Java for testing. Then, the final step would be to write an 01_ source to the bit code compiler in 01_.

Which leads to data structures in 01_. The only data type in 01_ is the list of bits, and only pattern matching and list concatenation are the available operations. Higher-level data structures can be built on that, and that's what I'm writing about.

I had an idea on how to make lists of arbitrary (finite) data. However, 01_ programs can deal with infinite bit lists, so I came up with an idea on how to make a stack of potentially infinite bit lists as well.

An 01_ interpreter would need some dictionary datatype, so association lists can be built on top of the lists. One way would be to use nested lists. Another would be to use a pair of lists.
List of arbitrary finite data

The scheme uses a control bit, where 1 means a data bit follows, and there are more bits, and 0 means the end of the list.

== first item in the list
head 0. = _.
head 11rest = 1 head rest.
head 10rest = 0 head rest.
head _ = _.

== the list with the first item removed
tail 0rest = rest.
tail 11rest = tail rest.
tail 10rest = tail rest.
tail _ = _.

== the list with a new item added to the front (or end)
add data list = list-encode data list.
add-to-end data list = list list-encode data.

list-encode _ = 0.
list-encode 1x = 11 encode x.
list-encode 0x = 10 encode x.

Stack of potentially infinite lists

Let n = the number of items in the stack. The stack data structure starts with n 1s, followed by a 0, followed the stack data.

The stack data starts with the first bit of every item in the stack, followed by the second bit of every item in the stack, followed by the third bit, etc.

Each stack bit starts with the control bit for the top bit in the top item in the stack, where a 1 is followed by the data bit of the item, and a 0 means the item has no more bits, which is followed by next item in the stack, etc all the way to the bottom of the stack.

This implementation would undoubtedly be hugely inefficient. When dealing with the largest item yet to be in the stack, the entire history of operations on the stack will have to be unwound for the rightmost bits. The 1 item optimization in pop helps a little by limiting the history that has to be unwound to the last time the stack was empty.

== new empty stack
empty = 0_.

== number of items in the stack (in base 1)
stack-count 0stack = _.
stack-count 1stack = 1 stack-count stack.

drop-stack-count 0stack = stack.
drop-stack-count 1stack = drop-stack-count stack.

== top of the stack
top 0. = _. == stack underflow
top stack =
top' decrement-count stack-count stack
drop-stack-count stack.

top' count 0stack-data = _.
top' count 10stack-data =
0 top' count
drop-stack-bit count stack-data.
top' count 11stack-data =
1 top' count
drop-stack-bit count stack-data.

decrement-count 1count = count.

drop-stack-bit _ stack-data = stack-data.
drop-stack-bit 1count 0stack-data = drop-stack-bit count stack-data.
drop-stack-bit 1count 10stack-data = drop-stack-bit count stack-data.
drop-stack-bit 1count 11stack-data = drop-stack-bit count stack-data.

== returns stack with data pushed on top
push data stack =
1 stack-count stack 0
push' data count drop-stack-count stack.

push' _ count stack-data =
0 take-stack-bit count stack-data
push' _ count drop-stack-bit count stack-data.
push' 0data count stack-data =
10 take-stack-bit count stack-data
push' data count drop-stack-bit count stack-data.
push' 1data count stack-data =
11 take-stack-bit count stack-data
push' data count drop-stack-bit count stack-data.

take-stack-bit _ . = _.
take-stack-bit 1count 0stack-data = 0 take-stack-bit count stack-data.
take-stack-bit 1count 10stack-data = 10 take-stack-bit count stack-data.
take-stack-bit 1count 11stack-data = 11 take-stack-bit count stack-data.

== returns stack with the top item popped
pop 0. = 0_. == stack underflow
pop 10. = 0_. == 1 item optimization
pop 1stack =
stack-count stack 0
pop' stack-count stack
drop-stack-count stack.

pop' count 0stack-data =
take-stack-bit count stack-data
pop' count
drop-stack-bit count stack-data.
pop' count 10stack-data =
take-stack-bit count stack-data
pop' count
drop-stack-bit count stack-data.
pop' count 11stack-data =
take-stack-bit count stack-data
pop' count
drop-stack-bit count stack-data.

Association lists

For implementing association lists, a pair of lists would be more efficient, but more complicated and error-prone to use than nested lists. I'll include implementations of both, but I'd probably use a pair of lists.

Association list using nested lists.

lookup key alist = try-lookup key head alist lookup key tail alist.
lookup . _ = _.

try-lookup key pair fail = if-equal key head pair head tail pair fail.

add-alist key data alist = add add key add data _ alist.

Association list using a pair of lists.

lookup key klist dlist =
if-equal key head klist head dlist lookup key tail klist tail dlist.
lookup . _ . = _.

They both use if-equal, defined here.

if-equal 0x 0y true false = if-equal x y true false.
if-equal 1x 1y true false = if-equal x y true false.
if-equal _ _ true . = true.
if-equal . . . false = false.

More association lists

These will use a pair of lists, one for the keys, and one for the data.

== test if key is present
if-has-key key _ true false = false.
if-has-key key klist true false =
if-equal key head klist true if-has-key key tail klist true false.

== append to data associated with key
append-alist key data klist dlist =
if-has-key key klist
append-alist' key data klist dlist
add data dlist.

append-alist' key data klist dlist =
if-equal key head klist
append-alist-matching data dlist
append-alist-nonmatching key data klist dlist.

append-alist-matching data dlist =
add add-to-end data head dlist
tail dlist.

append-alist-nonmatching key data klist dlist =
head dlist
append-alist' key data tail klist tail dlist.

append-alist-key key klist =
if-has-key key klist
klist
add key klist.

Monday, February 8, 2010

I had the idea of making a compiler for the 01_ programming language that compiled to the Java Virtual Machine (JVM). For starters, I wrote a few classes for the runtime library.
  • rt01_.val: For values in the only datatype in 01_, a list of bits. I included the trampoline() method, which must be called before evaluation, to avoid stack overflows, and to allow discarded values to be garbage collected.
  • rt01_.constant: For literal constants. I decided on representing the constant as a string of 0 and 1, since strings are the only way to represent arbitrary length constants in the Java class file format without needing initialization code to construct it. Additionally, the same string in the constant pool could be reused as the name of the field holding that constant. Compiling to Java means the field names have to be valid Java identifiers, but they don't have to be when compiling directly to Java class files.
  • rt01_.concat: For the only operation in 01_, list concatenation.
  • rt01_.input: For reading input as a list of bits.
  • rt01_.function: To be extended by classes representing 01_ functions. They should receive their arguments in the constructor, and build their results in eval(), and have a main() for being invoked from the command line.

Then, to test it out, I started with using it to write an interpreter. At first, it was getting OutOfMemoryErrors. I had written Function.eval() as

public rt01_.function eval(final rt01_.val[] args) {
return new rt01_.function() {
protected rt01_.val eval() {
for (Def def : defs) {
rt01_.val val = def.eval(args);
if (val != null)
return val;
}
throw new RuntimeException(...);
}
};
}

The capture of args in the anonymous class prevented the arguments from being garbage collected. Once I fixed that, the interpreter ran much faster than my previous interpreter in Java, faster than my interpreter in C, and even slightly faster than my interpreter in Haskell. Once I implemented the interpreter, it was a simple matter to extend it to compile to Java. The compiled code was maybe 5-10% faster than the interpreter.

I want to compile directly to class files rather than to Java, which will take a lot more work. This interpreter and compiler to Java was pretty much a weekend hack. I imagine the generated code would be pretty much the same as compiling to Java. The main difference is that the function names wouldn't have to be as heavily mangled, since they would no longer have to be valid Java identifiers, and would not have to avoid Java reserved words. Also, the SourceFile and LineNumberTable attributes could point to the 01_ source.

Here is the runtime:

package rt01_;

public abstract class val {
protected val trampoline() {
return null;
}

public boolean nil() {
return false;
}

public abstract boolean head();
public abstract val tail();

public static final val NIL = new val() {
public boolean nil() {
return true;
}

public boolean head() {
throw new NullPointerException();
}

public val tail() {
throw new NullPointerException();
}
};

public static val trampoline(val val) {
for (val v = val.trampoline(); v != null; v = val.trampoline())
val = v;
return val;
}
}

package rt01_;

public class constant extends val {
private String bits;
private int index;
private val tail = null;

public constant(String bits) {
this(bits, 0);
}

private constant(String bits, int index) {
this.bits = bits;
this.index = index;
}

protected val trampoline() {
if (index >= bits.length())
return NIL;
return null;
}

public boolean head() {
return '1' == bits.charAt(index);
}

public val tail() {
if (tail == null) {
if (index + 1 < bits.length())
tail = new constant(bits, index + 1);
else
tail = NIL;
}
return tail;
}
}

package rt01_;

public class concat extends val {
private val first;
private val second;
private val tail = null;

public concat(val first, val second) {
this.first = first;
this.second = second;
}

protected val trampoline() {
first = trampoline(first);
if (first.nil())
return second;
else
return null;
}

public boolean head() {
return first.head();
}

public val tail() {
if (tail == null)
tail = new concat(first.tail(), second);
return tail;
}
}

package rt01_;

import java.io.FileInputStream;
import java.io.InputStream;
import java.io.IOException;

public class input extends val {
private InputStream in;
private int byt;
private int bit;
private val tail = null;

public input(String file) throws IOException {
this(new FileInputStream(file));
}

public input(InputStream in) {
this(in, -1, 0);
}

private input(InputStream in, int byt, int bit) {
this.in = in;
this.byt = byt;
this.bit = bit;
}

protected val trampoline() {
if (bit == 0) {
try {
byt = in.read();
} catch (IOException e) {
throw new RuntimeException(e);
}
if (byt < 0)
return NIL;
bit = 128;
}
return null;
}

public boolean head() {
return (byt & bit) != 0;
}

public val tail() {
if (tail == null)
tail = new input(in, byt, bit >> 1);
return tail;
}
}

package rt01_;

import java.io.OutputStream;
import java.io.PrintStream;

public abstract class function extends val {
private val val = null;

protected val trampoline() {
if (val == null)
val = eval();
return val;
}

protected abstract val eval();

public boolean nil() {
throw new RuntimeException();
}

public boolean head() {
throw new RuntimeException();
}

public val tail() {
throw new RuntimeException();
}

public static void main(String[] args, int arity, String name) throws Exception {
Class<?>[] types = new Class<?>[arity];
for (int i = 0; i < arity; i++)
types[i] = val.class;
int index = 0;
boolean bits = false;
if (args.length > 0 && "-bits".equals(args[0])) {
index = 1;
bits = true;
}
val val = (function) Class.forName(name).getDeclaredConstructor(types).newInstance((Object[]) args(args, arity, index));
if (bits)
writeBits(val, System.out);
else
write(val, System.out);
System.out.flush();
}

public static val[] args(String[] args, int arity, int index) throws Exception {
val[] vals = new val[arity];
val stdin = null;
for (int i = 0; i < arity; i++) {
if (index + i < args.length) {
if (!"-".equals(args[index + i])) {
vals[i] = new input(args[index + i]);
} else {
if (stdin == null)
stdin = new input(System.in);
vals[i] = stdin;
}
} else if (stdin == null) {
stdin = new input(System.in);
vals[i] = stdin;
} else {
vals[i] = NIL;
}
}
return vals;
}

public static void writeBits(val val, PrintStream out) throws Exception {
for (;;) {
val = trampoline(val);
if (val.nil())
break;
out.print(val.head() ? "1" : "0");
val = val.tail();
}
}

public static void write(val val, OutputStream out) throws Exception {
int byt = 0;
int bit = 128;
for (;;) {
val = trampoline(val);
if (val.nil())
break;
byt |= val.head() ? bit : 0;
bit >>= 1;
if (bit == 0) {
out.write(byt);
byt = 0;
bit = 128;
}
val = val.tail();
}
}
}

Here is the interpreter and compiler to Java:

public class BoundExpr extends Expr {
private int index;

public BoundExpr(Token token, int index) {
super(token);
this.index = index;
}

public rt01_.val eval(rt01_.val[] bindings) {
return bindings[index];
}

public int getIndex() {
return index;
}
}

import java.io.FileWriter;
import java.io.PrintWriter;

public class Compiler {
public static void main(String[] args) throws Exception {
Parser parser = new Parser();
for (String arg : args)
parser.add(arg);
for (Function function : parser.getFunctions().values()) {
PrintWriter out = new PrintWriter(new FileWriter(function.getMangledName() + ".java"));
function.compile(out);
out.flush();
out.close();
}
}
}

public class ConcatExpr extends Expr {
private Expr first;
private Expr second;

public ConcatExpr(Expr first, Expr second) {
super(first.getToken());
this.first = first;
this.second = second;
}

public rt01_.val eval(rt01_.val[] bindings) {
return new rt01_.concat(first.eval(bindings), second.eval(bindings));
}

public Expr getFirst() {
return first;
}

public Expr getSecond() {
return second;
}
}

import java.util.List;

public class ConstantExpr extends Expr {
private String bits;
private rt01_.constant val;

public ConstantExpr(Token token, boolean[] bits) {
super(token);
StringBuilder sb = new StringBuilder();
for (boolean bit : bits)
sb.append(bit ? "1" : "0");
this.bits = sb.toString();
val = new rt01_.constant(this.bits);
}

public rt01_.val eval(rt01_.val[] bindings) {
return val;
}

public String getBits() {
return bits;
}
}

import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Map;

public class Def {
private Token name;
private Pattern[] patterns;
private Token[] body;

private int bindingCount;
private Expr expr;

public Def(Token name, Pattern[] patterns, Token[] body) {
this.name = name;
this.patterns = patterns;
this.body = body;

bindingCount = 0;
for (Pattern pattern : patterns)
if (pattern.isBinding())
bindingCount++;
}

public int getArity() {
return patterns.length;
}

public Token getName() {
return name;
}

private class ParseState {
int index;
Expr expr;
}

public void parse(Map<String,Function> functions) {
if (body.length == 0) {
expr = new ConstantExpr(name, new boolean[0]);
} else {
ParseState state = new ParseState();
state.index = 0;
state.expr = null;
parse(functions, state);
expr = state.expr;
}
body = null;
}

private void parse(Map<String,Function> functions, ParseState state) {
state.expr = null;
parse1(functions, state);
if (state.index < body.length) {
Expr first = state.expr;
parse(functions, state);
state.expr = new ConcatExpr(first, state.expr);
}
}

private void parse1(Map<String,Function> functions, ParseState state) {
Token token = body[state.index];
switch (token.getType()) {
case EQUALS: case DOT:
assert false;
throw new RuntimeException();
case ZERO: case ONE: case NIL:
parseConstant(token, state);
return;
case SYMBOL:
state.index++;
state.expr = binding(token);
if (state.expr != null)
return;
if (!functions.containsKey(token.getSymbol()))
throw new RuntimeException(token.getLocation() + ": unknown function: " + token.getSymbol());
Function function = functions.get(token.getSymbol());
Expr[] args = new Expr[function.getArity()];
for (int i = 0; i < args.length; i++) {
parse1(functions, state);
args[i] = state.expr;
}
state.expr = new FuncallExpr(token, function, args);
}
}

private void parseConstant(Token token, ParseState state) {
ArrayList<Boolean> bits = new ArrayList<Boolean>();
loop: while (state.index < body.length) {
switch (body[state.index].getType()) {
case ZERO:
state.index++;
bits.add(false);
break;
case ONE:
state.index++;
bits.add(true);
break;
case NIL:
state.index++;
default:
break loop;
}
}
state.expr = new ConstantExpr(token, DefReader.toBitArray(bits));
}

private BoundExpr binding(Token token) {
int bindingIndex = 0;
for (Pattern pattern : patterns) {
if (!pattern.isBinding())
continue;
if (pattern.getToken().getSymbol().equals(token.getSymbol()))
return new BoundExpr(token, bindingIndex);
bindingIndex++;
}
return null;
}

public Expr getExpr() {
return expr;
}

public rt01_.val eval(rt01_.val[] args) {
assert args.length == patterns.length;
rt01_.val[] bindings = new rt01_.val[bindingCount];
int bindingIndex = 0;
for (int i = 0; i < args.length; i++) {
rt01_.val val = patterns[i].match(args[i]);
if (val == null)
return null;
if (patterns[i].isBinding())
bindings[bindingIndex++] = val;
}
return expr.eval(bindings);
}

public void compile(int n, PrintWriter out) throws Exception {
out.print(" private rt01_.val m");
out.print(n);
out.println("() {");
if (patterns.length > 0)
out.println(" rt01_.val val;");
int bindingIndex = 0;
for (int i = 0; i < patterns.length; i++) {
Pattern pattern = patterns[i];
boolean[] bits = pattern.getBits();
if (bits.length > 0) {
out.print(" a");
out.print(i);
out.print(" = trampoline(a");
out.print(i);
out.println(");");
}
out.print(" val = a");
out.print(i);
out.println(";");
boolean start = true;
for (boolean bit : bits) {
if (start)
start = false;
else
out.println(" val = trampoline(val);");
out.print(" if (val.nil() || ");
if (bit) out.print("!");
out.println("val.head()) return null;");
out.println(" val = val.tail();");
}
if (pattern.isLiteral()) {
out.println(" val = trampoline(val);");
out.println(" if (!val.nil()) return null;");
} else if (pattern.isBinding()) {
out.print(" rt01_.val b");
out.print(bindingIndex);
out.println(" = val;");
bindingIndex++;
}
}
out.print(" return ");
compileExpr(expr, out);
out.println(";");
out.println(" }");
}

private void compileExpr(Expr e, PrintWriter out) throws Exception {
if (e instanceof BoundExpr) {
out.print("b");
out.print(((BoundExpr) e).getIndex());
} else if (e instanceof ConstantExpr) {
out.print("_");
out.print(((ConstantExpr) e).getBits());
} else if (e instanceof ConcatExpr) {
out.print("new rt01_.concat(");
compileExpr(((ConcatExpr) e).getFirst(), out);
out.print(",");
compileExpr(((ConcatExpr) e).getSecond(), out);
out.print(")");
} else if (e instanceof FuncallExpr) {
out.print("new ");
out.print(((FuncallExpr) e).getFunction().getMangledName());
out.print("(");
boolean start = true;
for (Expr arg : ((FuncallExpr) e).getArgs()) {
if (start)
start = false;
else
out.print(",");
compileExpr(arg, out);
}
out.print(")");
}
}
}

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class DefReader implements Iterator<Def> {
private Iterator<Token> tokenizer;
private Def def = null;

public DefReader(Iterator<Token> tokenizer) {
this.tokenizer = tokenizer;
}

public boolean hasNext() {
if (def == null)
def = readNext();
return def != null;
}

public Def next() {
if (def == null)
return readNext();
Def result = def;
def = null;
return result;
}

public void remove() {
}

private Def readNext() {
if (!tokenizer.hasNext())
return null;
Token name = tokenizer.next();
if (name.getType() != Token.Type.SYMBOL)
throw new RuntimeException(name.getLocation() + ": symbol expected");
ArrayList<Pattern> patterns = readPatterns(name);
ArrayList<Token> body = new ArrayList<Token>();
for (;;) {
if (!tokenizer.hasNext())
throw new RuntimeException(name.getLocation() + ": incomplete definition");
Token token = tokenizer.next();
if (token.getType() == Token.Type.DOT)
break;
body.add(token);
}
return new Def(name, patterns.toArray(new Pattern[patterns.size()]), body.toArray(new Token[body.size()]));
}

private ArrayList<Pattern> readPatterns(Token name) {
ArrayList<Pattern> patterns = new ArrayList<Pattern>();
ArrayList<Boolean> bits = new ArrayList<Boolean>();
Token startToken = null;
for (;;) {
if (!tokenizer.hasNext())
throw new RuntimeException(name.getLocation() + ": incomplete definition");
Token token = tokenizer.next();
if (startToken == null)
startToken = token;
switch (token.getType()) {
case EQUALS:
if (bits.size() > 0)
patterns.add(new Pattern(startToken, toBitArray(bits), null));
return patterns;
case ZERO:
bits.add(false);
break;
case ONE:
bits.add(true);
break;
case DOT:
case NIL:
case SYMBOL:
patterns.add(new Pattern(startToken, toBitArray(bits), token));
bits.clear();
startToken = null;
break;
}
}
}

public static boolean[] toBitArray(List<Boolean> list) {
boolean[] bits = new boolean[list.size()];
for (int i = 0; i < list.size(); i++)
bits[i] = list.get(i);
return bits;
}
}
public abstract class Expr {
private Token token;

protected Expr(Token token) {
this.token = token;
}

public abstract rt01_.val eval(rt01_.val[] bindings);

public Token getToken() {
return token;
}
}

public class FuncallExpr extends Expr {
private Function function;
private Expr[] args;

public FuncallExpr(Token token, Function function, Expr[] args) {
super(token);
this.function = function;
this.args = args;
}

public rt01_.val eval(rt01_.val[] bindings) {
rt01_.val[] argVals = new rt01_.val[args.length];
for (int i = 0; i < argVals.length; i++)
argVals[i] = args[i].eval(bindings);
return function.eval(argVals);
}

public Function getFunction() {
return function;
}

public Expr[] getArgs() {
return args;
}
}

import java.io.File;
import java.io.PrintWriter;
import java.util.HashSet;
import java.util.Map;

public class Function {
private Def[] defs;

public Function(Def[] defs) {
this.defs = defs;
}

public int getArity() {
return defs[0].getArity();
}

public void parse(Map<String,Function> functions) {
for (Def def : defs)
def.parse(functions);
}

private class Result extends rt01_.function {
private rt01_.val[] args;
Result(rt01_.val[] args) {
this.args = args;
}

protected rt01_.val eval() {
for (Def def : defs) {
rt01_.val val = def.eval(args);
if (val != null) {
args = null; // let arguments get garbage collected
return val;
}
}
Token name = defs[defs.length-1].getName();
throw new RuntimeException(name.getLocation() + ": no matching pattern in definition of " + name.getSymbol());
}
}

public rt01_.function eval(rt01_.val[] args) {
return new Result(args);
}

private static final HashSet<String> reserved = new HashSet<String>();
static {
reserved.add("abstract");
reserved.add("assert");
reserved.add("boolean");
reserved.add("break");
reserved.add("byte");
reserved.add("case");
reserved.add("catch");
reserved.add("char");
reserved.add("class");
reserved.add("const");
reserved.add("continue");
reserved.add("default");
reserved.add("do");
reserved.add("double");
reserved.add("else");
reserved.add("enum");
reserved.add("extends");
reserved.add("false");
reserved.add("final");
reserved.add("finally");
reserved.add("float");
reserved.add("for");
reserved.add("goto");
reserved.add("if");
reserved.add("implements");
reserved.add("import");
reserved.add("instanceof");
reserved.add("int");
reserved.add("interface");
reserved.add("long");
reserved.add("native");
reserved.add("new");
reserved.add("null");
reserved.add("package");
reserved.add("private");
reserved.add("protected");
reserved.add("public");
reserved.add("return");
reserved.add("short");
reserved.add("static");
reserved.add("switch");
reserved.add("synchronized");
reserved.add("strictfp");
reserved.add("super");
reserved.add("this");
reserved.add("throw");
reserved.add("throws");
reserved.add("transient");
reserved.add("true");
reserved.add("try");
reserved.add("void");
reserved.add("volatile");
reserved.add("while");
}

public static String mangle(String name) {
StringBuilder sb = new StringBuilder();
if (reserved.contains(name))
sb.append("__");
for (int i = 0; i < name.length(); i++)
switch (name.charAt(i)) {
case '0': case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9':
if (i == 0)
sb.append("__");
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
case 's': case 't': case 'u': case 'v': case 'w': case 'x':
case 'y': case 'z':
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
case 'Y': case 'Z':
sb.append(name.charAt(i));
break;
default:
sb.append('_').append(Integer.toHexString(name.charAt(i))).append('_');
}
return sb.toString();
}

public String getMangledName() {
return mangle(defs[0].getName().getSymbol());
}

public void compile(PrintWriter out) throws Exception {
int arity = getArity();
String name = getMangledName();
out.print("public class ");
out.print(name);
out.println(" extends rt01_.function {");
for (int i = 0; i < arity; i++) {
out.print(" private rt01_.val a");
out.print(i);
out.println(";");
}
out.print(" public ");
out.print(name);
out.print("(");
for (int i = 0; i < arity; i++) {
if (i > 0)
out.print(",");
out.print("rt01_.val p");
out.print(i);
}
out.println(") {");
for (int i = 0; i < arity; i++) {
out.print(" a");
out.print(i);
out.print("=p");
out.print(i);
out.println(";");
}
out.println(" }");
out.println(" protected rt01_.val eval() {");
out.println(" rt01_.val val;");
out.print(" if (");
for (int i = 0; i < defs.length; i++) {
out.print("(val = m");
out.print(i);
out.print("()) == null && ");
}
out.print("true) throw new RuntimeException(\"");
out.print(getLocation(defs[defs.length-1].getName()));
out.println(": pattern match failed\");");
for (int i = 0; i < arity; i++) {
out.print(" a");
out.print(i);
out.println(" = null;");
}
out.println(" return val;");
out.println(" }");
HashSet<String> constants = new HashSet<String>();
for (Def def : defs)
collectConstants(constants, def.getExpr());
for (String constant : constants) {
out.print(" private static final rt01_.val _");
out.print(constant);
if (constant.length() == 0) {
out.println(" = NIL;");
} else {
out.print(" = new rt01_.constant(\"");
out.print(constant);
out.println("\");");
}
}
for (int i = 0; i < defs.length; i++)
defs[i].compile(i, out);
out.println(" public static void main(String[] args) throws Exception {");
out.print(" main(args,");
out.print(arity);
out.print(",\"");
out.print(name);
out.println("\");");
out.println(" }");
out.println("}");
}

private void collectConstants(HashSet<String> constants, Expr expr) {
if (expr instanceof ConstantExpr) {
constants.add(((ConstantExpr) expr).getBits());
} else if (expr instanceof ConcatExpr) {
collectConstants(constants, ((ConcatExpr) expr).getFirst());
collectConstants(constants, ((ConcatExpr) expr).getSecond());
} else if (expr instanceof FuncallExpr) {
for (Expr arg : ((FuncallExpr) expr).getArgs())
collectConstants(constants, arg);
}
}

public static String getLocation(Token token) {
return new File(token.getFileName()).getName() + ":" + token.getLineNumber() + ":" + token.getColumn();
}
}

import java.io.File;
import java.util.Map;

public class Interpreter {
public static void main(String[] args) throws Exception {
Parser parser = new Parser();
int i;
String fname = null;
for (i = 0; i < args.length && !args[i].equals("-"); i++) {
parser.add(args[i]);
fname = getFunction(args[i]);
}
if (i + 1 < args.length) {
fname = args[i+1];
i = i + 2;
}
boolean writeBits = false;
if (i < args.length && "-bits".equals(args[i])) {
i++;
writeBits = true;
}
Function f = parser.getFunctions().get(fname);
if (writeBits)
rt01_.function.writeBits(f.eval(rt01_.function.args(args, f.getArity(), i)), System.out);
else
rt01_.function.write(f.eval(rt01_.function.args(args, f.getArity(), i)), System.out);
System.out.flush();
}

private static String getFunction(String fileName) {
String fn = new File(fileName).getName();
if (fn.indexOf('.') > 0)
return fn.substring(0, fn.indexOf('.'));
else
return fn;
}
}

import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

public class Parser {
private HashMap<String,Integer> arities = new HashMap<String,Integer>();
private HashMap<String,List<Def>> functions = new HashMap<String,List<Def>>();

public void add(String fileName) throws Exception {
add(new Tokenizer(fileName));
}

public void add(Reader in, String fileName, int lineNumber, int column) {
add(new Tokenizer(in, fileName, lineNumber, column));
}

public void add(Iterator<Token> tokenizer) {
addDefs(new DefReader(tokenizer));
}

public void addDefs(Iterator<Def> defs) {
while (defs.hasNext()) {
Def def = defs.next();
Token name = def.getName();
if (arities.containsKey(name.getSymbol())) {
if (def.getArity() != arities.get(name.getSymbol()))
throw new RuntimeException(name.getLocation() + ": arity mismatch in definition of " + name.getSymbol());
} else {
arities.put(name.getSymbol(), def.getArity());
functions.put(name.getSymbol(), new ArrayList<Def>());
}
functions.get(name.getSymbol()).add(def);
}
}

public Map<String,Function> getFunctions() {
HashMap<String,Function> fns = new HashMap<String,Function>();
for (Map.Entry<String,List<Def>> entry : functions.entrySet()) {
List<Def> defs = entry.getValue();
fns.put(entry.getKey(), new Function(defs.toArray(new Def[defs.size()])));
}
for (Function function : fns.values())
function.parse(fns);
return fns;
}
}

public class Pattern {
private Token startToken;
private boolean[] bits;
private Token token;
private String fileName;
private int lineNumber;

public Pattern(Token startToken, boolean[] bits, Token token) {
this.startToken = startToken;
this.bits = bits;
this.token = token;
}

public Token getStartToken() {
return startToken;
}

public boolean[] getBits() {
return bits;
}

public Token getToken() {
return token;
}

public boolean isLiteral() {
return token == null || token.getType() == Token.Type.NIL;
}

public boolean isWild() {
return token != null && token.getType() == Token.Type.DOT;
}

public boolean isBinding() {
return token != null && token.getType() == Token.Type.SYMBOL;
}

public rt01_.val match(rt01_.val val) {
for (int i = 0; i < bits.length; i++) {
val = rt01_.val.trampoline(val);
if (val.nil() || val.head() != bits[i])
return null;
val = val.tail();
}
if (isLiteral()) {
val = rt01_.val.trampoline(val);
if (!val.nil())
return null;
}
return val;
}
}

public class Token {
public enum Type {
ZERO, ONE, NIL, DOT, EQUALS, SYMBOL
}

private Type type;
private String symbol;
private String fileName;
private int lineNumber;
private int column;

public Token(Type type, String symbol, String fileName, int lineNumber, int column) {
this.type = type;
this.symbol = symbol;
this.fileName = fileName;
this.lineNumber = lineNumber;
this.column = column;
}

public Type getType() {
return type;
}

public String getSymbol() {
return symbol;
}

public String getFileName() {
return fileName;
}

public int getLineNumber() {
return lineNumber;
}

public int getColumn() {
return column;
}

public String getLocation() {
return fileName + ":" + lineNumber + ":" + column;
}
}

import java.io.FileReader;
import java.io.Reader;
import java.util.Iterator;

public class Tokenizer implements Iterator<Token> {
private Reader in;
private String fileName;
private int lineNumber;
private int column = 0;

private int pushback = -1;
private Token next;

public Tokenizer(String fileName) throws Exception {
this(new FileReader(fileName), fileName, 1, 0);
}

public Tokenizer(Reader in, String fileName, int lineNumber, int column) {
this.in = in;
this.fileName = fileName;
this.lineNumber = lineNumber;
}

public boolean hasNext() {
if (next == null)
readNext();
return next != null;
}

public Token next() {
if (next == null)
readNext();
Token result = next;
next = null;
return result;
}

public void remove() {
}

private void pushback(int lastChar) {
assert pushback < 0;
pushback = lastChar;
column--;
if (lastChar == '\n')
lineNumber--;
}

private int nextChar() {
int nextChar = -1;
if (pushback >= 0) {
nextChar = pushback;
pushback = -1;
} else {
try {
nextChar = in.read();
} catch (Exception e) {
throw new RuntimeException(e);
}
}
if (nextChar >= 0) {
column++;
if (nextChar == '\n') {
lineNumber++;
column = 0;
}
}
return nextChar;
}

private void readNext() {
for (;;) {
int nextChar = nextChar();
if (nextChar < 0)
return;
int saveLineNumber = lineNumber;
int saveColumn = column;
switch (nextChar) {
case '0':
next = new Token(Token.Type.ZERO, null, fileName, lineNumber, column);
return;
case '1':
next = new Token(Token.Type.ONE, null, fileName, lineNumber, column);
return;
case '_':
next = new Token(Token.Type.NIL, null, fileName, lineNumber, column);
return;
case '.':
next = new Token(Token.Type.DOT, null, fileName, lineNumber, column);
return;
case '=':
nextChar = nextChar();
if (nextChar != '=') {
pushback(nextChar);
next = new Token(Token.Type.EQUALS, null, fileName, saveLineNumber, saveColumn);
return;
}
while (nextChar >= 0 && nextChar != '\n')
nextChar = nextChar();
continue;
case ' ': case '\t': case '\r': case '\n':
continue;
default:
StringBuilder sb = new StringBuilder();
sb.append((char) nextChar);
for (;;) {
nextChar = nextChar();
if (nextChar < 0) {
next = new Token(Token.Type.SYMBOL, sb.toString(), fileName, saveLineNumber, saveColumn);
return;
}
switch (nextChar) {
case '0': case '1': case '_': case '.': case '=':
case ' ': case '\t': case '\r': case '\n':
pushback(nextChar);
next = new Token(Token.Type.SYMBOL, sb.toString(), fileName, saveLineNumber, saveColumn);
return;
default:
sb.append((char) nextChar);
}
}
}
}
}
}


Here is 99 bottles of beer compiled to Java:

public class __2 extends rt01_.function {
private rt01_.val a0;
public __2(rt01_.val p0) {
a0=p0;
}
protected rt01_.val eval() {
rt01_.val val;
if ((val = m0()) == null && true) throw new RuntimeException("99.01_:5:1: pattern match failed");
a0 = null;
return val;
}
private static final rt01_.val _010 = new rt01_.constant("010");
private rt01_.val m0() {
rt01_.val val;
a0 = trampoline(a0);
val = a0;
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || !val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || !val.head()) return null;
val = val.tail();
rt01_.val b0 = val;
return new rt01_.concat(_010,b0);
}
public static void main(String[] args) throws Exception {
main(args,1,"__2");
}
}
public class __33r extends rt01_.function {
public __33r() {
}
protected rt01_.val eval() {
rt01_.val val;
if ((val = m0()) == null && true) throw new RuntimeException("99.01_:8:1: pattern match failed");
return val;
}
private static final rt01_.val _001000000110111101101110001000000111010001101000011001010010000001110111011000010110110001101100 = new rt01_.constant("001000000110111101101110001000000111010001101000011001010010000001110111011000010110110001101100");
private rt01_.val m0() {
return _001000000110111101101110001000000111010001101000011001010010000001110111011000010110110001101100;
}
public static void main(String[] args) throws Exception {
main(args,0,"__33r");
}
}
public class __4 extends rt01_.function {
private rt01_.val a0;
public __4(rt01_.val p0) {
a0=p0;
}
protected rt01_.val eval() {
rt01_.val val;
if ((val = m0()) == null && (val = m1()) == null && (val = m2()) == null && (val = m3()) == null && true) throw new RuntimeException("99.01_:14:1: pattern match failed");
a0 = null;
return val;
}
private static final rt01_.val _ = NIL;
private static final rt01_.val _1 = new rt01_.constant("1");
private static final rt01_.val _1001 = new rt01_.constant("1001");
private static final rt01_.val _0 = new rt01_.constant("0");
private rt01_.val m0() {
rt01_.val val;
a0 = trampoline(a0);
val = a0;
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (!val.nil()) return null;
return _1001;
}
private rt01_.val m1() {
rt01_.val val;
val = a0;
val = trampoline(val);
if (!val.nil()) return null;
return _;
}
private rt01_.val m2() {
rt01_.val val;
a0 = trampoline(a0);
val = a0;
if (val.nil() || val.head()) return null;
val = val.tail();
rt01_.val b0 = val;
return new rt01_.concat(_1,new __4(b0));
}
private rt01_.val m3() {
rt01_.val val;
a0 = trampoline(a0);
val = a0;
if (val.nil() || !val.head()) return null;
val = val.tail();
rt01_.val b0 = val;
return new rt01_.concat(_0,b0);
}
public static void main(String[] args) throws Exception {
main(args,1,"__4");
}
}
public class __5 extends rt01_.function {
private rt01_.val a0;
private rt01_.val a1;
public __5(rt01_.val p0,rt01_.val p1) {
a0=p0;
a1=p1;
}
protected rt01_.val eval() {
rt01_.val val;
if ((val = m0()) == null && (val = m1()) == null && (val = m2()) == null && (val = m3()) == null && true) throw new RuntimeException("99.01_:20:1: pattern match failed");
a0 = null;
a1 = null;
return val;
}
private static final rt01_.val _00110001 = new rt01_.constant("00110001");
private static final rt01_.val _01101110011011110010000001101101011011110111001001100101 = new rt01_.constant("01101110011011110010000001101101011011110111001001100101");
private static final rt01_.val _0011 = new rt01_.constant("0011");
private static final rt01_.val _01110011 = new rt01_.constant("01110011");
private rt01_.val m0() {
rt01_.val val;
a0 = trampoline(a0);
val = a0;
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
a1 = trampoline(a1);
val = a1;
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (!val.nil()) return null;
return new rt01_.concat(_01101110011011110010000001101101011011110111001001100101,new rt01_.concat(new __7(),_01110011));
}
private rt01_.val m1() {
rt01_.val val;
a0 = trampoline(a0);
val = a0;
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
a1 = trampoline(a1);
val = a1;
if (val.nil() || !val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (!val.nil()) return null;
return new rt01_.concat(_00110001,new __7());
}
private rt01_.val m2() {
rt01_.val val;
a0 = trampoline(a0);
val = a0;
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
val = a1;
rt01_.val b0 = val;
return new rt01_.concat(_0011,new rt01_.concat(new __6(b0),new rt01_.concat(new __7(),_01110011)));
}
private rt01_.val m3() {
rt01_.val val;
val = a0;
rt01_.val b0 = val;
val = a1;
rt01_.val b1 = val;
return new rt01_.concat(_0011,new rt01_.concat(new __6(b0),new rt01_.concat(_0011,new rt01_.concat(new __6(b1),new rt01_.concat(new __7(),_01110011)))));
}
public static void main(String[] args) throws Exception {
main(args,2,"__5");
}
}
public class __6 extends rt01_.function {
private rt01_.val a0;
public __6(rt01_.val p0) {
a0=p0;
}
protected rt01_.val eval() {
rt01_.val val;
if ((val = m0()) == null && (val = m1()) == null && (val = m2()) == null && true) throw new RuntimeException("99.01_:25:1: pattern match failed");
a0 = null;
return val;
}
private static final rt01_.val _ = NIL;
private static final rt01_.val _1 = new rt01_.constant("1");
private static final rt01_.val _0 = new rt01_.constant("0");
private rt01_.val m0() {
rt01_.val val;
val = a0;
val = trampoline(val);
if (!val.nil()) return null;
return _;
}
private rt01_.val m1() {
rt01_.val val;
a0 = trampoline(a0);
val = a0;
if (val.nil() || val.head()) return null;
val = val.tail();
rt01_.val b0 = val;
return new rt01_.concat(new __6(b0),_0);
}
private rt01_.val m2() {
rt01_.val val;
a0 = trampoline(a0);
val = a0;
if (val.nil() || !val.head()) return null;
val = val.tail();
rt01_.val b0 = val;
return new rt01_.concat(new __6(b0),_1);
}
public static void main(String[] args) throws Exception {
main(args,1,"__6");
}
}
public class __7 extends rt01_.function {
public __7() {
}
protected rt01_.val eval() {
rt01_.val val;
if ((val = m0()) == null && true) throw new RuntimeException("99.01_:28:1: pattern match failed");
return val;
}
private static final rt01_.val _00100000011000100110111101110100011101000110110001100101 = new rt01_.constant("00100000011000100110111101110100011101000110110001100101");
private rt01_.val m0() {
return _00100000011000100110111101110100011101000110110001100101;
}
public static void main(String[] args) throws Exception {
main(args,0,"__7");
}
}
public class __8 extends rt01_.function {
private rt01_.val a0;
private rt01_.val a1;
public __8(rt01_.val p0,rt01_.val p1) {
a0=p0;
a1=p1;
}
protected rt01_.val eval() {
rt01_.val val;
if ((val = m0()) == null && (val = m1()) == null && true) throw new RuntimeException("99.01_:32:1: pattern match failed");
a0 = null;
a1 = null;
return val;
}
private rt01_.val m0() {
rt01_.val val;
val = a0;
rt01_.val b0 = val;
a1 = trampoline(a1);
val = a1;
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (!val.nil()) return null;
return new __4(b0);
}
private rt01_.val m1() {
rt01_.val val;
val = a0;
rt01_.val b0 = val;
val = a1;
return b0;
}
public static void main(String[] args) throws Exception {
main(args,2,"__8");
}
}
public class __9 extends rt01_.function {
private rt01_.val a0;
private rt01_.val a1;
public __9(rt01_.val p0,rt01_.val p1) {
a0=p0;
a1=p1;
}
protected rt01_.val eval() {
rt01_.val val;
if ((val = m0()) == null && (val = m1()) == null && true) throw new RuntimeException("99.01_:36:1: pattern match failed");
a0 = null;
a1 = null;
return val;
}
private static final rt01_.val _1001 = new rt01_.constant("1001");
private static final rt01_.val _0010110000100000 = new rt01_.constant("0010110000100000");
private static final rt01_.val _001011100000101001010100011000010110101101100101001000000110111101101110011001010010000001100100011011110111011101101110001000000110000101101110011001000010000001110000011000010111001101110011001000000110100101110100001000000110000101110010011011110111010101101110011001000010110000100000 = new rt01_.constant("001011100000101001010100011000010110101101100101001000000110111101101110011001010010000001100100011011110111011101101110001000000110000101101110011001000010000001110000011000010111001101110011001000000110100101110100001000000110000101110010011011110111010101101110011001000010110000100000");
private static final rt01_.val _0010111000001010 = new rt01_.constant("0010111000001010");
private static final rt01_.val _0000 = new rt01_.constant("0000");
private static final rt01_.val _00101110000010100100011101101111001000000111010001101111001000000111010001101000011001010010000001110011011101000110111101110010011001010010000001100001011011100110010000100000011000100111010101111001001000000111001101101111011011010110010100100000011011010110111101110010011001010010110000100000 = new rt01_.constant("00101110000010100100011101101111001000000111010001101111001000000111010001101000011001010010000001110011011101000110111101110010011001010010000001100001011011100110010000100000011000100111010101111001001000000111001101101111011011010110010100100000011011010110111101110010011001010010110000100000");
private static final rt01_.val _001011100000101000001010 = new rt01_.constant("001011100000101000001010");
private rt01_.val m0() {
rt01_.val val;
a0 = trampoline(a0);
val = a0;
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
a1 = trampoline(a1);
val = a1;
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (val.nil() || val.head()) return null;
val = val.tail();
val = trampoline(val);
if (!val.nil()) return null;
return new rt01_.concat(new __2(new __5(_0000,_0000)),new rt01_.concat(new b(),new rt01_.concat(new __33r(),new rt01_.concat(_0010110000100000,new rt01_.concat(new __5(_0000,_0000),new rt01_.concat(new b(),new rt01_.concat(_00101110000010100100011101101111001000000111010001101111001000000111010001101000011001010010000001110011011101000110111101110010011001010010000001100001011011100110010000100000011000100111010101111001001000000111001101101111011011010110010100100000011011010110111101110010011001010010110000100000,new rt01_.concat(new __5(_1001,_1001),new rt01_.concat(new b(),new rt01_.concat(new __33r(),_0010111000001010))))))))));
}
private rt01_.val m1() {
rt01_.val val;
val = a0;
rt01_.val b0 = val;
val = a1;
rt01_.val b1 = val;
return new rt01_.concat(new __5(b0,b1),new rt01_.concat(new b(),new rt01_.concat(new __33r(),new rt01_.concat(_0010110000100000,new rt01_.concat(new __5(b0,b1),new rt01_.concat(new b(),new rt01_.concat(_001011100000101001010100011000010110101101100101001000000110111101101110011001010010000001100100011011110111011101101110001000000110000101101110011001000010000001110000011000010111001101110011001000000110100101110100001000000110000101110010011011110111010101101110011001000010110000100000,new rt01_.concat(new __5(new __8(b0,b1),new __4(b1)),new rt01_.concat(new b(),new rt01_.concat(new __33r(),new rt01_.concat(_001011100000101000001010,new __9(new __8(b0,b1),new __4(b1)))))))))))));
}
public static void main(String[] args) throws Exception {
main(args,2,"__9");
}
}
public class __99 extends rt01_.function {
public __99() {
}
protected rt01_.val eval() {
rt01_.val val;
if ((val = m0()) == null && true) throw new RuntimeException("99.01_:38:1: pattern match failed");
return val;
}
private static final rt01_.val _1001 = new rt01_.constant("1001");
private rt01_.val m0() {
return new __9(_1001,_1001);
}
public static void main(String[] args) throws Exception {
main(args,0,"__99");
}
}
public class b extends rt01_.function {
public b() {
}
protected rt01_.val eval() {
rt01_.val val;
if ((val = m0()) == null && true) throw new RuntimeException("99.01_:2:1: pattern match failed");
return val;
}
private static final rt01_.val _0010000001101111011001100010000001100010011001010110010101110010 = new rt01_.constant("0010000001101111011001100010000001100010011001010110010101110010");
private rt01_.val m0() {
return _0010000001101111011001100010000001100010011001010110010101110010;
}
public static void main(String[] args) throws Exception {
main(args,0,"b");
}
}