Monday, December 7, 2009

It had been over 10 years since I had last written anything in Haskell, so I decided to try using it to see if I still could. I returned to the simple 01_ programming language that I invented earlier this year, and wrote an interpreter, and it took less than a day. I didn't worry about making it fast, but the interpreter compiled with ghc was about twice as fast as the one I wrote in C

I had written an interpreter in Java, and the source was about 3 times longer than the Haskell version. Since Haskell has laziness built-in, I didn't have to code in the laziness. The source for the interpreter in C that I wrote was about twice the size of the Java version and about 6 times the Haskell version. A lot of the C code was junk for debugging the memory management, since I couldn't rely on a garbage collector.
Haskell216 lines
Java782 lines
C1527 lines


I first wrote the Haskell 01_ interpreter in multiple modules, and it would be more maintainable that way. However, I'll present here all smashed into a single module, so that it can be stuck into a single file and run.

module Main(main) where

import Data.Bits(bit,testBit)
import Data.Char(chr,ord)
import Data.List(splitAt,elemIndex,(!!))
import Data.Map(Map,empty,findWithDefault,insert,member,(!))
import qualified Data.Map(map)
import System.Environment(getArgs)

data Token = Symbol String | Zero | One | Equal | Dot | Nil

tokenize :: String -> [Token]
tokenize ('=':'=':cs) = tokenize (dropComment cs)
tokenize ('0':cs) = Zero : tokenize cs
tokenize ('1':cs) = One : tokenize cs
tokenize ('=':cs) = Equal : tokenize cs
tokenize ('.':cs) = Dot : tokenize cs
tokenize ('_':cs) = Nil : tokenize cs
tokenize (' ':cs) = tokenize cs
tokenize ('\t':cs) = tokenize cs
tokenize ('\r':cs) = tokenize cs
tokenize ('\n':cs) = tokenize cs
tokenize [] = []
tokenize cs = tokenizeSymbol cs ""

tokenizeSymbol s@(c:cs) symbol =
case c of
'=' -> Symbol (reverse symbol) : tokenize s
'0' -> Symbol (reverse symbol) : tokenize s
'1' -> Symbol (reverse symbol) : tokenize s
'.' -> Symbol (reverse symbol) : tokenize s
'_' -> Symbol (reverse symbol) : tokenize s
' ' -> Symbol (reverse symbol) : tokenize cs
'\t' -> Symbol (reverse symbol) : tokenize cs
'\r' -> Symbol (reverse symbol) : tokenize cs
'\n' -> Symbol (reverse symbol) : tokenize cs
_ -> tokenizeSymbol cs (c:symbol)
tokenizeSymbol [] symbol = [Symbol (reverse symbol)]

dropComment ('\n':cs) = cs
dropComment (_:cs) = dropComment cs
dropComment [] = []

type Bits = [Bool]
data Pattern = Literal Bits | Binding Bits String | Wildcard Bits
data Expr = LiteralBits Bits | Concat Expr Expr | Bound Int | Call String [Expr]
data Definition = Def [Pattern] Expr

data Signature = Sig String [Pattern] [Token]

parseSigs :: [Token] -> [Signature]
parseSigs (Symbol s:tokens) = parseSig s [] tokens []
parseSigs [] = []
parseSigs _ = error "name expected to start definition"

parseSig name patterns (Equal:tokens) [] =
collectBody name (reverse patterns) tokens []
parseSig name patterns (Equal:tokens) bits =
collectBody name (reverse (Literal (reverse bits) : patterns)) tokens []
parseSig name patterns (Zero:tokens) bits =
parseSig name patterns tokens (False:bits)
parseSig name patterns (One:tokens) bits =
parseSig name patterns tokens (True:bits)
parseSig name patterns (Nil:tokens) bits =
parseSig name (Literal (reverse bits) : patterns) tokens []
parseSig name patterns (Symbol binding:tokens) bits =
parseSig name (Binding (reverse bits) binding : patterns) tokens []
parseSig name patterns (Dot:tokens) bits =
parseSig name (Wildcard (reverse bits) : patterns) tokens []

collectBody name patterns (Dot:tokens) body =
Sig name patterns (reverse body) : parseSigs tokens
collectBody name patterns (token:tokens) body =
collectBody name patterns tokens (token:body)
collectBody name patterns [] body = error ("unexpected end of body of "++name)

groupDefs [] defs = Data.Map.map reverse defs
groupDefs (sig@(Sig name patterns _):sigs) defs =
case findWithDefault [] name defs of
[] -> groupDefs sigs (insert name [sig] defs)
siglist@(Sig _ otherPatterns _:_) ->
if length patterns /= length otherPatterns then
error ("arity mismatch:"++name)
else
groupDefs sigs (insert name (sig:siglist) defs)

parseDefs defs = Data.Map.map (map (parseDef arities)) defs
where
arities = Data.Map.map (\ (Sig _ patterns _:_) -> length patterns) defs

parseDef arities (Sig _ patterns body) =
case body of
[] -> Def patterns (LiteralBits [])
_ -> Def patterns
(foldr1 (\a b -> Concat a b) (parseExprs arities bindings body))
where
bindings = map (\ (Binding _ name) -> name)
(filter (\x -> case x of Binding _ _ -> True; _ -> False) patterns)

parseExprs arities bindings [] = []
parseExprs arities bindings tokens@(Zero:_) =
parseLiteral arities bindings tokens []
parseExprs arities bindings tokens@(One:_) =
parseLiteral arities bindings tokens []
parseExprs arities bindings tokens@(Nil:_) =
parseLiteral arities bindings tokens []
parseExprs arities bindings (Symbol name:tokens) =
case elemIndex name bindings of
Just i -> Bound i : parseExprs arities bindings tokens
Nothing ->
let (args,exprs) = splitAt (arities ! name) (parseExprs arities bindings tokens)
in Call name args : exprs

parseLiteral arities bindings (Zero:tokens) literal =
parseLiteral arities bindings tokens (False:literal)
parseLiteral arities bindings (One:tokens) literal =
parseLiteral arities bindings tokens (True:literal)
parseLiteral arities bindings [] literal = [LiteralBits (reverse literal)]
parseLiteral arities bindings [Nil] literal = [LiteralBits (reverse literal)]
parseLiteral arities bindings (Nil:tokens) literal =
LiteralBits (reverse literal) : parseExprs arities bindings tokens
parseLiteral arities bindings tokens literal =
LiteralBits (reverse literal) : parseExprs arities bindings tokens

parseBodies :: [Signature] -> Map String [Definition]
parseBodies sigs = parseDefs (groupDefs sigs empty)

arity :: Map String [Definition] -> String -> Int
arity defs fn = case defs ! fn of (Def patterns _):_ -> length patterns

apply :: Map String [Definition] -> String -> [Bits] -> Bits
apply defs fn args =
let evalMatching Nothing (Def patterns expr) =
case bind patterns args [] of
Just bindings -> Just (eval defs bindings expr)
_ -> Nothing
evalMatching result _ = result
in case foldl evalMatching Nothing (defs ! fn) of
Just bits -> bits
_ -> error ("No matching pattern for "++fn)

bind :: [Pattern] -> [Bits] -> [Bits] -> Maybe [Bits]
bind (pattern:patterns) (arg:args) acc =
case pattern of
Literal bits ->
case matchArg bits arg of
Just [] -> bind patterns args acc
_ -> Nothing
Binding bits _ ->
case matchArg bits arg of
Just binding -> bind patterns args (binding:acc)
_ -> Nothing
Wildcard bits ->
case matchArg bits arg of
Just _ -> bind patterns args acc
_ -> Nothing
bind [] [] acc = Just (reverse acc)

matchArg (bit:bits) (argBit:argBits)
| bit == argBit = matchArg bits argBits
| otherwise = Nothing
matchArg [] arg = Just arg
matchArg _ [] = Nothing

eval :: Map String [Definition] -> [Bits] -> Expr -> Bits
eval _ _ (LiteralBits bits) = bits
eval defs bindings (Concat head tail) =
eval defs bindings head ++ eval defs bindings tail
eval _ bindings (Bound i) = bindings !! i
eval defs bindings (Call fn args) =
apply defs fn (map (eval defs bindings) args)

main :: IO ()
main = do
stdinText <- getContents
interpArgs <- getArgs
let (files,fn,argFiles) = parseArgs interpArgs []
stdinBits = textToBits stdinText
sigs <- readSources files []
args <- readArgs stdinBits argFiles []
let defs = parseBodies sigs
let nils = []:nils
let result = apply defs fn (take (arity defs fn) (args ++ stdinBits:nils))
putStr (bitsToStr result)

parseArgs ("-":fn:args) files = (reverse files,fn,args)
parseArgs ["-"] files = (reverse files,defaultFn files,[])
parseArgs (file:args) files = parseArgs args (file:files)
parseArgs [] files = (reverse files,defaultFn files,[])

defaultFn (fn:_) =
(fst . (break (=='.')) . reverse . fst . (break (=='/')) . reverse) fn

readSources [] sigs = return sigs
readSources (file:files) sigs = do
text <- readFile file
let s = sigs ++ ((parseSigs . tokenize) text)
readSources files s

readArgs _ [] args = return (reverse args)
readArgs stdinBits ("-":files) args =
readArgs stdinBits files (stdinBits:args)
readArgs stdinBits (file:files) args = do
text <- readFile file
readArgs stdinBits files (textToBits text:args)

textToBits text = foldl (++) [] (map charToBits text) where
charToBits char = map (testBit (ord char)) [7,6..0]

bitsToStr [] = []
bitsToStr bits =
let (byte,rest) = splitAt 8 bits
bitToInt flag index = if flag then bit index else 0
bitsToChar bs = chr (foldl (+) 0 (zipWith bitToInt bs [7,6..0]))
in
bitsToChar byte : bitsToStr rest

No comments:

Post a Comment