[Shootout-list] Faster ghc implementation for wc benchmark

Tomasz Zielonka t.zielonka@students.mimuw.edu.pl
Wed, 29 Sep 2004 15:26:21 +0200


--k1lZvvs/B4yU6o8G
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

Hello!

I have written much faster (but longer) GHC implementation for Count
Lines/Words/Chars. It's in the attachment.

Cheers,
Tom

-- 
.signature: Too many levels of symbolic links

--k1lZvvs/B4yU6o8G
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="wc.hs"


-- Implementation for 'wc' benchmark optimised for time and memory
-- Author: Tomasz Zielonka <t.zielonka@students.mimuw.edu.pl>
-- compile with: ghc -O2 -o wc wc.hs

import System.IO
import Data.Array.IO
import Data.Array.Base (unsafeRead)
import Data.Word
import List

isspace :: Char -> Bool
isspace ' '  = True
isspace '\n' = True
isspace '\t' = True
isspace  _   = False

wc :: Handle -> IO (Int, Int, Int)
wc h = do
    buf <- newArray_ (0, bufSize - 1) :: IO (IOUArray Int Word8)
    let
        wcLoop :: Int -> Int -> Int -> Int -> Int -> Int -> IO (Int, Int, Int)
        wcLoop prevIsSpace nl nw nc i n 
            | prevIsSpace `seq` nl `seq` nw `seq` nc `seq` i `seq` n `seq` False =
                undefined
            | i == n =
                do  n' <- hGetArray h buf bufSize
                    if n' == 0
                        then return (nl, nw, nc)
                        else wcLoop prevIsSpace nl nw nc 0 n'
            | otherwise =
                do  c <- fmap (toEnum . fromEnum) (unsafeRead buf i)
                    -- Watch out, unsafeRead used here. This gives about
                    -- 1.6x times speedup.

                    let cIsSpace = oneIf (isspace c)
                    wcLoop
                        cIsSpace
                        (incIf nl (c == '\n'))
                        (incIf nw (prevIsSpace > cIsSpace))
                        (nc + 1)
                        (i + 1)
                        n
    wcLoop 1 0 0 0 0 0
  where
    bufSize :: Int
    bufSize = 4096

    oneIf c = if c then 1 else 0
    incIf n c = if c then n + 1 else n

main :: IO ()
main = do
    (nl, nw, nc) <- wc stdin    
    putStrLn $ concat $ intersperse " " $ map show [nl, nw, nc]


--k1lZvvs/B4yU6o8G--