[Shootout-list] Faster wordfreq for ghc

Tomasz Zielonka t.zielonka@students.mimuw.edu.pl
Thu, 30 Sep 2004 10:11:55 +0200


--3MwIy2ne0vdjdPXF
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

Hello!

I have written a faster and more memory efficient ghc solution for the
wordfreq benchmark. It was possible to futher micro-optimize it by
using unsafe array operations, but I thought it wasn't worth the hassle.

Best regards,
Tom

-- 
.signature: Too many levels of symbolic links

--3MwIy2ne0vdjdPXF
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="wordfreq.hs"


-- Implementation of 'wordfreq' benchmark optimised for time and memory
-- Author: Tomasz Zielonka <t.zielonka@students.mimuw.edu.pl>
-- compile with: ghc -O2 -o wc wc.hs

import System.IO
import Data.Array.IO
import Data.Array.Base
import Data.Word
import Data.Int
import Data.HashTable as HashTable
import Data.IORef
import List
import Char

myHashString :: String -> Int32
myHashString = fromIntegral . foldr f 0
  where f c m = ord c + (m * 67)

main :: IO ()
main = do
    ht <- HashTable.new (==) myHashString

    let handleReversedWord rw = do
            l1 <- HashTable.lookup ht rw
            case l1 of
                Just ref -> incr ref
                Nothing -> do
                    ref <- newIORef (1 :: Int)
                    HashTable.insert ht rw ref
    
    iterateOnReversedWords stdin handleReversedWord

    l <- HashTable.toList ht
    l <- mapM (\(rw, ref) -> do n <- readIORef ref; return (n, reverse rw)) l

    mapM_ (putStrLn . pretty) (sortBy (flip compare) l)

  where
    pretty (n,w) = pad 7 (show n) ++ " " ++ w
    pad n s = replicate (n - length s) ' ' ++ s
    incr ref = do
        x <- readIORef ref
        writeIORef ref $! (x + 1)

------------------------------------------------------------------

iterateOnReversedWords :: Handle -> (String -> IO ()) -> IO ()
iterateOnReversedWords h f = do
    cs <- fileIterate h "" aux
    aux cs ' '
    return ()
  where
    aux cs c
        | isAlpha c = do 
            let c' = toLower c
            return (c' : cs)
        | not (null cs) = do
            f cs
            return []
        | otherwise = return []

{-# INLINE fileIterate #-}

fileIterate :: Handle -> a -> (a -> Char -> IO a) -> IO a
fileIterate h a0 f = do
    buf <- newArray_ (0, bufSize - 1) :: IO (IOUArray Int Word8)
    let loop i n a
            | i == n =
                do  n' <- hGetArray h buf bufSize
                    if n' == 0
                        then return a
                        else loop 0 n' a
            | otherwise =
                do  c <- fmap (toEnum . fromEnum) (readArray buf i)

                    a' <- f a c

                    loop (i + 1) n a'
    loop 0 0 a0
  where
    bufSize :: Int
    bufSize = 4096


--3MwIy2ne0vdjdPXF--