[Shootout-list] Faster wordfreq for ghc
Tomasz Zielonka
t.zielonka@students.mimuw.edu.pl
Thu, 30 Sep 2004 10:11:55 +0200
--3MwIy2ne0vdjdPXF
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Hello!
I have written a faster and more memory efficient ghc solution for the
wordfreq benchmark. It was possible to futher micro-optimize it by
using unsafe array operations, but I thought it wasn't worth the hassle.
Best regards,
Tom
--
.signature: Too many levels of symbolic links
--3MwIy2ne0vdjdPXF
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="wordfreq.hs"
-- Implementation of 'wordfreq' benchmark optimised for time and memory
-- Author: Tomasz Zielonka <t.zielonka@students.mimuw.edu.pl>
-- compile with: ghc -O2 -o wc wc.hs
import System.IO
import Data.Array.IO
import Data.Array.Base
import Data.Word
import Data.Int
import Data.HashTable as HashTable
import Data.IORef
import List
import Char
myHashString :: String -> Int32
myHashString = fromIntegral . foldr f 0
where f c m = ord c + (m * 67)
main :: IO ()
main = do
ht <- HashTable.new (==) myHashString
let handleReversedWord rw = do
l1 <- HashTable.lookup ht rw
case l1 of
Just ref -> incr ref
Nothing -> do
ref <- newIORef (1 :: Int)
HashTable.insert ht rw ref
iterateOnReversedWords stdin handleReversedWord
l <- HashTable.toList ht
l <- mapM (\(rw, ref) -> do n <- readIORef ref; return (n, reverse rw)) l
mapM_ (putStrLn . pretty) (sortBy (flip compare) l)
where
pretty (n,w) = pad 7 (show n) ++ " " ++ w
pad n s = replicate (n - length s) ' ' ++ s
incr ref = do
x <- readIORef ref
writeIORef ref $! (x + 1)
------------------------------------------------------------------
iterateOnReversedWords :: Handle -> (String -> IO ()) -> IO ()
iterateOnReversedWords h f = do
cs <- fileIterate h "" aux
aux cs ' '
return ()
where
aux cs c
| isAlpha c = do
let c' = toLower c
return (c' : cs)
| not (null cs) = do
f cs
return []
| otherwise = return []
{-# INLINE fileIterate #-}
fileIterate :: Handle -> a -> (a -> Char -> IO a) -> IO a
fileIterate h a0 f = do
buf <- newArray_ (0, bufSize - 1) :: IO (IOUArray Int Word8)
let loop i n a
| i == n =
do n' <- hGetArray h buf bufSize
if n' == 0
then return a
else loop 0 n' a
| otherwise =
do c <- fmap (toEnum . fromEnum) (readArray buf i)
a' <- f a c
loop (i + 1) n a'
loop 0 0 a0
where
bufSize :: Int
bufSize = 4096
--3MwIy2ne0vdjdPXF--