[Haskell] Re: performance tuning Data.FiniteMap

oleg at pobox.com oleg at pobox.com
Fri Feb 27 16:12:27 EST 2004


Hello!

	If indeed the read performance is at premium and updates are
infrequent, by bother with ternary etc. trees -- why not to use just a
single, one-level array. Given a reasonable hash function, the
retrieval performance is O(1). And still, no IO/ST are necessary.


{-# OPTIONS -fglasgow-exts #-}
module Foo where

import Data.Array
import Data.List

import Data.HashTable (hashString)
import Data.Int (Int32)

class Hashy a where
  hash:: a -> Int
  
data MyFM key val = MyFM { base::  Int
			 , purgatory:: [(key,val)]
			 , store:: Array Int [(key,val)]
			 } deriving Show
			 

empty = MyFM {base = 41, purgatory = [],
              store = listArray (0,base(empty)-1) $ repeat []}
	      
lkup fm key = case lookup key (purgatory fm) of
                 t@(Just _) -> t
		 _          -> lookup key item
   where item  = (store fm)! hashv
	 hashv = (hash key) `mod` (base fm)
	 
count = length . concat . elems . store

purgatory_limit = 10

ins fm key val 
     = rebuild_perhaps $ fm {purgatory = add_uniq (purgatory fm) key val}
  where
       rebuild_perhaps fm | length (purgatory fm) > purgatory_limit 
                              = rebuild fm
       rebuild_perhaps fm = fm
       
rebuild fm | 2*(count fm) > base fm = major_rebuild fm
rebuild fm = fm{purgatory = [], store = (store fm) // updates}
  where
    updates = map (retr . merge) $ groupBy gfirs $ 
              sortBy sfirs $ map (\p@(k,v) -> (hashk k,p)) $ purgatory fm
    hashk k = (hash k) `mod` (base fm)
    gfirs (k1,_) (k2,_) = k1 == k2
    sfirs (k1,_) (k2,_) = compare k1 k2
    merge x = (fst$ head x, map snd x)
    retr (h,v) = (h, unionBy gfirs v ((store fm)!h))

-- reallocate the hash table to the bigger size
major_rebuild fm = undefined -- exercise for the reader

-- add association (key,val) to the list, replacing an old association
-- with the same key, if any. At most one such association could have
-- existed
add_uniq [] key val = [(key,val)]
add_uniq ((hkey,_):t) key val | hkey == key = (key,val):t
add_uniq (h:t) key val = h: add_uniq t key val

instance Hashy String where
    hash = fromInteger . toInteger . hashString



test1 = foldl (\fm v -> ins fm v v) empty $ map (:[]) ['a'..'h']
test2 = foldl (\fm v -> ins fm v v) test1 $ map (:[]) ['a'..'o']
test3 = foldl (\fm v -> ins fm v v) test2 $ map (:[]) ['a'..'o']


More information about the Haskell mailing list