Benchmarks Game/Parallel/RegexDNA
From HaskellWiki
m |
m (Shootout/Parallel/RegexDNA moved to Benchmarks Game/Parallel/RegexDNA: The name of the benchmarks site has changed) |
||
| (2 intermediate revisions not shown.) | |||
| Line 57: | Line 57: | ||
substCh 'Y' = "(c|t)" | substCh 'Y' = "(c|t)" | ||
substCh etc = [etc] | substCh etc = [etc] | ||
| + | </haskell> | ||
| + | |||
| + | The changes should be directly applicable to the non-PCRE version as well. Just change the import to use the Posix library. Here are the parallelizing changes: | ||
| + | |||
| + | <haskell> | ||
| + | --- regexdna2.hs 2008-09-22 07:56:49.000000000 -1000 | ||
| + | +++ regexdna3.hs 2008-09-21 07:50:32.000000000 -1000 | ||
| + | @@ -2,6 +2,7 @@ | ||
| + | -- http://shootout.alioth.debian.org/ | ||
| + | -- Contributed by: Sergei Matusevich 2007 | ||
| + | |||
| + | +import Control.Parallel.Strategies | ||
| + | import List | ||
| + | import Text.Regex.PCRE -- requires regex-pcre-builtin | ||
| + | import qualified Data.ByteString.Char8 as B | ||
| + | @@ -20,7 +21,8 @@ | ||
| + | main = do | ||
| + | file <- B.getContents | ||
| + | let [s1,s2,s3] = map (B.concat . tail) $ groupBy notHeader $ B.split '\n' file | ||
| + | - mapM_ (printVars s2 s3) variants | ||
| + | + showVars r = r ++ ' ' : show ((s2 =~ r :: Int) + (s3 =~ r :: Int)) | ||
| + | + mapM_ putStrLn $ parMap rnf showVars variants | ||
| + | putChar '\n' | ||
| + | print (B.length file) | ||
| + | print (B.length s1 + B.length s2 + B.length s3) | ||
| + | @@ -38,8 +40,4 @@ | ||
| + | substCh 'W' = "(a|t)" | ||
| + | substCh 'Y' = "(c|t)" | ||
| + | substCh etc = [etc] | ||
| + | - printVars s2 s3 r = do | ||
| + | - putStr r | ||
| + | - putChar ' ' | ||
| + | - print ((s2 =~ r :: Int) + (s3 =~ r :: Int)) | ||
| + | |||
| + | </haskell> | ||
| + | |||
| + | == I submitted this == | ||
| + | |||
| + | by Stephen Blackheath | ||
| + | |||
| + | Not pretty, but fast and legal, and by no means the final word on the matter! | ||
| + | |||
| + | <haskell> | ||
| + | {-# LANGUAGE ForeignFunctionInterface, BangPatterns #-} | ||
| + | {-# OPTIONS_GHC -O2 #-} | ||
| + | |||
| + | -- The Computer Language Benchmarks Game | ||
| + | -- http://shootout.alioth.debian.org/ | ||
| + | -- | ||
| + | -- contributed by Sergei Matusevich 2007 | ||
| + | -- modified by Tim Newsham | ||
| + | -- modified by Stephen Blackheath 2009, v1.0 | ||
| + | |||
| + | ----------------------------------------- | ||
| + | -- Requires the regex-pcre cabal package. | ||
| + | -- | ||
| + | -- Instructions for installing it, assuming ghc version 6.10.1 already | ||
| + | -- installed, tested on Ubuntu 8.10: | ||
| + | -- | ||
| + | -- 1. Make sure these OS packages are installed: | ||
| + | -- libgmp3-dev | ||
| + | -- zlib1g-dev | ||
| + | -- libcurl4-gnutls-dev | ||
| + | -- libpcre3-dev | ||
| + | -- | ||
| + | -- 2. get cabal-install package from | ||
| + | -- http://hackage.haskell.org/packages/archive/cabal-install/0.6.0/cabal-install-0.6.0.tar.gz | ||
| + | -- | ||
| + | -- 3. Unpack cabal-install, cd into directory, and type | ||
| + | -- sudo sh ./bootstap.sh | ||
| + | -- | ||
| + | -- 4. Type | ||
| + | -- sudo $HOME/.cabal/bin/cabal update | ||
| + | -- sudo $HOME/.cabal/bin/cabal install regex-pcre | ||
| + | |||
| + | ----------------------------------------- | ||
| + | -- Compile command: ghc -o regex regex.hs -threaded --make | ||
| + | -- Run command: ./regex +RTS -N4 (quad core) | ||
| + | -- ./regex (single core) | ||
| + | |||
| + | import Control.Concurrent | ||
| + | import Control.Parallel | ||
| + | import Control.Parallel.Strategies | ||
| + | import Control.Monad | ||
| + | import Foreign.Storable | ||
| + | import Foreign.Marshal.Alloc | ||
| + | import Foreign.Marshal.Array | ||
| + | import Foreign.Ptr | ||
| + | import Foreign.ForeignPtr | ||
| + | import Foreign.C.Types | ||
| + | import Text.Regex.PCRE -- requires haskell-regex-pcre-builtin | ||
| + | import qualified Data.ByteString as B | ||
| + | import qualified Data.ByteString.Internal as BI | ||
| + | import System.IO.Unsafe | ||
| + | import Data.Array | ||
| + | import Debug.Trace | ||
| + | import Data.List | ||
| + | import Data.Word | ||
| + | |||
| + | variants = [ | ||
| + | "agggtaaa|tttaccct", | ||
| + | "[cgt]gggtaaa|tttaccc[acg]", | ||
| + | "a[act]ggtaaa|tttacc[agt]t", | ||
| + | "ag[act]gtaaa|tttac[agt]ct", | ||
| + | "agg[act]taaa|ttta[agt]cct", | ||
| + | "aggg[acg]aaa|ttt[cgt]ccct", | ||
| + | "agggt[cgt]aa|tt[acg]accct", | ||
| + | "agggta[cgt]a|t[acg]taccct", | ||
| + | "agggtaa[cgt]|[acg]ttaccct" ] | ||
| + | |||
| + | subs = [ | ||
| + | ("B", "(c|g|t)"), | ||
| + | ("D", "(a|g|t)"), | ||
| + | ("H", "(a|c|t)"), | ||
| + | ("K", "(g|t)"), | ||
| + | ("M", "(a|c)"), | ||
| + | ("N", "(a|c|g|t)"), | ||
| + | ("R", "(a|g)"), | ||
| + | ("S", "(c|g)"), | ||
| + | ("V", "(a|c|g)"), | ||
| + | ("W", "(a|t)"), | ||
| + | ("Y", "(c|t)")] | ||
| + | |||
| + | main = do | ||
| + | file <- B.getContents | ||
| + | let [s1,s2,s3] = map (B.concat . tail) $ | ||
| + | groupBy notHeader $ B.split (BI.c2w '\n') file | ||
| + | showVars r = r ++ ' ' : show ((s2 =~ r :: Int) + (s3 =~ r :: Int)) | ||
| + | results = map showVars variants ++ [ | ||
| + | "", | ||
| + | show $ B.length file, | ||
| + | show $ B.length s1 + B.length s2 + B.length s3] | ||
| + | r2 = flip map (zip [1..] results) $ \(idx, a) -> | ||
| + | trace ("start "++show idx) () `seq` (a `using` rnf) `seq` trace ("end "++show idx) () `seq` a | ||
| + | -- Ensure that the data blocks are fully evaluated before we start | ||
| + | -- executing things in parallel, since they all depend on them | ||
| + | return $! (s1 `par` s2 `par` s3) `seq` s1 `seq` s2 `seq` s3 | ||
| + | mapM_ putStrLn $ parList rnf results `seq` results | ||
| + | |||
| + | let chunks = fragment 5000 s2 -- break into chunks to parallelize, which | ||
| + | -- is possible as our regexes are 1 char long | ||
| + | chunks' <- parallel $ map substituteAll chunks -- do regex substitutions | ||
| + | print $ B.length s1 + B.length s3 + B.length (B.concat chunks') | ||
| + | where notHeader _ s = B.null s || B.head s /= (BI.c2w '>') | ||
| + | |||
| + | -- Drop in replacement for sequence | ||
| + | parallel :: [IO a] -> IO [a] | ||
| + | parallel actions = do | ||
| + | vars <- forM actions $ \action -> do | ||
| + | var <- newEmptyMVar | ||
| + | forkIO $ do | ||
| + | answer <- action | ||
| + | return $! answer | ||
| + | putMVar var answer | ||
| + | return var | ||
| + | forM vars takeMVar | ||
| + | |||
| + | fragment :: Int -> B.ByteString -> [B.ByteString] | ||
| + | fragment chunkSize bs = | ||
| + | let (start, rem) = B.splitAt chunkSize bs | ||
| + | in if B.null rem | ||
| + | then [start] | ||
| + | else start : fragment chunkSize rem | ||
| + | |||
| + | -- Precompile regexes | ||
| + | subRegexes :: [(Regex, B.ByteString)] | ||
| + | subRegexes = flip map subs $ \(pattern, sub) -> | ||
| + | (makeRegex pattern :: Regex, B.pack (map BI.c2w sub)) | ||
| + | |||
| + | substituteAll :: B.ByteString -> IO B.ByteString | ||
| + | substituteAll !txt = do | ||
| + | let BI.PS srcFP srcOff srcLen = txt | ||
| + | withForeignPtr srcFP $ \src0 -> do | ||
| + | let src = src0 `plusPtr` srcOff | ||
| + | -- Capacity of -1 guarantees that a new buffer will be allocated | ||
| + | (dst, dstLen, _) <- foldM substitute_ (src, srcLen, -1) subRegexes | ||
| + | dstFP <- newForeignPtr finalizerFree dst | ||
| + | return $ BI.PS dstFP 0 dstLen | ||
| + | |||
| + | foreign import ccall unsafe "string.h memmove" memmove | ||
| + | :: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8) | ||
| + | |||
| + | -- A function that does nothing | ||
| + | foreign import ccall unsafe "static unistd.h &getpid" c_null_finalizer | ||
| + | :: FunPtr (Ptr Word8 -> IO ()) | ||
| + | |||
| + | -- Do a single regex substitution, returning the modified string | ||
| + | substitute_ :: (Ptr Word8, Int, Int) -> (Regex, B.ByteString) -> IO (Ptr Word8, Int, Int) | ||
| + | substitute_ (src, srcLen, srcCap) (regex, sub) = do | ||
| + | -- Make a new string given the input string, regex match positions, and | ||
| + | -- string to substitute | ||
| + | -- Turn the source buffer into a byte string to pass to 'match' | ||
| + | srcFP <- newForeignPtr c_null_finalizer src | ||
| + | let srcBS = BI.PS srcFP 0 srcLen | ||
| + | am :: Array Int (MatchOffset, MatchLength) | ||
| + | AllMatches am = match regex srcBS | ||
| + | (start, end) = bounds am | ||
| + | matches = end + 1 | ||
| + | BI.PS subFP subOff subLen = sub | ||
| + | newLength = srcLen + matches * (subLen - 1) | ||
| + | withForeignPtr subFP $ \sub0 -> do | ||
| + | let sub = sub0 `plusPtr` subOff | ||
| + | (dst, dstCap) <- | ||
| + | if newLength > srcCap | ||
| + | then do | ||
| + | let dstCap = if srcCap < 0 | ||
| + | then srcLen * 2 | ||
| + | else srcCap * 2 | ||
| + | dst <- mallocBytes dstCap | ||
| + | return (dst, dstCap) | ||
| + | else | ||
| + | return (src, srcCap) | ||
| + | |||
| + | let copy :: Int -> Int -> Int -> IO () | ||
| + | copy idx sOff dOff | idx < 0 = do | ||
| + | let chunkLen = sOff | ||
| + | when (dst /= src) $ | ||
| + | BI.memcpy dst src (fromIntegral chunkLen) | ||
| + | |||
| + | copy idx sOff dOff = do | ||
| + | let (matchOff, _) = am ! idx | ||
| + | sOff' = matchOff + 1 | ||
| + | chunkLen = sOff - sOff' | ||
| + | dOff' = dOff - chunkLen | ||
| + | memmove (dst `plusPtr` dOff') (src `plusPtr` sOff') | ||
| + | (fromIntegral chunkLen) | ||
| + | let dOff'' = dOff' - subLen | ||
| + | sOff'' = sOff' - 1 | ||
| + | BI.memcpy (dst `plusPtr` dOff'') sub | ||
| + | (fromIntegral subLen) | ||
| + | copy (idx-1) sOff'' dOff'' | ||
| + | |||
| + | copy end srcLen newLength | ||
| + | when (dst /= src && srcCap >= 0) $ free src | ||
| + | return (dst, newLength, dstCap) | ||
</haskell> | </haskell> | ||
Current revision
1 Regex-DNA
Old submission is:
Running:
- ghc --make -O2 -fglasgow-exts -package regex-posix -optc-O3 -threaded regexdna3.hs
- ./regexdna3 +RTS -N2 < big.txt
Code:
This is almost identical to the old code, with a parallel map used to perform the first phase of the benchmark (counting matches of each variant). I had trouble compiling the original with Text.Regex.Posix so I used Text.Regex.PCRE (which is probably faster, are hackage packages fair game?). I see a speedup from 46 seconds to 33 seconds when running this case -N2 vs the original case (with PCRE) unthreaded.
I see some weirdness in the shootout page's numbers. They list haskell running at 70 seconds (with Posix regex) and Python running as 25 seconds. I have a comparable system (roughly), and when I run the Python test case I get 46 seconds (nearly identical to the Haskell PCRE case). This probably means their machine has better memory bandwidth and that moving to PCRE is a big win (I believe python uses PCRE), but I'm not sure.
-- The Computer Language Benchmarks Game -- http://shootout.alioth.debian.org/ -- Contributed by: Sergei Matusevich 2007 import Control.Parallel.Strategies import List import Text.Regex.PCRE -- requires regex-pcre-builtin import qualified Data.ByteString.Char8 as B variants = [ "agggtaaa|tttaccct", "[cgt]gggtaaa|tttaccc[acg]", "a[act]ggtaaa|tttacc[agt]t", "ag[act]gtaaa|tttac[agt]ct", "agg[act]taaa|ttta[agt]cct", "aggg[acg]aaa|ttt[cgt]ccct", "agggt[cgt]aa|tt[acg]accct", "agggta[cgt]a|t[acg]taccct", "agggtaa[cgt]|[acg]ttaccct" ] main = do file <- B.getContents let [s1,s2,s3] = map (B.concat . tail) $ groupBy notHeader $ B.split '\n' file showVars r = r ++ ' ' : show ((s2 =~ r :: Int) + (s3 =~ r :: Int)) mapM_ putStrLn $ parMap rnf showVars variants putChar '\n' print (B.length file) print (B.length s1 + B.length s2 + B.length s3) print (B.length s1 + B.length s3 + length (B.unpack s2 >>= substCh)) where notHeader _ s = B.null s || B.head s /= '>' substCh 'B' = "(c|g|t)" substCh 'D' = "(a|g|t)" substCh 'H' = "(a|c|t)" substCh 'K' = "(g|t)" substCh 'M' = "(a|c)" substCh 'N' = "(a|c|g|t)" substCh 'R' = "(a|g)" substCh 'S' = "(c|g)" substCh 'V' = "(a|c|g)" substCh 'W' = "(a|t)" substCh 'Y' = "(c|t)" substCh etc = [etc]
The changes should be directly applicable to the non-PCRE version as well. Just change the import to use the Posix library. Here are the parallelizing changes:
--- regexdna2.hs 2008-09-22 07:56:49.000000000 -1000 +++ regexdna3.hs 2008-09-21 07:50:32.000000000 -1000 @@ -2,6 +2,7 @@ -- http://shootout.alioth.debian.org/ -- Contributed by: Sergei Matusevich 2007 +import Control.Parallel.Strategies import List import Text.Regex.PCRE -- requires regex-pcre-builtin import qualified Data.ByteString.Char8 as B @@ -20,7 +21,8 @@ main = do file <- B.getContents let [s1,s2,s3] = map (B.concat . tail) $ groupBy notHeader $ B.split '\n' file - mapM_ (printVars s2 s3) variants + showVars r = r ++ ' ' : show ((s2 =~ r :: Int) + (s3 =~ r :: Int)) + mapM_ putStrLn $ parMap rnf showVars variants putChar '\n' print (B.length file) print (B.length s1 + B.length s2 + B.length s3) @@ -38,8 +40,4 @@ substCh 'W' = "(a|t)" substCh 'Y' = "(c|t)" substCh etc = [etc] - printVars s2 s3 r = do - putStr r - putChar ' ' - print ((s2 =~ r :: Int) + (s3 =~ r :: Int))
2 I submitted this
by Stephen Blackheath
Not pretty, but fast and legal, and by no means the final word on the matter!
{-# LANGUAGE ForeignFunctionInterface, BangPatterns #-} {-# OPTIONS_GHC -O2 #-} -- The Computer Language Benchmarks Game -- http://shootout.alioth.debian.org/ -- -- contributed by Sergei Matusevich 2007 -- modified by Tim Newsham -- modified by Stephen Blackheath 2009, v1.0 ----------------------------------------- -- Requires the regex-pcre cabal package. -- -- Instructions for installing it, assuming ghc version 6.10.1 already -- installed, tested on Ubuntu 8.10: -- -- 1. Make sure these OS packages are installed: -- libgmp3-dev -- zlib1g-dev -- libcurl4-gnutls-dev -- libpcre3-dev -- -- 2. get cabal-install package from -- http://hackage.haskell.org/packages/archive/cabal-install/0.6.0/cabal-install-0.6.0.tar.gz -- -- 3. Unpack cabal-install, cd into directory, and type -- sudo sh ./bootstap.sh -- -- 4. Type -- sudo $HOME/.cabal/bin/cabal update -- sudo $HOME/.cabal/bin/cabal install regex-pcre ----------------------------------------- -- Compile command: ghc -o regex regex.hs -threaded --make -- Run command: ./regex +RTS -N4 (quad core) -- ./regex (single core) import Control.Concurrent import Control.Parallel import Control.Parallel.Strategies import Control.Monad import Foreign.Storable import Foreign.Marshal.Alloc import Foreign.Marshal.Array import Foreign.Ptr import Foreign.ForeignPtr import Foreign.C.Types import Text.Regex.PCRE -- requires haskell-regex-pcre-builtin import qualified Data.ByteString as B import qualified Data.ByteString.Internal as BI import System.IO.Unsafe import Data.Array import Debug.Trace import Data.List import Data.Word variants = [ "agggtaaa|tttaccct", "[cgt]gggtaaa|tttaccc[acg]", "a[act]ggtaaa|tttacc[agt]t", "ag[act]gtaaa|tttac[agt]ct", "agg[act]taaa|ttta[agt]cct", "aggg[acg]aaa|ttt[cgt]ccct", "agggt[cgt]aa|tt[acg]accct", "agggta[cgt]a|t[acg]taccct", "agggtaa[cgt]|[acg]ttaccct" ] subs = [ ("B", "(c|g|t)"), ("D", "(a|g|t)"), ("H", "(a|c|t)"), ("K", "(g|t)"), ("M", "(a|c)"), ("N", "(a|c|g|t)"), ("R", "(a|g)"), ("S", "(c|g)"), ("V", "(a|c|g)"), ("W", "(a|t)"), ("Y", "(c|t)")] main = do file <- B.getContents let [s1,s2,s3] = map (B.concat . tail) $ groupBy notHeader $ B.split (BI.c2w '\n') file showVars r = r ++ ' ' : show ((s2 =~ r :: Int) + (s3 =~ r :: Int)) results = map showVars variants ++ [ "", show $ B.length file, show $ B.length s1 + B.length s2 + B.length s3] r2 = flip map (zip [1..] results) $ \(idx, a) -> trace ("start "++show idx) () `seq` (a `using` rnf) `seq` trace ("end "++show idx) () `seq` a -- Ensure that the data blocks are fully evaluated before we start -- executing things in parallel, since they all depend on them return $! (s1 `par` s2 `par` s3) `seq` s1 `seq` s2 `seq` s3 mapM_ putStrLn $ parList rnf results `seq` results let chunks = fragment 5000 s2 -- break into chunks to parallelize, which -- is possible as our regexes are 1 char long chunks' <- parallel $ map substituteAll chunks -- do regex substitutions print $ B.length s1 + B.length s3 + B.length (B.concat chunks') where notHeader _ s = B.null s || B.head s /= (BI.c2w '>') -- Drop in replacement for sequence parallel :: [IO a] -> IO [a] parallel actions = do vars <- forM actions $ \action -> do var <- newEmptyMVar forkIO $ do answer <- action return $! answer putMVar var answer return var forM vars takeMVar fragment :: Int -> B.ByteString -> [B.ByteString] fragment chunkSize bs = let (start, rem) = B.splitAt chunkSize bs in if B.null rem then [start] else start : fragment chunkSize rem -- Precompile regexes subRegexes :: [(Regex, B.ByteString)] subRegexes = flip map subs $ \(pattern, sub) -> (makeRegex pattern :: Regex, B.pack (map BI.c2w sub)) substituteAll :: B.ByteString -> IO B.ByteString substituteAll !txt = do let BI.PS srcFP srcOff srcLen = txt withForeignPtr srcFP $ \src0 -> do let src = src0 `plusPtr` srcOff -- Capacity of -1 guarantees that a new buffer will be allocated (dst, dstLen, _) <- foldM substitute_ (src, srcLen, -1) subRegexes dstFP <- newForeignPtr finalizerFree dst return $ BI.PS dstFP 0 dstLen foreign import ccall unsafe "string.h memmove" memmove :: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8) -- A function that does nothing foreign import ccall unsafe "static unistd.h &getpid" c_null_finalizer :: FunPtr (Ptr Word8 -> IO ()) -- Do a single regex substitution, returning the modified string substitute_ :: (Ptr Word8, Int, Int) -> (Regex, B.ByteString) -> IO (Ptr Word8, Int, Int) substitute_ (src, srcLen, srcCap) (regex, sub) = do -- Make a new string given the input string, regex match positions, and -- string to substitute -- Turn the source buffer into a byte string to pass to 'match' srcFP <- newForeignPtr c_null_finalizer src let srcBS = BI.PS srcFP 0 srcLen am :: Array Int (MatchOffset, MatchLength) AllMatches am = match regex srcBS (start, end) = bounds am matches = end + 1 BI.PS subFP subOff subLen = sub newLength = srcLen + matches * (subLen - 1) withForeignPtr subFP $ \sub0 -> do let sub = sub0 `plusPtr` subOff (dst, dstCap) <- if newLength > srcCap then do let dstCap = if srcCap < 0 then srcLen * 2 else srcCap * 2 dst <- mallocBytes dstCap return (dst, dstCap) else return (src, srcCap) let copy :: Int -> Int -> Int -> IO () copy idx sOff dOff | idx < 0 = do let chunkLen = sOff when (dst /= src) $ BI.memcpy dst src (fromIntegral chunkLen) copy idx sOff dOff = do let (matchOff, _) = am ! idx sOff' = matchOff + 1 chunkLen = sOff - sOff' dOff' = dOff - chunkLen memmove (dst `plusPtr` dOff') (src `plusPtr` sOff') (fromIntegral chunkLen) let dOff'' = dOff' - subLen sOff'' = sOff' - 1 BI.memcpy (dst `plusPtr` dOff'') sub (fromIntegral subLen) copy (idx-1) sOff'' dOff'' copy end srcLen newLength when (dst /= src && srcCap >= 0) $ free src return (dst, newLength, dstCap)
