Difference between revisions of "Benchmarks Game/Parallel/RegexDNA"

From HaskellWiki
Jump to navigation Jump to search
m
m (Shootout/Parallel/RegexDNA moved to Benchmarks Game/Parallel/RegexDNA: The name of the benchmarks site has changed)
 
(3 intermediate revisions by 2 users not shown)
Line 5: Line 5:
   
 
Running:
 
Running:
* ghc --make -O2 -fglasgow-exts -package regex-posix -optc-O3 -threaded regexdna3.hs
+
* ghc --make -O2 -fglasgow-exts -package regex-posix -optc-O3 -threaded regexdna3.hs
* ./regexdna3 +RTS -N2 < big.txt
+
* ./regexdna3 +RTS -N2 < big.txt
   
 
Code:
 
Code:
Line 57: Line 57:
 
substCh 'Y' = "(c|t)"
 
substCh 'Y' = "(c|t)"
 
substCh etc = [etc]
 
substCh etc = [etc]
  +
</haskell>
  +
  +
The changes should be directly applicable to the non-PCRE version as well. Just change the import to use the Posix library. Here are the parallelizing changes:
  +
  +
<haskell>
  +
--- regexdna2.hs 2008-09-22 07:56:49.000000000 -1000
  +
+++ regexdna3.hs 2008-09-21 07:50:32.000000000 -1000
  +
@@ -2,6 +2,7 @@
  +
-- http://shootout.alioth.debian.org/
  +
-- Contributed by: Sergei Matusevich 2007
  +
  +
+import Control.Parallel.Strategies
  +
import List
  +
import Text.Regex.PCRE -- requires regex-pcre-builtin
  +
import qualified Data.ByteString.Char8 as B
  +
@@ -20,7 +21,8 @@
  +
main = do
  +
file <- B.getContents
  +
let [s1,s2,s3] = map (B.concat . tail) $ groupBy notHeader $ B.split '\n' file
  +
- mapM_ (printVars s2 s3) variants
  +
+ showVars r = r ++ ' ' : show ((s2 =~ r :: Int) + (s3 =~ r :: Int))
  +
+ mapM_ putStrLn $ parMap rnf showVars variants
  +
putChar '\n'
  +
print (B.length file)
  +
print (B.length s1 + B.length s2 + B.length s3)
  +
@@ -38,8 +40,4 @@
  +
substCh 'W' = "(a|t)"
  +
substCh 'Y' = "(c|t)"
  +
substCh etc = [etc]
  +
- printVars s2 s3 r = do
  +
- putStr r
  +
- putChar ' '
  +
- print ((s2 =~ r :: Int) + (s3 =~ r :: Int))
  +
  +
</haskell>
  +
  +
== I submitted this ==
  +
  +
by Stephen Blackheath
  +
  +
Not pretty, but fast and legal, and by no means the final word on the matter!
  +
  +
<haskell>
  +
{-# LANGUAGE ForeignFunctionInterface, BangPatterns #-}
  +
{-# OPTIONS_GHC -O2 #-}
  +
  +
-- The Computer Language Benchmarks Game
  +
-- http://shootout.alioth.debian.org/
  +
--
  +
-- contributed by Sergei Matusevich 2007
  +
-- modified by Tim Newsham
  +
-- modified by Stephen Blackheath 2009, v1.0
  +
  +
-----------------------------------------
  +
-- Requires the regex-pcre cabal package.
  +
--
  +
-- Instructions for installing it, assuming ghc version 6.10.1 already
  +
-- installed, tested on Ubuntu 8.10:
  +
--
  +
-- 1. Make sure these OS packages are installed:
  +
-- libgmp3-dev
  +
-- zlib1g-dev
  +
-- libcurl4-gnutls-dev
  +
-- libpcre3-dev
  +
--
  +
-- 2. get cabal-install package from
  +
-- http://hackage.haskell.org/packages/archive/cabal-install/0.6.0/cabal-install-0.6.0.tar.gz
  +
--
  +
-- 3. Unpack cabal-install, cd into directory, and type
  +
-- sudo sh ./bootstap.sh
  +
--
  +
-- 4. Type
  +
-- sudo $HOME/.cabal/bin/cabal update
  +
-- sudo $HOME/.cabal/bin/cabal install regex-pcre
  +
  +
-----------------------------------------
  +
-- Compile command: ghc -o regex regex.hs -threaded --make
  +
-- Run command: ./regex +RTS -N4 (quad core)
  +
-- ./regex (single core)
  +
  +
import Control.Concurrent
  +
import Control.Parallel
  +
import Control.Parallel.Strategies
  +
import Control.Monad
  +
import Foreign.Storable
  +
import Foreign.Marshal.Alloc
  +
import Foreign.Marshal.Array
  +
import Foreign.Ptr
  +
import Foreign.ForeignPtr
  +
import Foreign.C.Types
  +
import Text.Regex.PCRE -- requires haskell-regex-pcre-builtin
  +
import qualified Data.ByteString as B
  +
import qualified Data.ByteString.Internal as BI
  +
import System.IO.Unsafe
  +
import Data.Array
  +
import Debug.Trace
  +
import Data.List
  +
import Data.Word
  +
  +
variants = [
  +
"agggtaaa|tttaccct",
  +
"[cgt]gggtaaa|tttaccc[acg]",
  +
"a[act]ggtaaa|tttacc[agt]t",
  +
"ag[act]gtaaa|tttac[agt]ct",
  +
"agg[act]taaa|ttta[agt]cct",
  +
"aggg[acg]aaa|ttt[cgt]ccct",
  +
"agggt[cgt]aa|tt[acg]accct",
  +
"agggta[cgt]a|t[acg]taccct",
  +
"agggtaa[cgt]|[acg]ttaccct" ]
  +
  +
subs = [
  +
("B", "(c|g|t)"),
  +
("D", "(a|g|t)"),
  +
("H", "(a|c|t)"),
  +
("K", "(g|t)"),
  +
("M", "(a|c)"),
  +
("N", "(a|c|g|t)"),
  +
("R", "(a|g)"),
  +
("S", "(c|g)"),
  +
("V", "(a|c|g)"),
  +
("W", "(a|t)"),
  +
("Y", "(c|t)")]
  +
  +
main = do
  +
file <- B.getContents
  +
let [s1,s2,s3] = map (B.concat . tail) $
  +
groupBy notHeader $ B.split (BI.c2w '\n') file
  +
showVars r = r ++ ' ' : show ((s2 =~ r :: Int) + (s3 =~ r :: Int))
  +
results = map showVars variants ++ [
  +
"",
  +
show $ B.length file,
  +
show $ B.length s1 + B.length s2 + B.length s3]
  +
r2 = flip map (zip [1..] results) $ \(idx, a) ->
  +
trace ("start "++show idx) () `seq` (a `using` rnf) `seq` trace ("end "++show idx) () `seq` a
  +
-- Ensure that the data blocks are fully evaluated before we start
  +
-- executing things in parallel, since they all depend on them
  +
return $! (s1 `par` s2 `par` s3) `seq` s1 `seq` s2 `seq` s3
  +
mapM_ putStrLn $ parList rnf results `seq` results
  +
  +
let chunks = fragment 5000 s2 -- break into chunks to parallelize, which
  +
-- is possible as our regexes are 1 char long
  +
chunks' <- parallel $ map substituteAll chunks -- do regex substitutions
  +
print $ B.length s1 + B.length s3 + B.length (B.concat chunks')
  +
where notHeader _ s = B.null s || B.head s /= (BI.c2w '>')
  +
  +
-- Drop in replacement for sequence
  +
parallel :: [IO a] -> IO [a]
  +
parallel actions = do
  +
vars <- forM actions $ \action -> do
  +
var <- newEmptyMVar
  +
forkIO $ do
  +
answer <- action
  +
return $! answer
  +
putMVar var answer
  +
return var
  +
forM vars takeMVar
  +
  +
fragment :: Int -> B.ByteString -> [B.ByteString]
  +
fragment chunkSize bs =
  +
let (start, rem) = B.splitAt chunkSize bs
  +
in if B.null rem
  +
then [start]
  +
else start : fragment chunkSize rem
  +
  +
-- Precompile regexes
  +
subRegexes :: [(Regex, B.ByteString)]
  +
subRegexes = flip map subs $ \(pattern, sub) ->
  +
(makeRegex pattern :: Regex, B.pack (map BI.c2w sub))
  +
  +
substituteAll :: B.ByteString -> IO B.ByteString
  +
substituteAll !txt = do
  +
let BI.PS srcFP srcOff srcLen = txt
  +
withForeignPtr srcFP $ \src0 -> do
  +
let src = src0 `plusPtr` srcOff
  +
-- Capacity of -1 guarantees that a new buffer will be allocated
  +
(dst, dstLen, _) <- foldM substitute_ (src, srcLen, -1) subRegexes
  +
dstFP <- newForeignPtr finalizerFree dst
  +
return $ BI.PS dstFP 0 dstLen
  +
  +
foreign import ccall unsafe "string.h memmove" memmove
  +
:: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8)
  +
  +
-- A function that does nothing
  +
foreign import ccall unsafe "static unistd.h &getpid" c_null_finalizer
  +
:: FunPtr (Ptr Word8 -> IO ())
  +
  +
-- Do a single regex substitution, returning the modified string
  +
substitute_ :: (Ptr Word8, Int, Int) -> (Regex, B.ByteString) -> IO (Ptr Word8, Int, Int)
  +
substitute_ (src, srcLen, srcCap) (regex, sub) = do
  +
-- Make a new string given the input string, regex match positions, and
  +
-- string to substitute
  +
-- Turn the source buffer into a byte string to pass to 'match'
  +
srcFP <- newForeignPtr c_null_finalizer src
  +
let srcBS = BI.PS srcFP 0 srcLen
  +
am :: Array Int (MatchOffset, MatchLength)
  +
AllMatches am = match regex srcBS
  +
(start, end) = bounds am
  +
matches = end + 1
  +
BI.PS subFP subOff subLen = sub
  +
newLength = srcLen + matches * (subLen - 1)
  +
withForeignPtr subFP $ \sub0 -> do
  +
let sub = sub0 `plusPtr` subOff
  +
(dst, dstCap) <-
  +
if newLength > srcCap
  +
then do
  +
let dstCap = if srcCap < 0
  +
then srcLen * 2
  +
else srcCap * 2
  +
dst <- mallocBytes dstCap
  +
return (dst, dstCap)
  +
else
  +
return (src, srcCap)
  +
  +
let copy :: Int -> Int -> Int -> IO ()
  +
copy idx sOff dOff | idx < 0 = do
  +
let chunkLen = sOff
  +
when (dst /= src) $
  +
BI.memcpy dst src (fromIntegral chunkLen)
  +
  +
copy idx sOff dOff = do
  +
let (matchOff, _) = am ! idx
  +
sOff' = matchOff + 1
  +
chunkLen = sOff - sOff'
  +
dOff' = dOff - chunkLen
  +
memmove (dst `plusPtr` dOff') (src `plusPtr` sOff')
  +
(fromIntegral chunkLen)
  +
let dOff'' = dOff' - subLen
  +
sOff'' = sOff' - 1
  +
BI.memcpy (dst `plusPtr` dOff'') sub
  +
(fromIntegral subLen)
  +
copy (idx-1) sOff'' dOff''
  +
  +
copy end srcLen newLength
  +
when (dst /= src && srcCap >= 0) $ free src
  +
return (dst, newLength, dstCap)
 
</haskell>
 
</haskell>

Latest revision as of 22:31, 22 January 2012

Regex-DNA

Old submission is:

Running:

  • ghc --make -O2 -fglasgow-exts -package regex-posix -optc-O3 -threaded regexdna3.hs
  • ./regexdna3 +RTS -N2 < big.txt

Code:

This is almost identical to the old code, with a parallel map used to perform the first phase of the benchmark (counting matches of each variant). I had trouble compiling the original with Text.Regex.Posix so I used Text.Regex.PCRE (which is probably faster, are hackage packages fair game?). I see a speedup from 46 seconds to 33 seconds when running this case -N2 vs the original case (with PCRE) unthreaded.

I see some weirdness in the shootout page's numbers. They list haskell running at 70 seconds (with Posix regex) and Python running as 25 seconds. I have a comparable system (roughly), and when I run the Python test case I get 46 seconds (nearly identical to the Haskell PCRE case). This probably means their machine has better memory bandwidth and that moving to PCRE is a big win (I believe python uses PCRE), but I'm not sure.

-- The Computer Language Benchmarks Game
-- http://shootout.alioth.debian.org/
-- Contributed by: Sergei Matusevich 2007

import Control.Parallel.Strategies
import List
import Text.Regex.PCRE -- requires regex-pcre-builtin
import qualified Data.ByteString.Char8 as B

variants = [
  "agggtaaa|tttaccct",
  "[cgt]gggtaaa|tttaccc[acg]",
  "a[act]ggtaaa|tttacc[agt]t",
  "ag[act]gtaaa|tttac[agt]ct",
  "agg[act]taaa|ttta[agt]cct",
  "aggg[acg]aaa|ttt[cgt]ccct",
  "agggt[cgt]aa|tt[acg]accct",
  "agggta[cgt]a|t[acg]taccct",
  "agggtaa[cgt]|[acg]ttaccct" ]

main = do
  file <- B.getContents
  let [s1,s2,s3] = map (B.concat . tail) $ groupBy notHeader $ B.split '\n' file
      showVars r = r ++ ' ' : show ((s2 =~ r :: Int) + (s3 =~ r :: Int))
  mapM_ putStrLn $ parMap rnf showVars  variants
  putChar '\n'
  print (B.length file)
  print (B.length s1 + B.length s2 + B.length s3)
  print (B.length s1 + B.length s3 + length (B.unpack s2 >>= substCh))
  where notHeader _ s = B.null s || B.head s /= '>'
        substCh 'B' = "(c|g|t)"
        substCh 'D' = "(a|g|t)"
        substCh 'H' = "(a|c|t)"
        substCh 'K' = "(g|t)"
        substCh 'M' = "(a|c)"
        substCh 'N' = "(a|c|g|t)"
        substCh 'R' = "(a|g)"
        substCh 'S' = "(c|g)"
        substCh 'V' = "(a|c|g)"
        substCh 'W' = "(a|t)"
        substCh 'Y' = "(c|t)"
        substCh etc = [etc]

The changes should be directly applicable to the non-PCRE version as well. Just change the import to use the Posix library. Here are the parallelizing changes:

--- regexdna2.hs        2008-09-22 07:56:49.000000000 -1000
+++ regexdna3.hs        2008-09-21 07:50:32.000000000 -1000
@@ -2,6 +2,7 @@
 -- http://shootout.alioth.debian.org/
 -- Contributed by: Sergei Matusevich 2007

+import Control.Parallel.Strategies
 import List
 import Text.Regex.PCRE -- requires regex-pcre-builtin
 import qualified Data.ByteString.Char8 as B
@@ -20,7 +21,8 @@
 main = do
   file <- B.getContents
   let [s1,s2,s3] = map (B.concat . tail) $ groupBy notHeader $ B.split '\n' file
-  mapM_ (printVars s2 s3) variants
+      showVars r = r ++ ' ' : show ((s2 =~ r :: Int) + (s3 =~ r :: Int))
+  mapM_ putStrLn $ parMap rnf showVars  variants
   putChar '\n'
   print (B.length file)
   print (B.length s1 + B.length s2 + B.length s3)
@@ -38,8 +40,4 @@
         substCh 'W' = "(a|t)"
         substCh 'Y' = "(c|t)"
         substCh etc = [etc]
-        printVars s2 s3 r = do
-                            putStr r
-                            putChar ' '
-                            print ((s2 =~ r :: Int) + (s3 =~ r :: Int))

I submitted this

by Stephen Blackheath

Not pretty, but fast and legal, and by no means the final word on the matter!

{-# LANGUAGE ForeignFunctionInterface, BangPatterns #-}
{-# OPTIONS_GHC -O2 #-}

-- The Computer Language Benchmarks Game
-- http://shootout.alioth.debian.org/
--
-- contributed by Sergei Matusevich 2007
-- modified by Tim Newsham
-- modified by Stephen Blackheath 2009, v1.0

-----------------------------------------
-- Requires the regex-pcre cabal package.
--
-- Instructions for installing it, assuming ghc version 6.10.1 already
--    installed, tested on Ubuntu 8.10:
--
-- 1. Make sure these OS packages are installed:
--   libgmp3-dev
--   zlib1g-dev
--   libcurl4-gnutls-dev
--   libpcre3-dev
--
-- 2. get cabal-install package from
--   http://hackage.haskell.org/packages/archive/cabal-install/0.6.0/cabal-install-0.6.0.tar.gz
--
-- 3. Unpack cabal-install, cd into directory, and type
--   sudo sh ./bootstap.sh
--
-- 4. Type
--   sudo $HOME/.cabal/bin/cabal update
--   sudo $HOME/.cabal/bin/cabal install regex-pcre

-----------------------------------------
-- Compile command: ghc -o regex regex.hs -threaded --make
-- Run command:     ./regex +RTS -N4       (quad core)
--                  ./regex                (single core)

import Control.Concurrent
import Control.Parallel
import Control.Parallel.Strategies
import Control.Monad
import Foreign.Storable
import Foreign.Marshal.Alloc
import Foreign.Marshal.Array
import Foreign.Ptr
import Foreign.ForeignPtr
import Foreign.C.Types
import Text.Regex.PCRE          -- requires haskell-regex-pcre-builtin
import qualified Data.ByteString as B
import qualified Data.ByteString.Internal as BI
import System.IO.Unsafe
import Data.Array
import Debug.Trace
import Data.List
import Data.Word

variants = [
  "agggtaaa|tttaccct",
  "[cgt]gggtaaa|tttaccc[acg]",
  "a[act]ggtaaa|tttacc[agt]t",
  "ag[act]gtaaa|tttac[agt]ct",
  "agg[act]taaa|ttta[agt]cct",
  "aggg[acg]aaa|ttt[cgt]ccct",
  "agggt[cgt]aa|tt[acg]accct",
  "agggta[cgt]a|t[acg]taccct",
  "agggtaa[cgt]|[acg]ttaccct" ]

subs = [
    ("B", "(c|g|t)"),
    ("D", "(a|g|t)"),
    ("H", "(a|c|t)"),
    ("K", "(g|t)"),
    ("M", "(a|c)"),
    ("N", "(a|c|g|t)"),
    ("R", "(a|g)"),
    ("S", "(c|g)"),
    ("V", "(a|c|g)"),
    ("W", "(a|t)"),
    ("Y", "(c|t)")]

main = do
  file <- B.getContents
  let [s1,s2,s3] = map (B.concat . tail) $
                groupBy notHeader $ B.split (BI.c2w '\n') file
      showVars r = r ++ ' ' : show ((s2 =~ r :: Int) + (s3 =~ r :: Int))
      results = map showVars variants ++ [
                  "",
                  show $ B.length file,
                  show $ B.length s1 + B.length s2 + B.length s3]
      r2 = flip map (zip [1..] results) $ \(idx, a) ->
          trace ("start "++show idx) () `seq` (a `using` rnf) `seq` trace ("end "++show idx) () `seq` a
  -- Ensure that the data blocks are fully evaluated before we start
  -- executing things in parallel, since they all depend on them
  return $! (s1 `par` s2 `par` s3) `seq` s1 `seq` s2 `seq` s3
  mapM_ putStrLn $ parList rnf results `seq` results
  
  let chunks = fragment 5000 s2  -- break into chunks to parallelize, which
                                 -- is possible as our regexes are 1 char long
  chunks' <- parallel $ map substituteAll chunks  -- do regex substitutions
  print $ B.length s1 + B.length s3 + B.length (B.concat chunks')
  where notHeader _ s = B.null s || B.head s /= (BI.c2w '>')

-- Drop in replacement for sequence
parallel :: [IO a] -> IO [a]
parallel actions = do
    vars <- forM actions $ \action -> do
        var <- newEmptyMVar
        forkIO $ do
            answer <- action
            return $! answer
            putMVar var answer
        return var
    forM vars takeMVar

fragment :: Int -> B.ByteString -> [B.ByteString]
fragment chunkSize bs =
    let (start, rem) = B.splitAt chunkSize bs
    in  if B.null rem
            then [start]
            else start : fragment chunkSize rem

-- Precompile regexes
subRegexes :: [(Regex, B.ByteString)]
subRegexes = flip map subs $ \(pattern, sub) ->
    (makeRegex pattern :: Regex, B.pack (map BI.c2w sub))

substituteAll :: B.ByteString -> IO B.ByteString
substituteAll !txt = do
    let BI.PS srcFP srcOff srcLen = txt
    withForeignPtr srcFP $ \src0 -> do
        let src = src0 `plusPtr` srcOff
            -- Capacity of -1 guarantees that a new buffer will be allocated
        (dst, dstLen, _) <- foldM substitute_ (src, srcLen, -1) subRegexes
        dstFP <- newForeignPtr finalizerFree dst
        return $ BI.PS dstFP 0 dstLen

foreign import ccall unsafe "string.h memmove" memmove
    :: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8)

-- A function that does nothing
foreign import ccall unsafe "static unistd.h &getpid" c_null_finalizer
    :: FunPtr (Ptr Word8 -> IO ())

-- Do a single regex substitution, returning the modified string
substitute_ :: (Ptr Word8, Int, Int) -> (Regex, B.ByteString) -> IO (Ptr Word8, Int, Int)
substitute_ (src, srcLen, srcCap) (regex, sub) = do
    -- Make a new string given the input string, regex match positions, and
    -- string to substitute
    -- Turn the source buffer into a byte string to pass to 'match'
    srcFP <- newForeignPtr c_null_finalizer src
    let srcBS = BI.PS srcFP 0 srcLen
        am :: Array Int (MatchOffset, MatchLength)
        AllMatches am = match regex srcBS
        (start, end) = bounds am
        matches = end + 1
        BI.PS subFP subOff subLen = sub
        newLength = srcLen + matches * (subLen - 1)
    withForeignPtr subFP $ \sub0 -> do
        let sub = sub0 `plusPtr` subOff
        (dst, dstCap) <-
            if newLength > srcCap
                then do
                    let dstCap = if srcCap < 0
                            then srcLen * 2
                            else srcCap * 2
                    dst <- mallocBytes dstCap
                    return (dst, dstCap)
                else
                    return (src, srcCap)

        let copy :: Int -> Int -> Int -> IO ()
            copy idx sOff dOff | idx < 0 = do
                let chunkLen = sOff
                when (dst /= src) $
                    BI.memcpy dst src (fromIntegral chunkLen)

            copy idx sOff dOff = do
                let (matchOff, _) = am ! idx
                    sOff' = matchOff + 1
                    chunkLen =  sOff - sOff'
                    dOff' = dOff - chunkLen
                memmove (dst `plusPtr` dOff') (src `plusPtr` sOff')
                        (fromIntegral chunkLen)
                let dOff'' = dOff' - subLen
                    sOff'' = sOff' - 1
                BI.memcpy (dst `plusPtr` dOff'') sub
                        (fromIntegral subLen)
                copy (idx-1) sOff'' dOff''

        copy end srcLen newLength
        when (dst /= src && srcCap >= 0) $ free src
        return (dst, newLength, dstCap)