Personal tools

Shootout/Fasta

From HaskellWiki

< Shootout(Difference between revisions)
Jump to: navigation, search
(Add ByteString entry)
(cleaner version)
Line 16: Line 16:
   
 
<haskell>
 
<haskell>
-- The Computer Language Shootout
+
{- The Computer Language Shootout
-- http://shootout.alioth.debian.org/
+
http://shootout.alioth.debian.org/
-- contributed by Jeff Newbern
+
contributed by Jeff Newbern
-- updated by Spencer Janssen
+
updated by Spencer Janssen and Don Stewart -}
   
-- Uses random generation code derived from Simon Marlow and Einar Karttunen's
+
import System
-- "random" test entry.
 
 
import System(getArgs)
 
 
import qualified Data.ByteString.Char8 as B
 
import qualified Data.ByteString.Char8 as B
   
type Base = Char
+
randomSequence :: Int -> [(Char,Double)] -> Int -> (B.ByteString, Int)
type Sequence = B.ByteString
 
type Seed = Int
 
type BaseFrequency = (Base,Double)
 
 
randomSequence :: Int -> [BaseFrequency] -> Seed -> (Sequence, Seed)
 
 
randomSequence n bf seed = (sequence, seed')
 
randomSequence n bf seed = (sequence, seed')
where
+
where (sequence, Just seed') = B.unfoldrN n f seed
(sequence, Just seed') = B.unfoldrN n f seed
+
f s = Just (chooseBase bf (normalize s), nextSeed s)
f s = Just (chooseBase bf (normalize s), nextSeed s)
 
   
chooseBase :: [BaseFrequency] -> Double -> Base
+
chooseBase :: [(Char,Double)] -> Double -> Char
 
chooseBase [(b,_)] _ = b
 
chooseBase [(b,_)] _ = b
 
chooseBase ((b,f):xs) p | p < f = b
 
chooseBase ((b,f):xs) p | p < f = b
 
| otherwise = chooseBase xs (p-f)
 
| otherwise = chooseBase xs (p-f)
   
writeFasta :: String -> String -> Sequence -> IO ()
+
writeFasta label title sequence = do
writeFasta label title sequence =
+
putStrLn $ ">" ++ label ++ " " ++ title
do putStrLn $ ">" ++ label ++ " " ++ title
 
 
mapM_ B.putStrLn $ split 60 sequence
 
mapM_ B.putStrLn $ split 60 sequence
   
split :: Int -> Sequence -> [Sequence]
 
 
split n xs | B.null xs = []
 
split n xs | B.null xs = []
 
| otherwise = l : split n r
 
| otherwise = l : split n r
where (l, r) = B.splitAt n xs
+
where (l, r) = B.splitAt n xs
   
 
main = do
 
main = do
args <- getArgs
+
n <- getArgs >>= readIO . head
let n = if (null args) then 1000 else read (head args)
+
let aluLen = 1 + 2 * n `div` B.length alu
aluLen = 1 + 2 * n `div` B.length alu
 
 
aluSeq = B.take (2 * n) . B.concat . replicate aluLen $ alu
 
aluSeq = B.take (2 * n) . B.concat . replicate aluLen $ alu
 
(iubSeq, seed) = randomSequence (3 * n) iub initSeed
 
(iubSeq, seed) = randomSequence (3 * n) iub initSeed
Line 54: Line 53:
 
writeFasta "THREE" "Homo sapiens frequency" homSeq
 
writeFasta "THREE" "Homo sapiens frequency" homSeq
   
-- predefined sequences and tables:
 
alu :: Sequence
 
 
alu = B.pack
 
alu = B.pack
("GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" ++
+
"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\
"GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" ++
+
\GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\
"CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" ++
+
\CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\
"ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" ++
+
\ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\
"GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" ++
+
\GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\
"AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" ++
+
\AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\
"AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA")
+
\AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"
   
iub :: [BaseFrequency]
+
iub = [ ('a', 0.27), ('c', 0.12), ('g', 0.12), ('t', 0.27), ('B', 0.02)
iub = [ ('a', 0.27), ('c', 0.12), ('g', 0.12), ('t', 0.27),
+
, ('D', 0.02), ('H', 0.02), ('K', 0.02), ('M', 0.02), ('N', 0.02)
('B', 0.02), ('D', 0.02), ('H', 0.02), ('K', 0.02), ('M', 0.02), ('N', 0.02),
+
, ('R', 0.02), ('S', 0.02), ('V', 0.02), ('W', 0.02), ('Y', 0.02) ]
('R', 0.02), ('S', 0.02), ('V', 0.02), ('W', 0.02), ('Y', 0.02) ]
 
   
homosapiens :: [BaseFrequency]
 
 
homosapiens = [ ('a', 0.3029549426680), ('c', 0.1979883004921),
 
homosapiens = [ ('a', 0.3029549426680), ('c', 0.1979883004921),
 
('g', 0.1975473066391), ('t', 0.3015094502008) ]
 
('g', 0.1975473066391), ('t', 0.3015094502008) ]
   
-- random number generation:
 
 
im = 139968
 
im = 139968
 
ia = 3877
 
ia = 3877
 
ic = 29573
 
ic = 29573
nextSeed s = (s * ia + ic) `mod` im
+
nextSeed s = (s * ia + ic) `rem` im
 
normalize n = (fromIntegral n) * (1.0 / fromIntegral im)
 
normalize n = (fromIntegral n) * (1.0 / fromIntegral im)
 
initSeed = nextSeed 42
 
initSeed = nextSeed 42

Revision as of 05:57, 19 November 2006

This is a Shootout Entry for [1].

The description of the program:

Each program should

  • encode the expected cumulative probabilities for 2 alphabets
  • generate DNA sequences, by weighted random selection from the alphabets (using the pseudo-random number generator from the random benchmark)
  • generate DNA sequences, by copying from a given sequence
  • write 3 sequences line-by-line in FASTA format

We'll use the generated FASTA file as input for other benchmarks (reverse-complement, k-nucleotide).

Contents

1 Proposed entry

This entry is similar to the "Old entry" on this page, but has been updated to use Data.ByteString. It appears to be respectably fast, execution time is about 3.5 times the C++ version in initial tests.

{- The Computer Language Shootout
   http://shootout.alioth.debian.org/
   contributed by Jeff Newbern
   updated by Spencer Janssen and Don Stewart -}
 
import System
import qualified Data.ByteString.Char8 as B
 
randomSequence :: Int -> [(Char,Double)] -> Int -> (B.ByteString, Int)
randomSequence n bf seed = (sequence, seed')
  where (sequence, Just seed') = B.unfoldrN n f seed
        f s = Just (chooseBase bf (normalize s), nextSeed s)
 
chooseBase :: [(Char,Double)] -> Double -> Char
chooseBase [(b,_)]    _ = b
chooseBase ((b,f):xs) p | p < f     = b
                        | otherwise = chooseBase xs (p-f)
 
writeFasta label title sequence = do
     putStrLn $ ">" ++ label ++ " " ++ title
     mapM_ B.putStrLn $ split 60 sequence
 
split n xs | B.null xs = []
           | otherwise = l : split n r
    where (l, r) = B.splitAt n xs
 
main = do
    n <- getArgs >>= readIO . head
    let aluLen         = 1 + 2 * n `div` B.length alu
        aluSeq         = B.take (2 * n) . B.concat . replicate aluLen $ alu
        (iubSeq, seed) = randomSequence (3 * n) iub         initSeed
        (homSeq, _)    = randomSequence (5 * n) homosapiens seed
 
    writeFasta "ONE"   "Homo sapiens alu"       aluSeq
    writeFasta "TWO"   "IUB ambiguity codes"    iubSeq
    writeFasta "THREE" "Homo sapiens frequency" homSeq
 
alu = B.pack
         "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\
         \GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\
         \CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\
         \ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\
         \GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\
         \AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\
         \AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"
 
iub = [ ('a', 0.27), ('c', 0.12), ('g', 0.12), ('t', 0.27), ('B', 0.02)
      , ('D', 0.02), ('H', 0.02), ('K', 0.02), ('M', 0.02), ('N', 0.02)
      , ('R', 0.02), ('S', 0.02), ('V', 0.02), ('W', 0.02), ('Y', 0.02) ]
 
homosapiens = [ ('a', 0.3029549426680), ('c', 0.1979883004921),
                ('g', 0.1975473066391), ('t', 0.3015094502008) ]
 
im          = 139968
ia          = 3877
ic          = 29573
nextSeed s  = (s * ia + ic) `rem` im
normalize n = (fromIntegral n) * (1.0 / fromIntegral im)
initSeed    = nextSeed 42

2 Current best entry (Haskell GHC #3)

This entry has been disqualified. From the Shootout judges: "Preconverting to Int is a really interesting approach - we're looking for the vanilla approach."

It now has respectable performance and memory usage.

{-# OPTIONS_GHC -O2 -funbox-strict-fields #-}
-- The Great Computer Language Shootout
-- http://shootout.alioth.debian.org/
--
-- contributed by Jeff Newbern
-- Modified to fastest.hs by Chris Kuklewicz, 6 Jan 2006
-- Modified to fixed-fasta.hs by Chris Kuklewicz, 17 Jan 2006
--
-- Uses random generation code derived from Simon Marlow and Einar
-- Karttunen's "random" test entry.  No longer uses Double during run,
-- everything has been pre-converted to Int.  And pre-converted to a
-- binary tree for lookup.  Ideally this tree could be constructed
-- with the probabilities in mind, but it isn't in this version.
--
-- Compile with ghc --make resub-fasta.hs -o resub-fasta.ghc_run
-- Run with "./rsub-fasta.ghc_run %A" where %A is the parameter
 
import Control.Monad
import Data.Char(chr,ord)
import Data.List(mapAccumL)
import Data.Word(Word8)
import Data.IORef
import Foreign
import System(getArgs)
import System.IO
 
type Base = Word8
data BaseFrequencyTree = Node !Base
                       | TreeNodes !Int !Base !Base
                       | Tree !Int !BaseFrequencyTree !BaseFrequencyTree
data Seed = Seed !Int
 
b2c :: Word8 -> Char
b2c = chr . fromEnum
c2b = toEnum . ord
 
alu = "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" ++
      "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" ++
      "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" ++
      "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" ++
      "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" ++
      "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" ++
      "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"
 
im = 139968 :: Double
 
iub = mkTree $ snd . mapAccumL (\rt (c,f) -> (f+rt,(c2b c,ceiling $ im*(f+rt)))) 0.0 $
  [ ('a', 0.27), ('c', 0.12), ('g', 0.12), ('t', 0.27), ('B', 0.02),
    ('D', 0.02), ('H', 0.02), ('K', 0.02), ('M', 0.02), ('N', 0.02),
    ('R', 0.02), ('S', 0.02), ('V', 0.02), ('W', 0.02), ('Y', 0.02) ]
 
homosapiens =  mkTree $ snd . mapAccumL (\rt (c,f) -> (f+rt,(c2b c,ceiling $ im*(f+rt)))) 0.0 $
  [ ('a', 0.3029549426680), ('c', 0.1979883004921), ('g', 0.1975473066391), ('t', 0.3015094502008) ]
 
mkTree [(b,_)] = Node b
mkTree [(b,f),(b',_)] = TreeNodes f b b'
mkTree xs = let (h,t) = splitAt (length xs `div` 2) xs
                (_,f) = last h
            in Tree f (mkTree h) (mkTree t)
 
chooseBase (Node b) _ = b
chooseBase (TreeNodes f b b') p = if (p<f) then b else b'
chooseBase (Tree f l r) p | p < f     = chooseBase l p
                          | otherwise = chooseBase r p
 
writeFastaHeader label title =  (putStrLn (('>':label) ++ (' ':title)))
perLine = 60
 
writeAluBuffer total = do
  let l = length alu
      bufSize = l + perLine - 1
  aluBuf <- mallocArray bufSize
  foldM_ (\ptr c -> poke ptr (c2b c) >> return (advancePtr ptr 1)) aluBuf (take bufSize (cycle alu))
  let (full,end) = total `divMod` perLine
      fullLine n = let ptr = advancePtr aluBuf ((n * perLine) `mod` l)
                   in hPutBuf stdout ptr perLine >> hPutChar stdout '\n'
      lastLine = let ptr = advancePtr aluBuf ((full*perLine) `mod` l)
                 in hPutBuf stdout ptr end >> hPutChar stdout '\n'
  mapM_ fullLine [0..pred full]
  when (end>0) lastLine
 
writeWrapped total trans initSeed = do
  seedRef <- newIORef initSeed
  let l = succ perLine
      (im,ia,ic)=(139968,3877,29573)
      nextSeed (Seed s) = Seed ( (s * ia + ic) `mod` im )
      prng = do newSeed <- return.nextSeed =<< readIORef seedRef
                writeIORef seedRef newSeed
                return newSeed
  buf <- mallocArray l
  poke (advancePtr buf perLine) (c2b '\n')
  let (full,end) = total `divMod` perLine
      fill 0 _   = return ()
      fill i ptr = do (Seed b) <- prng
                      poke ptr (trans b)
                      fill (pred i) (advancePtr ptr 1)
      fullLine = do fill perLine buf
                    hPutBuf stdout buf l
      lastLine = do fill end buf
                    poke (advancePtr buf end) (c2b '\n')
                    hPutBuf stdout buf (succ end)
  replicateM_ full fullLine
  when (end>0) lastLine
  readIORef seedRef
 
main = do args <- getArgs
          let n = if null args then 2500000 else read (head args)
          writeFastaHeader "ONE" "Homo sapiens alu"
          writeAluBuffer (2*n)
          writeFastaHeader "TWO" "IUB ambiguity codes"
          seed' <- writeWrapped (3*n) (chooseBase iub) (Seed 42)
          writeFastaHeader "THREE" "Homo sapiens frequency"
          writeWrapped (5*n) (chooseBase homosapiens) seed'

An extremely minor gripe, how about using:

alu = "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\
      \GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\
      \CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\
      \ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\
      \GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\
      \AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\
      \AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"

Rather than all those ++... This won't help performance one bit I would wager, but uses the "proper" way of doing multiline strings.--SebastianSylvan 11:40, 10 November 2006 (UTC)

3 Non-leaking Entry

This a new entry, which is mostly the old entry below modified to use StateT to allow the random number stream to be finite, so no splitAt or drop functions are needed. This fixes the space leak and improves the time by almost a factor of 2. Serious speed will require changing from String to a Word8 array of some sort.

{-
   The Great Computer Language Shootout
   http://shootout.alioth.debian.org/
 
   contributed by Jeff Newbern
   Modified by Chris Kuklewicz, 3 Jan 2006
 
   Uses random generation code derived from Simon Marlow and Einar
   Karttunen's "random" test entry.
 
   Modified version note: Use a StateT around IO to manage
   the seed.  This makes interleaving the generation and output of the
   sequences easier.
-}
 
import Control.Monad.Trans
import Control.Monad.State
import System(getArgs)
 
type Base = Char
type Sequence = [Base]
 
alu :: Sequence  -- predefined sequence
alu = "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" ++
      "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" ++
      "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" ++
      "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" ++
      "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" ++
      "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" ++
      "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"
 
type BaseFrequency = (Base,Double)
 
iub :: [BaseFrequency]
iub = [ ('a', 0.27), ('c', 0.12), ('g', 0.12), ('t', 0.27),
        ('B', 0.02), ('D', 0.02), ('H', 0.02), ('K', 0.02), ('M', 0.02), ('N', 0.02),
        ('R', 0.02), ('S', 0.02), ('V', 0.02), ('W', 0.02), ('Y', 0.02) ]
 
homosapiens :: [BaseFrequency]
homosapiens = [ ('a', 0.3029549426680), ('c', 0.1979883004921),
                ('g', 0.1975473066391), ('t', 0.3015094502008) ]
 
-- select a base whose interval covers the given double
chooseBase :: [BaseFrequency] -> Double -> Base
chooseBase [(b,_)]    _ = b
chooseBase ((b,f):xs) p | p < f     = b
                        | otherwise = chooseBase xs (p-f)
 
type Seed = Int
type Pseudo a = StateT Seed IO a
 
prng :: Pseudo Double
prng = let nextSeed s = (s * ia + ic) `mod` im
           normalize n = (fromIntegral n) * (1.0 / fromIntegral im)
           im = 139968; ia = 3877; ic = 29573
       in do seed <- get
             let seed' = nextSeed seed
             put seed'
             return (normalize seed')
 
prngN count = sequence $ replicate count prng
 
-- write a sequence in Fasta format
writeFasta :: String -> String -> Sequence -> IO ()
writeFasta label title sequence =
  do putStrLn $ ">" ++ (label ++ (" " ++ title))
     writeWrapped 60 sequence
  where writeWrapped _   []  = do return ()
        writeWrapped len str = do let (s1,s2) = splitAt len str
                                  putStrLn s1
                                  writeWrapped len s2
 
writeFastaHeader :: (MonadIO m) => String -> String -> m ()
writeFastaHeader label title = liftIO $ putStrLn $ ">" ++ (label ++ (" " ++ title))
 
writeWrapped' :: Int -> Int -> (Double->Base) -> Pseudo ()
writeWrapped' wrap total trans = 
    let work c = case c of
                   0 -> return ()
                   n -> do let c' = min wrap n
                               nextC = c - c'
                           s <- liftM (map trans) (prngN c')
                           liftIO $ putStrLn s
                           work nextC
    in work total
 
writeWrapped = writeWrapped' 60
 
main = do args <- getArgs
          let n = if (null args) then 1000 else read (head args)
          writeFasta "ONE" "Homo sapiens alu" (take (2*n) (cycle alu))
          writeFastaHeader "TWO" "IUB ambiguity codes"
          seed' <- execStateT (writeWrapped (3*n) (chooseBase iub))  42
          writeFastaHeader "THREE" "Homo sapiens frequency"
          execStateT (writeWrapped (5*n) (chooseBase homosapiens)) seed'


4 Old Entry

This is the old entry. It has a huge memory leak due to laziness.

-- The Great Computer Language Shootout
-- http://shootout.alioth.debian.org/
-- contributed by Jeff Newbern
 
-- Uses random generation code derived from Simon Marlow and Einar Karttunen's
-- "random" test entry.
 
-- Note: This code has not been optimized *at all*.  It is written to be clear
-- and to follow standard Haskell idioms as much as possible (but we have to match
-- the stateful PRNG idiom in the test definition, which is oriented toward an
-- imperative language).  Performance is decent with ghc -O2, but if it can be
-- improved without sacrificing the clarity of the code, by all means go for it!
 
import System(getArgs)
 
type Base = Char
type Sequence = [Base]
 
alu :: Sequence  -- predefined sequence
alu = "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" ++
      "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" ++
      "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" ++
      "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" ++
      "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" ++
      "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" ++
      "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"
 
type BaseFrequency = (Base,Double)
 
iub :: [BaseFrequency]
iub = [ ('a', 0.27), ('c', 0.12), ('g', 0.12), ('t', 0.27),
        ('B', 0.02), ('D', 0.02), ('H', 0.02), ('K', 0.02), ('M', 0.02), ('N', 0.02),
        ('R', 0.02), ('S', 0.02), ('V', 0.02), ('W', 0.02), ('Y', 0.02) ]
 
homosapiens :: [BaseFrequency]
homosapiens = [ ('a', 0.3029549426680), ('c', 0.1979883004921),
                ('g', 0.1975473066391), ('t', 0.3015094502008) ]
 
-- select a base whose interval covers the given double
chooseBase :: [BaseFrequency] -> Double -> Base
chooseBase [(b,_)]    _ = b
chooseBase ((b,f):xs) p | p < f     = b
                        | otherwise = chooseBase xs (p-f)
 
-- write a sequence in Fasta format
writeFasta :: String -> String -> Sequence -> IO ()
writeFasta label title sequence =
  do putStrLn $ ">" ++ (label ++ (" " ++ title))
     writeWrapped 60 sequence
  where writeWrapped _   []  = do return ()
        writeWrapped len str = do let (s1,s2) = splitAt len str
                                  putStrLn s1
                                  writeWrapped len s2
 
-- generate an infinite sequence of random doubles using the
-- prng from the "random" test
probs :: Int -> [Double]
probs seed = tail $ map normalize (iterate nextSeed seed)
  where nextSeed s = (s * ia + ic) `mod` im
        im = 139968
        ia = 3877
        ic = 29573
        normalize n = (fromIntegral n) * (1.0 / fromIntegral im)
 
main = do args <- getArgs
          let n = if (null args) then 1000 else read (head args)
          writeFasta "ONE" "Homo sapiens alu" (take (2*n) (cycle alu))
          let (seq1,seq2) = splitAt (3*n) (probs 42) -- we have to match the imperative version
          let seq2' = take (5*n) seq2                -- instead of using the Haskell idiom
          writeFasta "TWO" "IUB ambiguity codes" (map (chooseBase iub) seq1)
          writeFasta "THREE" "Homo sapiens frequency" (map (chooseBase homosapiens) seq2')