Data.ByteString

Haskell Hierarchical Libraries (base package)

Data.ByteString

Description

A time and space-efficient implementation of byte vectors using packed Word8 arrays, suitable for high performance use, in terms of both large data quantities and high speed requirements. Byte vectors are encoded as strict Word8 arrays of bytes, held in a ForeignPtr, and can be passed between C and Haskell with little effort.

This module is intended to be imported qualified, to avoid name clashes with Prelude functions. eg.

 import qualified Data.ByteString as B

Original GHC implementation by Bryan O'Sullivan. Rewritten to use UArray by Simon Marlow. Rewritten to support slices and use ForeignPtr by David Roundy. Polished and extended by Don Stewart.

Synopsis

data ByteString = PS !(ForeignPtr Word8) !Int !Int

empty :: ByteString

packByte :: Word8 -> ByteString

pack :: [Word8] -> ByteString

unpack :: ByteString -> [Word8]

packWith :: (a -> Word8) -> [a] -> ByteString

unpackWith :: (Word8 -> a) -> ByteString -> [a]

cons :: Word8 -> ByteString -> ByteString

snoc :: ByteString -> Word8 -> ByteString

null :: ByteString -> Bool

length :: ByteString -> Int

head :: ByteString -> Word8

tail :: ByteString -> ByteString

last :: ByteString -> Word8

init :: ByteString -> ByteString

append :: ByteString -> ByteString -> ByteString

inits :: ByteString -> [ByteString]

tails :: ByteString -> [ByteString]

elems :: ByteString -> [ByteString]

map :: (Word8 -> Word8) -> ByteString -> ByteString

reverse :: ByteString -> ByteString

intersperse :: Word8 -> ByteString -> ByteString

transpose :: [ByteString] -> [ByteString]

foldl :: (a -> Word8 -> a) -> a -> ByteString -> a

foldr :: (Word8 -> a -> a) -> a -> ByteString -> a

foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8

foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8

concat :: [ByteString] -> ByteString

concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString

any :: (Word8 -> Bool) -> ByteString -> Bool

all :: (Word8 -> Bool) -> ByteString -> Bool

maximum :: ByteString -> Word8

minimum :: ByteString -> Word8

mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString

replicate :: Int -> Word8 -> ByteString

unfoldrN :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString

take :: Int -> ByteString -> ByteString

drop :: Int -> ByteString -> ByteString

splitAt :: Int -> ByteString -> (ByteString, ByteString)

takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString

dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString

break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)

span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)

spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)

breakByte :: Word8 -> ByteString -> (ByteString, ByteString)

spanByte :: Word8 -> ByteString -> (ByteString, ByteString)

breakFirst :: Word8 -> ByteString -> Maybe (ByteString, ByteString)

breakLast :: Word8 -> ByteString -> Maybe (ByteString, ByteString)

split :: Word8 -> ByteString -> [ByteString]

splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]

tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]

group :: ByteString -> [ByteString]

groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]

join :: ByteString -> [ByteString] -> ByteString

joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString

index :: ByteString -> Int -> Word8

elemIndex :: Word8 -> ByteString -> Maybe Int

elemIndices :: Word8 -> ByteString -> [Int]

elemIndexLast :: Word8 -> ByteString -> Maybe Int

findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int

findIndices :: (Word8 -> Bool) -> ByteString -> [Int]

count :: Word8 -> ByteString -> Int

sort :: ByteString -> ByteString

elem :: Word8 -> ByteString -> Bool

notElem :: Word8 -> ByteString -> Bool

filterByte :: Word8 -> ByteString -> ByteString

filterNotByte :: Word8 -> ByteString -> ByteString

filter :: (Word8 -> Bool) -> ByteString -> ByteString

find :: (Word8 -> Bool) -> ByteString -> Maybe Word8

isPrefixOf :: ByteString -> ByteString -> Bool

isSuffixOf :: ByteString -> ByteString -> Bool

isSubstringOf :: ByteString -> ByteString -> Bool

findSubstring :: ByteString -> ByteString -> Maybe Int

findSubstrings :: ByteString -> ByteString -> [Int]

zip :: ByteString -> ByteString -> [(Word8, Word8)]

zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]

unzip :: [(Word8, Word8)] -> (ByteString, ByteString)

unsafeHead :: ByteString -> Word8

unsafeTail :: ByteString -> ByteString

unsafeIndex :: ByteString -> Int -> Word8

generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString

create :: Int -> (Ptr Word8 -> IO ()) -> ByteString

fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString

toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)

skipIndex :: ByteString -> Int

packCString :: CString -> ByteString

packCStringLen :: CStringLen -> ByteString

packMallocCString :: CString -> ByteString

packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString

packAddress :: Addr# -> ByteString

unsafePackAddress :: Int -> Addr# -> ByteString

unsafeFinalize :: ByteString -> IO ()

useAsCString :: ByteString -> (CString -> IO a) -> IO a

unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a

unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a

copy :: ByteString -> ByteString

copyCString :: CString -> ByteString

copyCStringLen :: CStringLen -> ByteString

getLine :: IO ByteString

getContents :: IO ByteString

putStr :: ByteString -> IO ()

putStrLn :: ByteString -> IO ()

readFile :: FilePath -> IO ByteString

writeFile :: FilePath -> ByteString -> IO ()

getArgs :: IO [ByteString]

hGetLine :: Handle -> IO ByteString

hGetNonBlocking :: Handle -> Int -> IO ByteString

hGetContents :: Handle -> IO ByteString

hGet :: Handle -> Int -> IO ByteString

hPut :: Handle -> ByteString -> IO ()

unpackList :: ByteString -> [Word8]

noAL :: NoAL

data NoAL

loopArr :: (ByteString, acc) -> ByteString

loopAcc :: (ByteString, acc) -> acc

loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2)

loopU :: (acc -> Word8 -> (acc, Maybe Word8)) -> acc -> ByteString -> (ByteString, acc)

mapEFL :: (Word8 -> Word8) -> NoAL -> Word8 -> (NoAL, Maybe Word8)

filterEFL :: (Word8 -> Bool) -> NoAL -> Word8 -> (NoAL, Maybe Word8)

foldEFL :: (acc -> Word8 -> acc) -> acc -> Word8 -> (acc, Maybe Word8)

fuseEFL :: (a1 -> Word8 -> (a1, Maybe Word8)) -> (a2 -> Word8 -> (a2, Maybe Word8)) -> (a1, a2) -> Word8 -> ((a1, a2), Maybe Word8)

filterF :: (Word8 -> Bool) -> ByteString -> ByteString

mapF :: (Word8 -> Word8) -> ByteString -> ByteString

The ByteString type

data ByteString

A space-efficient representation of a Word8 vector, supporting many efficient operations. A ByteString contains 8-bit characters only.

Instances of Eq, Ord, Read, Show, Data, Typeable

Constructors

PS !(ForeignPtr Word8) !Int !Int

Instances

Introducing and eliminating ByteStrings

empty :: ByteString

O(1) The empty ByteString

packByte :: Word8 -> ByteString

O(1) Convert a Word8 into a ByteString

pack :: [Word8] -> ByteString

O(n) Convert a '[Word8]' into a ByteString.

For applications with large numbers of string literals, pack can be a bottleneck. In such cases, consider using packAddress (GHC only).

unpack :: ByteString -> [Word8]

O(n) Converts a ByteString to a '[Word8]'.

packWith :: (a -> Word8) -> [a] -> ByteString

O(n) Convert a '[a]' into a ByteString using some conversion function

unpackWith :: (Word8 -> a) -> ByteString -> [a]

O(n) Converts a ByteString to a '[a]', using a conversion function.

Basic interface

cons :: Word8 -> ByteString -> ByteString

O(n) cons is analogous to (:) for lists, but of different complexity, as it requires a memcpy.

snoc :: ByteString -> Word8 -> ByteString

O(n) Append a byte to the end of a ByteString

null :: ByteString -> Bool

O(1) Test whether a ByteString is empty.

length :: ByteString -> Int

O(1) length returns the length of a ByteString as an Int.

head :: ByteString -> Word8

O(1) Extract the first element of a ByteString, which must be non-empty.

tail :: ByteString -> ByteString

O(1) Extract the elements after the head of a ByteString, which must be non-empty.

last :: ByteString -> Word8

O(1) Extract the last element of a ByteString, which must be finite and non-empty.

init :: ByteString -> ByteString

O(1) Return all the elements of a ByteString except the last one.

append :: ByteString -> ByteString -> ByteString

O(n) Append two ByteStrings

Special ByteStrings

inits :: ByteString -> [ByteString]

O(n) Return all initial segments of the given ByteString, shortest first.

tails :: ByteString -> [ByteString]

O(n) Return all final segments of the given ByteString, longest first.

elems :: ByteString -> [ByteString]

O(n) breaks a ByteString to a list of ByteStrings, one byte each.

Transforming ByteStrings

map :: (Word8 -> Word8) -> ByteString -> ByteString

O(n) map f xs is the ByteString obtained by applying f to each element of xs. This function is subject to array fusion.

reverse :: ByteString -> ByteString

O(n) reverse xs efficiently returns the elements of xs in reverse order.

intersperse :: Word8 -> ByteString -> ByteString

O(n) The intersperse function takes a Word8 and a ByteString and `intersperses' that byte between the elements of the ByteString. It is analogous to the intersperse function on Lists.

transpose :: [ByteString] -> [ByteString]

The transpose function transposes the rows and columns of its ByteString argument.

Reducing ByteStrings

foldl :: (a -> Word8 -> a) -> a -> ByteString -> a

foldl, applied to a binary operator, a starting value (typically the left-identity of the operator), and a ByteString, reduces the ByteString using the binary operator, from left to right. This function is subject to array fusion.

foldr :: (Word8 -> a -> a) -> a -> ByteString -> a

foldr, applied to a binary operator, a starting value (typically the right-identity of the operator), and a ByteString, reduces the ByteString using the binary operator, from right to left.

foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8

foldl1 is a variant of foldl that has no starting value argument, and thus must be applied to non-empty ByteStrings. This function is subject to array fusion.

foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8

foldr1 is a variant of foldr that has no starting value argument, and thus must be applied to non-empty ByteStrings

Special folds

concat :: [ByteString] -> ByteString

O(n) Concatenate a list of ByteStrings.

concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString

Map a function over a ByteString and concatenate the results

any :: (Word8 -> Bool) -> ByteString -> Bool

O(n) Applied to a predicate and a ByteString, any determines if any element of the ByteString satisfies the predicate.

all :: (Word8 -> Bool) -> ByteString -> Bool

O(n) Applied to a predicate and a ByteString, all determines if all elements of the ByteString satisfy the predicate.

maximum :: ByteString -> Word8

O(n) maximum returns the maximum value from a ByteString

minimum :: ByteString -> Word8

O(n) minimum returns the minimum value from a ByteString

mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString

O(n) map Word8 functions, provided with the index at each position

Generating and unfolding ByteStrings

replicate :: Int -> Word8 -> ByteString

O(n) replicate n x is a ByteString of length n with x the value of every element. The following holds:

 replicate w c = unfoldrN w (\u -> Just (u,u)) c

This implementation uses memset(3)

unfoldrN :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString

O(n) The unfoldrN function is analogous to the List 'unfoldr'. unfoldrN builds a ByteString from a seed value. The function takes the element and returns Nothing if it is done producing the ByteString or returns Just (a,b), in which case, a is prepended to the ByteString and b is used as the next element in a recursive call.

To prevent unfoldrN having O(n^2) complexity (as prepending a character to a ByteString is O(n)), this unfoldr requires a maximum final size of the ByteString as an argument. cons can then be implemented in O(1) (i.e. a poke), and the unfoldr itself has linear complexity. The depth of the recursion is limited to this size, but may be less. For lazy, infinite unfoldr, use unfoldr (from List).

Examples:

 unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"

The following equation connects the depth-limited unfoldr to the List unfoldr:

 unfoldrN n == take n $ List.unfoldr

Substrings

Breaking strings

take :: Int -> ByteString -> ByteString

O(1) take n, applied to a ByteString xs, returns the prefix of xs of length n, or xs itself if n > length xs.

drop :: Int -> ByteString -> ByteString

O(1) drop n xs returns the suffix of xs after the first n elements, or the empty ByteString if n > length xs.

splitAt :: Int -> ByteString -> (ByteString, ByteString)

O(1) splitAt n xs is equivalent to (take n xs, drop n xs).

takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString

takeWhile, applied to a predicate p and a ByteString xs, returns the longest prefix (possibly empty) of xs of elements that satisfy p.

dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString

dropWhile p xs returns the suffix remaining after takeWhile p xs.

break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)

break p is equivalent to span (not . p).

span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)

span p xs breaks the ByteString into two segments. It is equivalent to (takeWhile p xs, dropWhile p xs)

spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)

spanEnd behaves like span but from the end of the ByteString. We have

 spanEnd (not.isSpace) "x y z" == ("x y ","z")

and

 spanEnd (not . isSpace) ps
    == 
 let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)

Breaking and dropping on specific bytes

breakByte :: Word8 -> ByteString -> (ByteString, ByteString)

breakByte breaks its ByteString argument at the first occurrence of the specified byte. It is more efficient than break as it is implemented with memchr(3). I.e.

 break (=='c') "abcd" == breakByte 'c' "abcd"

spanByte :: Word8 -> ByteString -> (ByteString, ByteString)

spanByte breaks its ByteString argument at the first occurrence of a byte other than its argument. It is more efficient than 'span (==)'

 span  (=='c') "abcd" == spanByte 'c' "abcd"

breakFirst :: Word8 -> ByteString -> Maybe (ByteString, ByteString)

O(n) breakFirst breaks the given ByteString on the first occurrence of w. It behaves like break, except the delimiter is not returned, and Nothing is returned if the delimiter is not in the ByteString. I.e.

 breakFirst 'b' "aabbcc" == Just ("aa","bcc")

 breakFirst c xs ==
 let (x,y) = break (== c) xs 
 in if null y then Nothing else Just (x, drop 1 y)

breakLast :: Word8 -> ByteString -> Maybe (ByteString, ByteString)

O(n) breakLast behaves like breakFirst, but from the end of the ByteString.

 breakLast ('b') (pack "aabbcc") == Just ("aab","cc")

and the following are equivalent:

 breakLast 'c' "abcdef"
 let (x,y) = break (=='c') (reverse "abcdef") 
 in if null y then Nothing else Just (reverse (drop 1 y), reverse x)

Breaking into many substrings

split :: Word8 -> ByteString -> [ByteString]

O(n) Break a ByteString into pieces separated by the byte argument, consuming the delimiter. I.e.

 split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
 split 'a'  "aXaXaXa"    == ["","X","X","X",""]
 split 'x'  "x"          == ["",""]

and

 join [c] . split c == id
 split == splitWith . (==)

As for all splitting functions in this library, this function does not copy the substrings, it just constructs new ByteStrings that are slices of the original.

splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]

O(n) Splits a ByteString into components delimited by separators, where the predicate returns True for a separator element. The resulting components do not contain the separators. Two adjacent separators result in an empty component in the output. eg.

 splitWith (=='a') "aabbaca" == ["","","bb","c",""]
 splitWith (=='a') []        == []

tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]

Like splitWith, except that sequences of adjacent separators are treated as a single separator. eg.

 tokens (=='a') "aabbaca" == ["bb","c"]

group :: ByteString -> [ByteString]

The group function takes a ByteString and returns a list of ByteStrings such that the concatenation of the result is equal to the argument. Moreover, each sublist in the result contains only equal elements. For example,

 group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]

It is a special case of groupBy, which allows the programmer to supply their own equality test. It is about 40% faster than groupBy (==)

groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]

The groupBy function is the non-overloaded version of group.

Joining strings

join :: ByteString -> [ByteString] -> ByteString

O(n) The join function takes a ByteString and a list of ByteStrings and concatenates the list after interspersing the first argument between each element of the list.

joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString

O(n) joinWithByte. An efficient way to join two ByteStrings with a byte. Around 4 times faster than the generalised join.

Indexing ByteStrings

index :: ByteString -> Int -> Word8

O(1) ByteString index (subscript) operator, starting from 0.

elemIndex :: Word8 -> ByteString -> Maybe Int

O(n) The elemIndex function returns the index of the first element in the given ByteString which is equal to the query element, or Nothing if there is no such element. This implementation uses memchr(3).

elemIndices :: Word8 -> ByteString -> [Int]

O(n) The elemIndices function extends elemIndex, by returning the indices of all elements equal to the query element, in ascending order. This implementation uses memchr(3).

elemIndexLast :: Word8 -> ByteString -> Maybe Int

O(n) The elemIndexLast function returns the last index of the element in the given ByteString which is equal to the query element, or Nothing if there is no such element. The following holds:

 elemIndexLast c xs == 
 (-) (length xs - 1) `fmap` elemIndex c (reverse xs)

findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int

The findIndex function takes a predicate and a ByteString and returns the index of the first element in the ByteString satisfying the predicate.

findIndices :: (Word8 -> Bool) -> ByteString -> [Int]

The findIndices function extends findIndex, by returning the indices of all elements satisfying the predicate, in ascending order.

count :: Word8 -> ByteString -> Int

count returns the number of times its argument appears in the ByteString

 count = length . elemIndices

But more efficiently than using length on the intermediate list.

Ordered ByteStrings

sort :: ByteString -> ByteString

O(n) Sort a ByteString efficiently, using counting sort.

Searching ByteStrings

Searching by equality

These functions use memchr(3) to efficiently search the ByteString

elem :: Word8 -> ByteString -> Bool

O(n) elem is the ByteString membership predicate.

notElem :: Word8 -> ByteString -> Bool

O(n) notElem is the inverse of elem

filterByte :: Word8 -> ByteString -> ByteString

O(n) A first order equivalent of filter . (==), for the common case of filtering a single byte. It is more efficient to use filterByte in this case.

 filterByte == filter . (==)

filterByte is around 10x faster, and uses much less space, than its filter equivalent

filterNotByte :: Word8 -> ByteString -> ByteString

O(n) A first order equivalent of filter . (/=), for the common case of filtering a single byte out of a list. It is more efficient to use filterNotByte in this case.

 filterNotByte == filter . (/=)

filterNotByte is around 2x faster than its filter equivalent.

Searching with a predicate

filter :: (Word8 -> Bool) -> ByteString -> ByteString

O(n) filter, applied to a predicate and a ByteString, returns a ByteString containing those characters that satisfy the predicate. This function is subject to array fusion.

find :: (Word8 -> Bool) -> ByteString -> Maybe Word8

O(n) The find function takes a predicate and a ByteString, and returns the first element matching the predicate, or Nothing if there is no such element.

 find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing

Prefixes and suffixes

These functions use memcmp(3) to efficiently compare substrings

isPrefixOf :: ByteString -> ByteString -> Bool

O(n) The isPrefixOf function takes two ByteStrings and returns True iff the first is a prefix of the second.

isSuffixOf :: ByteString -> ByteString -> Bool

O(n) The isSuffixOf function takes two ByteStrings and returns True iff the first is a suffix of the second.

The following holds:

 isSuffixOf x y == reverse x `isPrefixOf` reverse y

However, the real implementation uses memcmp to compare the end of the string only, with no reverse required.

Search for arbitrary substrings

isSubstringOf

:: ByteString	String to search for.
-> ByteString	String to search in.
-> Bool
Check whether one string is a substring of another. `isSubstringOf p s` is equivalent to `not (null (findSubstrings p s))`.

findSubstring

:: ByteString	String to search for.
-> ByteString	String to search in.
-> Maybe Int
Get the first index of a substring in another string, or `Nothing` if the string is not found. `findSubstring p s` is equivalent to `listToMaybe (findSubstrings p s)`.

findSubstrings

:: ByteString	String to search for.
-> ByteString	String to search in.
-> [Int]
Find the indexes of all (possibly overlapping) occurrences of a substring in a string. This function uses the Knuth-Morris-Pratt string matching algorithm.

Zipping and unzipping ByteStrings

zip :: ByteString -> ByteString -> [(Word8, Word8)]

O(n) zip takes two ByteStrings and returns a list of corresponding pairs of bytes. If one input ByteString is short, excess elements of the longer ByteString are discarded. This is equivalent to a pair of unpack operations.

zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]

zipWith generalises zip by zipping with the function given as the first argument, instead of a tupling function. For example, zipWith (+) is applied to two ByteStrings to produce the list of corresponding sums.

unzip :: [(Word8, Word8)] -> (ByteString, ByteString)

O(n) unzip transforms a list of pairs of bytes into a pair of ByteStrings. Note that this performs two pack operations.

Unchecked access

unsafeHead :: ByteString -> Word8

The sortBy function is the non-overloaded version of sort.

Try some linear sorts: radix, counting Or mergesort.

sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString sortBy f ps = undefined

A variety of head for non-empty ByteStrings. unsafeHead omits the check for the empty case, so there is an obligation on the programmer to provide a proof that the ByteString is non-empty.

unsafeTail :: ByteString -> ByteString

A variety of tail for non-empty ByteStrings. unsafeTail omits the check for the empty case. As with unsafeHead, the programmer must provide a separate proof that the ByteString is non-empty.

unsafeIndex :: ByteString -> Int -> Word8

Unsafe ByteString index (subscript) operator, starting from 0, returning a Word8. This omits the bounds check, which means there is an accompanying obligation on the programmer to ensure the bounds are checked in some other way.

Low level introduction and elimination

generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString

Given the maximum size needed and a function to make the contents of a ByteString, generate makes the ByteString. The generating function is required to return the actual final size (<= the maximum size), and the resulting byte array is realloced to this size. The string is padded at the end with a null byte.

generate is the main mechanism for creating custom, efficient ByteString functions, using Haskell or C functions to fill the space.

create :: Int -> (Ptr Word8 -> IO ()) -> ByteString

A way of creating ForeignPtrs outside the IO monad. The Int argument gives the final size of the ByteString. Unlike generate, the ByteString is not reallocated if the final size is less than the estimated size. Also, unlike generate, ByteStrings created this way are managed on the Haskell heap.

fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString

O(1) Build a ByteString from a ForeignPtr

toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)

O(1) Deconstruct a ForeignPtr from a ByteString

skipIndex :: ByteString -> Int

O(1) skipIndex returns the internal skipped index of the current ByteString from any larger string it was created from, as an Int.

Packing CStrings and pointers

packCString :: CString -> ByteString

O(n) Build a ByteString from a CString. This value will have no finalizer associated to it. The ByteString length is calculated using strlen(3), and thus the complexity is O(n).

packCStringLen :: CStringLen -> ByteString

O(1) Build a ByteString from a CStringLen. This value will have no finalizer associated with it. This operation has O(1) complexity as we already know the final size, so no strlen(3) is required.

packMallocCString :: CString -> ByteString

O(n) Build a ByteString from a malloced CString. This value will have a free(3) finalizer associated to it.

packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString

O(1) Construct a ByteString given a C Ptr Word8 buffer, a length, and an IO action representing a finalizer. This function is not available on Hugs.

packAddress :: Addr# -> ByteString

O(n) Pack a null-terminated sequence of bytes, pointed to by an Addr# (an arbitrary machine address assumed to point outside the garbage-collected heap) into a ByteString. A much faster way to create an Addr# is with an unboxed string literal, than to pack a boxed string. An unboxed string literal is compiled to a static char [] by GHC. Establishing the length of the string requires a call to strlen(3), so the Addr# must point to a null-terminated buffer (as is the case with string# literals in GHC). Use unsafePackAddress if you know the length of the string statically.

An example:

 literalFS = packAddress "literal"#

unsafePackAddress :: Int -> Addr# -> ByteString

O(1) unsafePackAddress provides constant-time construction of ByteStrings -- which is ideal for string literals. It packs a null-terminated sequence of bytes into a ByteString, given a raw Addr to the string, and the length of the string. Make sure the length is correct, otherwise use the safer packAddress (where the length will be calculated once at runtime).

unsafeFinalize :: ByteString -> IO ()

Explicitly run the finaliser associated with a ByteString. Further references to this value may generate invalid memory references. This operation is unsafe, as there may be other ByteStrings referring to the same underlying pages. If you use this, you need to have a proof of some kind that all ByteStrings ever generated from the underlying byte array are no longer live.

Using ByteStrings as CStrings

useAsCString :: ByteString -> (CString -> IO a) -> IO a

O(n) construction. Use a ByteString with a function requiring a null-terminated CString. The CString should not be freed afterwards. This is a memcpy(3).

unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a

O(1) construction. Use a ByteString with a function requiring a CString. Warning: modifying the CString will affect the ByteString. Why is this function unsafe? It relies on the null byte at the end of the ByteString to be there. This is not the case if your ByteString has been spliced from a larger string (i.e. with take or drop). Unless you can guarantee the null byte, you should use the safe version, which will copy the string first.

unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a

O(1) construction. Use a ByteString with a function requiring a CStringLen. Warning: modifying the CStringLen will affect the ByteString. This is analogous to unsafeUseAsCString, and comes with the same safety requirements.

Copying ByteStrings

These functions perform memcpy(3) operations

copy :: ByteString -> ByteString

O(n) Make a copy of the ByteString with its own storage. This is mainly useful to allow the rest of the data pointed to by the ByteString to be garbage collected, for example if a large string has been read in, and only a small part of it is needed in the rest of the program.

copyCString :: CString -> ByteString

O(n) Duplicate a CString as a ByteString. Useful if you know the CString is going to be deallocated from C land.

copyCStringLen :: CStringLen -> ByteString

O(n) Same as copyCString, but saves a strlen call when the length is known.

I/O with ByteStrings

Standard input and output

getLine :: IO ByteString

getLine, read a line from stdin.

getContents :: IO ByteString

getContents. Equivalent to hGetContents stdin

putStr :: ByteString -> IO ()

Write a ByteString to stdout

putStrLn :: ByteString -> IO ()

Write a ByteString to stdout, appending a newline byte

Files

readFile :: FilePath -> IO ByteString

Read an entire file directly into a ByteString. This is far more efficient than reading the characters into a String and then using pack. It also may be more efficient than opening the file and reading it using hGet.

writeFile :: FilePath -> ByteString -> IO ()

Write a ByteString to a file.

I/O with Handles

getArgs :: IO [ByteString]

A ByteString equivalent for getArgs. More efficient for large argument lists

hGetLine :: Handle -> IO ByteString

hGetLine. read a ByteString from a handle

hGetNonBlocking :: Handle -> Int -> IO ByteString

hGetNonBlocking is identical to hGet, except that it will never block waiting for data to become available, instead it returns only whatever data is available.

hGetContents :: Handle -> IO ByteString

Read entire handle contents into a ByteString.

As with hGet, the string representation in the file is assumed to be ISO-8859-1.

hGet :: Handle -> Int -> IO ByteString

Read a ByteString directly from the specified Handle. This is far more efficient than reading the characters into a String and then using pack.

hPut :: Handle -> ByteString -> IO ()

Outputs a ByteString to the specified Handle.

Fusion utilities

unpackList :: ByteString -> [Word8]

noAL :: NoAL

No accumulator

data NoAL

Data type for accumulators which can be ignored. The rewrite rules rely on the fact that no bottoms of this type are ever constructed; hence, we can assume (_ :: NoAL) `seq` x = x.

loopArr :: (ByteString, acc) -> ByteString

Projection functions that are fusion friendly (as in, we determine when they are inlined)

loopAcc :: (ByteString, acc) -> acc

loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2)

loopU

:: (acc -> Word8 -> (acc, Maybe Word8))	mapping & folding, once per elem
-> acc	initial acc value
-> ByteString	input ByteString
-> (ByteString, acc)
Iteration over ByteStrings

mapEFL :: (Word8 -> Word8) -> NoAL -> Word8 -> (NoAL, Maybe Word8)

Special forms of loop arguments

These are common special cases for the three function arguments of gen and loop; we give them special names to make it easier to trigger RULES applying in the special cases represented by these arguments. The INLINE [1] makes sure that these functions are only inlined in the last two simplifier phases.
In the case where the accumulator is not needed, it is better to always explicitly return a value `()', rather than just copy the input to the output, as the former gives GHC better local information.

Element function expressing a mapping only

filterEFL :: (Word8 -> Bool) -> NoAL -> Word8 -> (NoAL, Maybe Word8)

Element function implementing a filter function only

foldEFL :: (acc -> Word8 -> acc) -> acc -> Word8 -> (acc, Maybe Word8)

Element function expressing a reduction only

fuseEFL :: (a1 -> Word8 -> (a1, Maybe Word8)) -> (a2 -> Word8 -> (a2, Maybe Word8)) -> (a1, a2) -> Word8 -> ((a1, a2), Maybe Word8)

Fuse to flat loop functions

filterF :: (Word8 -> Bool) -> ByteString -> ByteString

O(n) filterF is a non-fuseable version of filter, that may be around 2x faster for some one-shot applications.

mapF :: (Word8 -> Word8) -> ByteString -> ByteString

O(n) Like map, but not fuseable. The benefit is that it is slightly faster for one-shot cases.

Produced by Haddock version 0.7