Haskell Hierarchical Libraries (base package) | Contents | Index
Data.ByteString
Contents
The ByteString type
Introducing and eliminating ByteStrings
Basic interface
Special ByteStrings
Transforming ByteStrings
Reducing ByteStrings
Special folds
Generating and unfolding ByteStrings
Substrings
Breaking strings
Breaking and dropping on specific bytes
Breaking into many substrings
Joining strings
Indexing ByteStrings
Ordered ByteStrings
Searching ByteStrings
Searching by equality
Searching with a predicate
Prefixes and suffixes
Search for arbitrary substrings
Zipping and unzipping ByteStrings
Unchecked access
Low level introduction and elimination
Packing CStrings and pointers
Using ByteStrings as CStrings
Copying ByteStrings
I/O with ByteStrings
Standard input and output
Files
I/O with Handles
Fusion utilities
Description

A time and space-efficient implementation of byte vectors using packed Word8 arrays, suitable for high performance use, both in terms of large data quantities and high speed requirements. Byte vectors are encoded as strict Word8 arrays of bytes, held in a ForeignPtr, and can be passed between C and Haskell with little effort.

This module is intended to be imported qualified, to avoid name clashes with Prelude functions. eg.

 import qualified Data.ByteString as B

Original GHC implementation by Bryan O'Sullivan. Rewritten to use UArray by Simon Marlow. Rewritten to support slices and use ForeignPtr by David Roundy. Polished and extended by Don Stewart.

Synopsis
data ByteString = PS !(ForeignPtr Word8) !Int !Int
empty :: ByteString
packByte :: Word8 -> ByteString
pack :: [Word8] -> ByteString
unpack :: ByteString -> [Word8]
packWith :: (a -> Word8) -> [a] -> ByteString
unpackWith :: (Word8 -> a) -> ByteString -> [a]
cons :: Word8 -> ByteString -> ByteString
snoc :: ByteString -> Word8 -> ByteString
null :: ByteString -> Bool
length :: ByteString -> Int
head :: ByteString -> Word8
tail :: ByteString -> ByteString
last :: ByteString -> Word8
init :: ByteString -> ByteString
append :: ByteString -> ByteString -> ByteString
inits :: ByteString -> [ByteString]
tails :: ByteString -> [ByteString]
elems :: ByteString -> [ByteString]
map :: (Word8 -> Word8) -> ByteString -> ByteString
reverse :: ByteString -> ByteString
intersperse :: Word8 -> ByteString -> ByteString
transpose :: [ByteString] -> [ByteString]
foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
concat :: [ByteString] -> ByteString
concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
any :: (Word8 -> Bool) -> ByteString -> Bool
all :: (Word8 -> Bool) -> ByteString -> Bool
maximum :: ByteString -> Word8
minimum :: ByteString -> Word8
mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
replicate :: Int -> Word8 -> ByteString
unfoldrN :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
take :: Int -> ByteString -> ByteString
drop :: Int -> ByteString -> ByteString
splitAt :: Int -> ByteString -> (ByteString, ByteString)
takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
breakFirst :: Word8 -> ByteString -> Maybe (ByteString, ByteString)
breakLast :: Word8 -> ByteString -> Maybe (ByteString, ByteString)
split :: Word8 -> ByteString -> [ByteString]
splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
group :: ByteString -> [ByteString]
groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
join :: ByteString -> [ByteString] -> ByteString
joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
index :: ByteString -> Int -> Word8
elemIndex :: Word8 -> ByteString -> Maybe Int
elemIndices :: Word8 -> ByteString -> [Int]
elemIndexLast :: Word8 -> ByteString -> Maybe Int
findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
count :: Word8 -> ByteString -> Int
sort :: ByteString -> ByteString
elem :: Word8 -> ByteString -> Bool
notElem :: Word8 -> ByteString -> Bool
filterByte :: Word8 -> ByteString -> ByteString
filterNotByte :: Word8 -> ByteString -> ByteString
filter :: (Word8 -> Bool) -> ByteString -> ByteString
find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
isPrefixOf :: ByteString -> ByteString -> Bool
isSuffixOf :: ByteString -> ByteString -> Bool
isSubstringOf :: ByteString -> ByteString -> Bool
findSubstring :: ByteString -> ByteString -> Maybe Int
findSubstrings :: ByteString -> ByteString -> [Int]
zip :: ByteString -> ByteString -> [(Word8, Word8)]
zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
unzip :: [(Word8, Word8)] -> (ByteString, ByteString)
unsafeHead :: ByteString -> Word8
unsafeTail :: ByteString -> ByteString
unsafeIndex :: ByteString -> Int -> Word8
generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
skipIndex :: ByteString -> Int
packCString :: CString -> ByteString
packCStringLen :: CStringLen -> ByteString
packMallocCString :: CString -> ByteString
packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
packAddress :: Addr# -> ByteString
unsafePackAddress :: Int -> Addr# -> ByteString
unsafeFinalize :: ByteString -> IO ()
useAsCString :: ByteString -> (CString -> IO a) -> IO a
unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
copy :: ByteString -> ByteString
copyCString :: CString -> ByteString
copyCStringLen :: CStringLen -> ByteString
getLine :: IO ByteString
getContents :: IO ByteString
putStr :: ByteString -> IO ()
putStrLn :: ByteString -> IO ()
readFile :: FilePath -> IO ByteString
writeFile :: FilePath -> ByteString -> IO ()
getArgs :: IO [ByteString]
hGetLine :: Handle -> IO ByteString
hGetNonBlocking :: Handle -> Int -> IO ByteString
hGetContents :: Handle -> IO ByteString
hGet :: Handle -> Int -> IO ByteString
hPut :: Handle -> ByteString -> IO ()
unpackList :: ByteString -> [Word8]
noAL :: NoAL
data NoAL
loopArr :: (ByteString, acc) -> ByteString
loopAcc :: (ByteString, acc) -> acc
loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2)
loopU :: (acc -> Word8 -> (acc, Maybe Word8)) -> acc -> ByteString -> (ByteString, acc)
mapEFL :: (Word8 -> Word8) -> NoAL -> Word8 -> (NoAL, Maybe Word8)
filterEFL :: (Word8 -> Bool) -> NoAL -> Word8 -> (NoAL, Maybe Word8)
foldEFL :: (acc -> Word8 -> acc) -> acc -> Word8 -> (acc, Maybe Word8)
fuseEFL :: (a1 -> Word8 -> (a1, Maybe Word8)) -> (a2 -> Word8 -> (a2, Maybe Word8)) -> (a1, a2) -> Word8 -> ((a1, a2), Maybe Word8)
filterF :: (Word8 -> Bool) -> ByteString -> ByteString
mapF :: (Word8 -> Word8) -> ByteString -> ByteString
The ByteString type
data ByteString

A space-efficient representation of a Word8 vector, supporting many efficient operations. A ByteString contains 8-bit characters only.

Instances of Eq, Ord, Read, Show, Data, Typeable

Constructors
PS !(ForeignPtr Word8) !Int !Int
show/hide Instances
Introducing and eliminating ByteStrings
empty :: ByteString
O(1) The empty ByteString
packByte :: Word8 -> ByteString
O(1) Convert a Word8 into a ByteString
pack :: [Word8] -> ByteString

O(n) Convert a '[Word8]' into a ByteString.

For applications with large numbers of string literals, pack can be a bottleneck. In such cases, consider using packAddress (GHC only).

unpack :: ByteString -> [Word8]
O(n) Converts a ByteString to a '[Word8]'.
packWith :: (a -> Word8) -> [a] -> ByteString
O(n) Convert a '[a]' into a ByteString using some conversion function
unpackWith :: (Word8 -> a) -> ByteString -> [a]
O(n) Converts a ByteString to a '[a]', using a conversion function.
Basic interface
cons :: Word8 -> ByteString -> ByteString
O(n) cons is analogous to (:) for lists, but of different complexity, as it requires a memcpy.
snoc :: ByteString -> Word8 -> ByteString
O(n) Append a byte to the end of a ByteString
null :: ByteString -> Bool
O(1) Test whether a ByteString is empty.
length :: ByteString -> Int
O(1) length returns the length of a ByteString as an Int.
head :: ByteString -> Word8
O(1) Extract the first element of a ByteString, which must be non-empty.
tail :: ByteString -> ByteString
O(1) Extract the elements after the head of a ByteString, which must be non-empty.
last :: ByteString -> Word8
O(1) Extract the last element of a ByteString, which must be finite and non-empty.
init :: ByteString -> ByteString
O(1) Return all the elements of a ByteString except the last one.
append :: ByteString -> ByteString -> ByteString
O(n) Append two ByteStrings
Special ByteStrings
inits :: ByteString -> [ByteString]
O(n) Return all initial segments of the given ByteString, shortest first.
tails :: ByteString -> [ByteString]
O(n) Return all final segments of the given ByteString, longest first.
elems :: ByteString -> [ByteString]
O(n) breaks a ByteString to a list of ByteStrings, one byte each.
Transforming ByteStrings
map :: (Word8 -> Word8) -> ByteString -> ByteString
O(n) map f xs is the ByteString obtained by applying f to each element of xs. This function is subject to array fusion.
reverse :: ByteString -> ByteString
O(n) reverse xs efficiently returns the elements of xs in reverse order.
intersperse :: Word8 -> ByteString -> ByteString
O(n) The intersperse function takes a Word8 and a ByteString and `intersperses' that byte between the elements of the ByteString. It is analogous to the intersperse function on Lists.
transpose :: [ByteString] -> [ByteString]
The transpose function transposes the rows and columns of its ByteString argument.
Reducing ByteStrings
foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
foldl, applied to a binary operator, a starting value (typically the left-identity of the operator), and a ByteString, reduces the ByteString using the binary operator, from left to right. This function is subject to array fusion.
foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
foldr, applied to a binary operator, a starting value (typically the right-identity of the operator), and a ByteString, reduces the ByteString using the binary operator, from right to left.
foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
foldl1 is a variant of foldl that has no starting value argument, and thus must be applied to non-empty ByteStrings. This function is subject to array fusion.
foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
foldr1 is a variant of foldr that has no starting value argument, and thus must be applied to non-empty ByteStrings
Special folds
concat :: [ByteString] -> ByteString
O(n) Concatenate a list of ByteStrings.
concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
Map a function over a ByteString and concatenate the results
any :: (Word8 -> Bool) -> ByteString -> Bool
O(n) Applied to a predicate and a ByteString, any determines if any element of the ByteString satisfies the predicate.
all :: (Word8 -> Bool) -> ByteString -> Bool
O(n) Applied to a predicate and a ByteString, all determines if all elements of the ByteString satisfy the predicate.
maximum :: ByteString -> Word8
O(n) maximum returns the maximum value from a ByteString
minimum :: ByteString -> Word8
O(n) minimum returns the minimum value from a ByteString
mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
O(n) map Word8 functions, provided with the index at each position
Generating and unfolding ByteStrings
replicate :: Int -> Word8 -> ByteString

O(n) replicate n x is a ByteString of length n with x the value of every element. The following holds:

 replicate w c = unfoldrN w (\u -> Just (u,u)) c

This implementation uses memset(3)

unfoldrN :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString

O(n) The unfoldrN function is analogous to the List 'unfoldr'. unfoldrN builds a ByteString from a seed value. The function takes the element and returns Nothing if it is done producing the ByteString or returns Just (a,b), in which case, a is prepended to the ByteString and b is used as the next element in a recursive call.

To prevent unfoldrN having O(n^2) complexity (as prepending a character to a ByteString is O(n)), this unfoldr requires a maximum final size of the ByteString as an argument. cons can then be implemented in O(1) (i.e. a poke), and the unfoldr itself has linear complexity. The depth of the recursion is limited to this size, but may be less. For lazy, infinite unfoldr, use unfoldr (from List).

Examples:

 unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"

The following equation connects the depth-limited unfoldr to the List unfoldr:

 unfoldrN n == take n $ List.unfoldr
Substrings
Breaking strings
take :: Int -> ByteString -> ByteString
O(1) take n, applied to a ByteString xs, returns the prefix of xs of length n, or xs itself if n > length xs.
drop :: Int -> ByteString -> ByteString
O(1) drop n xs returns the suffix of xs after the first n elements, or the empty ByteString if n > length xs.
splitAt :: Int -> ByteString -> (ByteString, ByteString)
O(1) splitAt n xs is equivalent to (take n xs, drop n xs).
takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
takeWhile, applied to a predicate p and a ByteString xs, returns the longest prefix (possibly empty) of xs of elements that satisfy p.
dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
dropWhile p xs returns the suffix remaining after takeWhile p xs.
break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
break p is equivalent to span (not . p).
span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
span p xs breaks the ByteString into two segments. It is equivalent to (takeWhile p xs, dropWhile p xs)
spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)

spanEnd behaves like span but from the end of the ByteString. We have

 spanEnd (not.isSpace) "x y z" == ("x y ","z")

and

 spanEnd (not . isSpace) ps
    == 
 let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x) 
Breaking and dropping on specific bytes
breakByte :: Word8 -> ByteString -> (ByteString, ByteString)

breakByte breaks its ByteString argument at the first occurrence of the specified byte. It is more efficient than break as it is implemented with memchr(3). I.e.

 break (=='c') "abcd" == breakByte 'c' "abcd"
spanByte :: Word8 -> ByteString -> (ByteString, ByteString)

spanByte breaks its ByteString argument at the first occurrence of a byte other than its argument. It is more efficient than 'span (==)'

 span  (=='c') "abcd" == spanByte 'c' "abcd"
breakFirst :: Word8 -> ByteString -> Maybe (ByteString, ByteString)

O(n) breakFirst breaks the given ByteString on the first occurrence of w. It behaves like break, except the delimiter is not returned, and Nothing is returned if the delimiter is not in the ByteString. I.e.

 breakFirst 'b' "aabbcc" == Just ("aa","bcc")
 breakFirst c xs ==
 let (x,y) = break (== c) xs 
 in if null y then Nothing else Just (x, drop 1 y)
breakLast :: Word8 -> ByteString -> Maybe (ByteString, ByteString)

O(n) breakLast behaves like breakFirst, but from the end of the ByteString.

 breakLast ('b') (pack "aabbcc") == Just ("aab","cc")

and the following are equivalent:

 breakLast 'c' "abcdef"
 let (x,y) = break (=='c') (reverse "abcdef") 
 in if null y then Nothing else Just (reverse (drop 1 y), reverse x)
Breaking into many substrings
split :: Word8 -> ByteString -> [ByteString]

O(n) Break a ByteString into pieces separated by the byte argument, consuming the delimiter. I.e.

 split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
 split 'a'  "aXaXaXa"    == ["","X","X","X",""]
 split 'x'  "x"          == ["",""]

and

 join [c] . split c == id
 split == splitWith . (==)

As for all splitting functions in this library, this function does not copy the substrings, it just constructs new ByteStrings that are slices of the original.

splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]

O(n) Splits a ByteString into components delimited by separators, where the predicate returns True for a separator element. The resulting components do not contain the separators. Two adjacent separators result in an empty component in the output. eg.

 splitWith (=='a') "aabbaca" == ["","","bb","c",""]
 splitWith (=='a') []        == []
tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]

Like splitWith, except that sequences of adjacent separators are treated as a single separator. eg.

 tokens (=='a') "aabbaca" == ["bb","c"]
group :: ByteString -> [ByteString]

The group function takes a ByteString and returns a list of ByteStrings such that the concatenation of the result is equal to the argument. Moreover, each sublist in the result contains only equal elements. For example,

 group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]

It is a special case of groupBy, which allows the programmer to supply their own equality test. It is about 40% faster than groupBy (==)

groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
The groupBy function is the non-overloaded version of group.
Joining strings
join :: ByteString -> [ByteString] -> ByteString
O(n) The join function takes a ByteString and a list of ByteStrings and concatenates the list after interspersing the first argument between each element of the list.
joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
O(n) joinWithByte. An efficient way to join two ByteStrings with a byte. Around 4 times faster than the generalised join.
Indexing ByteStrings
index :: ByteString -> Int -> Word8
O(1) ByteString index (subscript) operator, starting from 0.
elemIndex :: Word8 -> ByteString -> Maybe Int
O(n) The elemIndex function returns the index of the first element in the given ByteString which is equal to the query element, or Nothing if there is no such element. This implementation uses memchr(3).
elemIndices :: Word8 -> ByteString -> [Int]
O(n) The elemIndices function extends elemIndex, by returning the indices of all elements equal to the query element, in ascending order. This implementation uses memchr(3).
elemIndexLast :: Word8 -> ByteString -> Maybe Int

O(n) The elemIndexLast function returns the last index of the element in the given ByteString which is equal to the query element, or Nothing if there is no such element. The following holds:

 elemIndexLast c xs == 
 (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
The findIndex function takes a predicate and a ByteString and returns the index of the first element in the ByteString satisfying the predicate.
findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
The findIndices function extends findIndex, by returning the indices of all elements satisfying the predicate, in ascending order.
count :: Word8 -> ByteString -> Int

count returns the number of times its argument appears in the ByteString

 count = length . elemIndices

But more efficiently than using length on the intermediate list.

Ordered ByteStrings
sort :: ByteString -> ByteString
O(n) Sort a ByteString efficiently, using counting sort.
Searching ByteStrings
Searching by equality
These functions use memchr(3) to efficiently search the ByteString
elem :: Word8 -> ByteString -> Bool
O(n) elem is the ByteString membership predicate.
notElem :: Word8 -> ByteString -> Bool
O(n) notElem is the inverse of elem
filterByte :: Word8 -> ByteString -> ByteString

O(n) A first order equivalent of filter . (==), for the common case of filtering a single byte. It is more efficient to use filterByte in this case.

 filterByte == filter . (==)

filterByte is around 10x faster, and uses much less space, than its filter equivalent

filterNotByte :: Word8 -> ByteString -> ByteString

O(n) A first order equivalent of filter . (/=), for the common case of filtering a single byte out of a list. It is more efficient to use filterNotByte in this case.

 filterNotByte == filter . (/=)

filterNotByte is around 2x faster than its filter equivalent.

Searching with a predicate
filter :: (Word8 -> Bool) -> ByteString -> ByteString
O(n) filter, applied to a predicate and a ByteString, returns a ByteString containing those characters that satisfy the predicate. This function is subject to array fusion.
find :: (Word8 -> Bool) -> ByteString -> Maybe Word8

O(n) The find function takes a predicate and a ByteString, and returns the first element matching the predicate, or Nothing if there is no such element.

 find f p = case findIndex f p of Just n -> Just (p `index` n) ; _ -> Nothing
Prefixes and suffixes
These functions use memcmp(3) to efficiently compare substrings
isPrefixOf :: ByteString -> ByteString -> Bool
O(n) The isPrefixOf function takes two ByteStrings and returns True iff the first is a prefix of the second.
isSuffixOf :: ByteString -> ByteString -> Bool

O(n) The isSuffixOf function takes two ByteStrings and returns True iff the first is a suffix of the second.

The following holds:

 isSuffixOf x y == reverse x `isPrefixOf` reverse y

However, the real implementation uses memcmp to compare the end of the string only, with no reverse required.

Search for arbitrary substrings
isSubstringOf
:: ByteString  -- ^ String to search for.
-> ByteString  -- ^ String to search in.
-> Bool
Check whether one string is a substring of another. isSubstringOf p s is equivalent to not (null (findSubstrings p s)).
findSubstring
:: ByteString  -- ^ String to search for.
-> ByteString  -- ^ String to search in.
-> Maybe Int
Get the first index of a substring in another string, or Nothing if the string is not found. findSubstring p s is equivalent to listToMaybe (findSubstrings p s).
findSubstrings
:: ByteString  -- ^ String to search for.
-> ByteString  -- ^ String to search in.
-> [Int]
Find the indexes of all (possibly overlapping) occurrences of a substring in a string. This function uses the Knuth-Morris-Pratt string matching algorithm.
Zipping and unzipping ByteStrings
zip :: ByteString -> ByteString -> [(Word8, Word8)]
O(n) zip takes two ByteStrings and returns a list of corresponding pairs of bytes. If one input ByteString is short, excess elements of the longer ByteString are discarded. This is equivalent to a pair of unpack operations.
zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
zipWith generalises zip by zipping with the function given as the first argument, instead of a tupling function. For example, zipWith (+) is applied to two ByteStrings to produce the list of corresponding sums.
unzip :: [(Word8, Word8)] -> (ByteString, ByteString)
O(n) unzip transforms a list of pairs of bytes into a pair of ByteStrings. Note that this performs two pack operations.
Unchecked access
unsafeHead :: ByteString -> Word8

The sortBy function is the non-overloaded version of sort.

Try some linear sorts: radix, counting Or mergesort.

sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString sortBy f ps = undefined

A variety of head for non-empty ByteStrings. unsafeHead omits the check for the empty case, so there is an obligation on the programmer to provide a proof that the ByteString is non-empty.

unsafeTail :: ByteString -> ByteString
A variety of tail for non-empty ByteStrings. unsafeTail omits the check for the empty case. As with unsafeHead, the programmer must provide a separate proof that the ByteString is non-empty.
unsafeIndex :: ByteString -> Int -> Word8
Unsafe ByteString index (subscript) operator, starting from 0, returning a Word8. This omits the bounds check, which means there is an accompanying obligation on the programmer to ensure the bounds are checked in some other way.
Low level introduction and elimination
generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString

Given the maximum size needed and a function to make the contents of a ByteString, generate makes the ByteString. The generating function is required to return the actual final size (<= the maximum size), and the resulting byte array is realloced to this size. The string is padded at the end with a null byte.

generate is the main mechanism for creating custom, efficient ByteString functions, using Haskell or C functions to fill the space.

create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
A way of creating ForeignPtrs outside the IO monad. The Int argument gives the final size of the ByteString. Unlike generate, the ByteString is not reallocated if the final size is less than the estimated size. Also, unlike generate, ByteStrings created this way are managed on the Haskell heap.
fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
O(1) Build a ByteString from a ForeignPtr
toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
O(1) Deconstruct a ForeignPtr from a ByteString
skipIndex :: ByteString -> Int
O(1) skipIndex returns the internal skipped index of the current ByteString from any larger string it was created from, as an Int.
Packing CStrings and pointers
packCString :: CString -> ByteString
O(n) Build a ByteString from a CString. This value will have no finalizer associated to it. The ByteString length is calculated using strlen(3), and thus the complexity is O(n).
packCStringLen :: CStringLen -> ByteString
O(1) Build a ByteString from a CStringLen. This value will have no finalizer associated with it. This operation has O(1) complexity as we already know the final size, so no strlen(3) is required.
packMallocCString :: CString -> ByteString
O(n) Build a ByteString from a malloced CString. This value will have a free(3) finalizer associated to it.
packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
O(1) Construct a ByteString given a C Ptr Word8 buffer, a length, and an IO action representing a finalizer. This function is not available on Hugs.
packAddress :: Addr# -> ByteString

O(n) Pack a null-terminated sequence of bytes, pointed to by an Addr# (an arbitrary machine address assumed to point outside the garbage-collected heap) into a ByteString. Creating an Addr# from an unboxed string literal is much faster than packing a boxed string. An unboxed string literal is compiled to a static char [] by GHC. Establishing the length of the string requires a call to strlen(3), so the Addr# must point to a null-terminated buffer (as is the case with string# literals in GHC). Use unsafePackAddress if you know the length of the string statically.

An example:

 literalFS = packAddress "literal"#
unsafePackAddress :: Int -> Addr# -> ByteString
O(1) unsafePackAddress provides constant-time construction of ByteStrings -- which is ideal for string literals. It packs a null-terminated sequence of bytes into a ByteString, given a raw Addr to the string, and the length of the string. Make sure the length is correct, otherwise use the safer packAddress (where the length will be calculated once at runtime).
unsafeFinalize :: ByteString -> IO ()
Explicitly run the finaliser associated with a ByteString. Further references to this value may generate invalid memory references. This operation is unsafe, as there may be other ByteStrings referring to the same underlying pages. If you use this, you need to have a proof of some kind that all ByteStrings ever generated from the underlying byte array are no longer live.
Using ByteStrings as CStrings
useAsCString :: ByteString -> (CString -> IO a) -> IO a
O(n) construction. Use a ByteString with a function requiring a null-terminated CString. The CString should not be freed afterwards. This is a memcpy(3).
unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
O(1) construction. Use a ByteString with a function requiring a CString. Warning: modifying the CString will affect the ByteString. Why is this function unsafe? It relies on the null byte at the end of the ByteString to be there. This is not the case if your ByteString has been sliced from a larger string (i.e. with take or drop). Unless you can guarantee the null byte, you should use the safe version, which will copy the string first.
unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
O(1) construction. Use a ByteString with a function requiring a CStringLen. Warning: modifying the CStringLen will affect the ByteString. This is analogous to unsafeUseAsCString, and comes with the same safety requirements.
Copying ByteStrings
These functions perform memcpy(3) operations
copy :: ByteString -> ByteString
O(n) Make a copy of the ByteString with its own storage. This is mainly useful to allow the rest of the data pointed to by the ByteString to be garbage collected, for example if a large string has been read in, and only a small part of it is needed in the rest of the program.
copyCString :: CString -> ByteString
O(n) Duplicate a CString as a ByteString. Useful if you know the CString is going to be deallocated from C land.
copyCStringLen :: CStringLen -> ByteString
O(n) Same as copyCString, but saves a strlen call when the length is known.
I/O with ByteStrings
Standard input and output
getLine :: IO ByteString
getLine, read a line from stdin.
getContents :: IO ByteString
getContents. Equivalent to hGetContents stdin
putStr :: ByteString -> IO ()
Write a ByteString to stdout
putStrLn :: ByteString -> IO ()
Write a ByteString to stdout, appending a newline byte
Files
readFile :: FilePath -> IO ByteString
Read an entire file directly into a ByteString. This is far more efficient than reading the characters into a String and then using pack. It also may be more efficient than opening the file and reading it using hGet.
writeFile :: FilePath -> ByteString -> IO ()
Write a ByteString to a file.
I/O with Handles
getArgs :: IO [ByteString]
A ByteString equivalent for getArgs. More efficient for large argument lists
hGetLine :: Handle -> IO ByteString
hGetLine. read a ByteString from a handle
hGetNonBlocking :: Handle -> Int -> IO ByteString
hGetNonBlocking is identical to hGet, except that it will never block waiting for data to become available, instead it returns only whatever data is available.
hGetContents :: Handle -> IO ByteString

Read entire handle contents into a ByteString.

As with hGet, the string representation in the file is assumed to be ISO-8859-1.

hGet :: Handle -> Int -> IO ByteString
Read a ByteString directly from the specified Handle. This is far more efficient than reading the characters into a String and then using pack.
hPut :: Handle -> ByteString -> IO ()
Outputs a ByteString to the specified Handle.
Fusion utilities
unpackList :: ByteString -> [Word8]
noAL :: NoAL
No accumulator
data NoAL
Data type for accumulators which can be ignored. The rewrite rules rely on the fact that no bottoms of this type are ever constructed; hence, we can assume (_ :: NoAL) `seq` x = x.
loopArr :: (ByteString, acc) -> ByteString
Projection functions that are fusion friendly (as in, we determine when they are inlined)
loopAcc :: (ByteString, acc) -> acc
loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2)
loopU
:: (acc -> Word8 -> (acc, Maybe Word8))  -- ^ mapping & folding, once per elem
-> acc  -- ^ initial acc value
-> ByteString  -- ^ input ByteString
-> (ByteString, acc)
Iteration over ByteStrings
mapEFL :: (Word8 -> Word8) -> NoAL -> Word8 -> (NoAL, Maybe Word8)

Special forms of loop arguments

  • These are common special cases for the three function arguments of gen and loop; we give them special names to make it easier to trigger RULES applying in the special cases represented by these arguments. The INLINE [1] makes sure that these functions are only inlined in the last two simplifier phases.
  • In the case where the accumulator is not needed, it is better to always explicitly return a value `()', rather than just copy the input to the output, as the former gives GHC better local information.

Element function expressing a mapping only

filterEFL :: (Word8 -> Bool) -> NoAL -> Word8 -> (NoAL, Maybe Word8)
Element function implementing a filter function only
foldEFL :: (acc -> Word8 -> acc) -> acc -> Word8 -> (acc, Maybe Word8)
Element function expressing a reduction only
fuseEFL :: (a1 -> Word8 -> (a1, Maybe Word8)) -> (a2 -> Word8 -> (a2, Maybe Word8)) -> (a1, a2) -> Word8 -> ((a1, a2), Maybe Word8)
Fuse two flat loop functions
filterF :: (Word8 -> Bool) -> ByteString -> ByteString
O(n) filterF is a non-fuseable version of filter, that may be around 2x faster for some one-shot applications.
mapF :: (Word8 -> Word8) -> ByteString -> ByteString
O(n) Like map, but not fuseable. The benefit is that it is slightly faster for one-shot cases.
Produced by Haddock version 0.7