Data.ByteString.Char8

Haskell Hierarchical Libraries (base package)

Description

Manipulate ByteStrings using Char operations. All Chars will be truncated to 8 bits. It can be expected that these functions will run at identical speeds to their Word8 equivalents in Data.ByteString.

More specifically these byte strings are taken to be in the subset of Unicode covered by code points 0-255. This covers Unicode Basic Latin, Latin-1 Supplement and C0+C1 Controls.

See:

This module is intended to be imported qualified, to avoid name clashes with Prelude functions. eg.

 import qualified Data.ByteString.Char8 as B

Synopsis

data ByteString = PS !(ForeignPtr Word8) !Int !Int

empty :: ByteString

packChar :: Char -> ByteString

pack :: String -> ByteString

unpack :: ByteString -> [Char]

cons :: Char -> ByteString -> ByteString

snoc :: ByteString -> Char -> ByteString

null :: ByteString -> Bool

length :: ByteString -> Int

head :: ByteString -> Char

tail :: ByteString -> ByteString

last :: ByteString -> Char

init :: ByteString -> ByteString

append :: ByteString -> ByteString -> ByteString

inits :: ByteString -> [ByteString]

tails :: ByteString -> [ByteString]

elems :: ByteString -> [ByteString]

map :: (Char -> Char) -> ByteString -> ByteString

reverse :: ByteString -> ByteString

intersperse :: Char -> ByteString -> ByteString

transpose :: [ByteString] -> [ByteString]

foldl :: (a -> Char -> a) -> a -> ByteString -> a

foldr :: (Char -> a -> a) -> a -> ByteString -> a

foldl1 :: (Char -> Char -> Char) -> ByteString -> Char

foldr1 :: (Char -> Char -> Char) -> ByteString -> Char

concat :: [ByteString] -> ByteString

concatMap :: (Char -> ByteString) -> ByteString -> ByteString

any :: (Char -> Bool) -> ByteString -> Bool

all :: (Char -> Bool) -> ByteString -> Bool

maximum :: ByteString -> Char

minimum :: ByteString -> Char

mapIndexed :: (Int -> Char -> Char) -> ByteString -> ByteString

replicate :: Int -> Char -> ByteString

unfoldrN :: Int -> (Char -> Maybe (Char, Char)) -> Char -> ByteString

take :: Int -> ByteString -> ByteString

drop :: Int -> ByteString -> ByteString

splitAt :: Int -> ByteString -> (ByteString, ByteString)

takeWhile :: (Char -> Bool) -> ByteString -> ByteString

dropWhile :: (Char -> Bool) -> ByteString -> ByteString

break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)

span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)

spanEnd :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)

breakChar :: Char -> ByteString -> (ByteString, ByteString)

spanChar :: Char -> ByteString -> (ByteString, ByteString)

breakFirst :: Char -> ByteString -> Maybe (ByteString, ByteString)

breakLast :: Char -> ByteString -> Maybe (ByteString, ByteString)

breakSpace :: ByteString -> (ByteString, ByteString)

dropSpace :: ByteString -> ByteString

dropSpaceEnd :: ByteString -> ByteString

split :: Char -> ByteString -> [ByteString]

splitWith :: (Char -> Bool) -> ByteString -> [ByteString]

tokens :: (Char -> Bool) -> ByteString -> [ByteString]

group :: ByteString -> [ByteString]

groupBy :: (Char -> Char -> Bool) -> ByteString -> [ByteString]

lines :: ByteString -> [ByteString]

words :: ByteString -> [ByteString]

unlines :: [ByteString] -> ByteString

unwords :: [ByteString] -> ByteString

lines' :: ByteString -> [ByteString]

unlines' :: [ByteString] -> ByteString

linesCRLF' :: ByteString -> [ByteString]

unlinesCRLF' :: [ByteString] -> ByteString

words' :: ByteString -> [ByteString]

unwords' :: [ByteString] -> ByteString

lineIndices :: ByteString -> [Int]

betweenLines :: ByteString -> ByteString -> ByteString -> Maybe ByteString

join :: ByteString -> [ByteString] -> ByteString

joinWithChar :: Char -> ByteString -> ByteString -> ByteString

index :: ByteString -> Int -> Char

elemIndex :: Char -> ByteString -> Maybe Int

elemIndexLast :: Char -> ByteString -> Maybe Int

elemIndices :: Char -> ByteString -> [Int]

findIndex :: (Char -> Bool) -> ByteString -> Maybe Int

findIndices :: (Char -> Bool) -> ByteString -> [Int]

count :: Char -> ByteString -> Int

sort :: ByteString -> ByteString

elem :: Char -> ByteString -> Bool

notElem :: Char -> ByteString -> Bool

filterChar :: Char -> ByteString -> ByteString

filterNotChar :: Char -> ByteString -> ByteString

filter :: (Char -> Bool) -> ByteString -> ByteString

find :: (Char -> Bool) -> ByteString -> Maybe Char

isPrefixOf :: ByteString -> ByteString -> Bool

isSuffixOf :: ByteString -> ByteString -> Bool

isSubstringOf :: ByteString -> ByteString -> Bool

findSubstring :: ByteString -> ByteString -> Maybe Int

findSubstrings :: ByteString -> ByteString -> [Int]

zip :: ByteString -> ByteString -> [(Char, Char)]

zipWith :: (Char -> Char -> a) -> ByteString -> ByteString -> [a]

unzip :: [(Char, Char)] -> (ByteString, ByteString)

unsafeHead :: ByteString -> Char

unsafeTail :: ByteString -> ByteString

unsafeIndex :: ByteString -> Int -> Char

w2c :: Word8 -> Char

c2w :: Char -> Word8

readInt :: ByteString -> Maybe (Int, ByteString)

unsafeReadInt :: ByteString -> Maybe (Int, ByteString)

copy :: ByteString -> ByteString

getLine :: IO ByteString

getContents :: IO ByteString

putStr :: ByteString -> IO ()

putStrLn :: ByteString -> IO ()

readFile :: FilePath -> IO ByteString

writeFile :: FilePath -> ByteString -> IO ()

getArgs :: IO [ByteString]

hGetLine :: Handle -> IO ByteString

hGetNonBlocking :: Handle -> Int -> IO ByteString

hGetContents :: Handle -> IO ByteString

hGet :: Handle -> Int -> IO ByteString

hPut :: Handle -> ByteString -> IO ()

packAddress :: Addr# -> ByteString

unsafePackAddress :: Int -> Addr# -> ByteString

unpackList :: ByteString -> [Word8]

noAL :: NoAL

data NoAL

loopArr :: (ByteString, acc) -> ByteString

loopAcc :: (ByteString, acc) -> acc

loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2)

loopU :: (acc -> Word8 -> (acc, Maybe Word8)) -> acc -> ByteString -> (ByteString, acc)

mapEFL :: (Word8 -> Word8) -> NoAL -> Word8 -> (NoAL, Maybe Word8)

filterEFL :: (Word8 -> Bool) -> NoAL -> Word8 -> (NoAL, Maybe Word8)

foldEFL :: (acc -> Word8 -> acc) -> acc -> Word8 -> (acc, Maybe Word8)

fuseEFL :: (a1 -> Word8 -> (a1, Maybe Word8)) -> (a2 -> Word8 -> (a2, Maybe Word8)) -> (a1, a2) -> Word8 -> ((a1, a2), Maybe Word8)

filterF :: (Char -> Bool) -> ByteString -> ByteString

mapF :: (Char -> Char) -> ByteString -> ByteString

The ByteString type

data ByteString

A space-efficient representation of a Word8 vector, supporting many efficient operations. A ByteString contains 8-bit characters only.

Instances of Eq, Ord, Read, Show, Data, Typeable

Constructors

PS !(ForeignPtr Word8) !Int !Int

Instances

Introducing and eliminating ByteStrings

empty :: ByteString

O(1) The empty ByteString

packChar :: Char -> ByteString

O(1) Convert a Char into a ByteString

pack :: String -> ByteString

O(n) Convert a String into a ByteString

For applications with large numbers of string literals, pack can be a bottleneck. In such cases, consider using packAddress (GHC only).

unpack :: ByteString -> [Char]

O(n) Converts a ByteString to a String.

Basic interface

cons :: Char -> ByteString -> ByteString

O(n) cons is analogous to (:) for lists, but of different complexity, as it requires a memcpy.

snoc :: ByteString -> Char -> ByteString

O(n) Append a Char to the end of a ByteString. Similar to cons, this function performs a memcpy.

null :: ByteString -> Bool

O(1) Test whether a ByteString is empty.

length :: ByteString -> Int

O(1) length returns the length of a ByteString as an Int.

head :: ByteString -> Char

O(1) Extract the first element of a ByteString, which must be non-empty.

tail :: ByteString -> ByteString

O(1) Extract the elements after the head of a ByteString, which must be non-empty.

last :: ByteString -> Char

O(1) Extract the last element of a packed string, which must be non-empty.

init :: ByteString -> ByteString

O(1) Return all the elements of a ByteString except the last one.

append :: ByteString -> ByteString -> ByteString

O(n) Append two ByteStrings

Special ByteStrings

inits :: ByteString -> [ByteString]

O(n) Return all initial segments of the given ByteString, shortest first.

tails :: ByteString -> [ByteString]

O(n) Return all final segments of the given ByteString, longest first.

elems :: ByteString -> [ByteString]

O(n) breaks a ByteString to a list of ByteStrings, one byte each.

Transforming ByteStrings

map :: (Char -> Char) -> ByteString -> ByteString

O(n) map f xs is the ByteString obtained by applying f to each element of xs

reverse :: ByteString -> ByteString

O(n) reverse xs efficiently returns the elements of xs in reverse order.

intersperse :: Char -> ByteString -> ByteString

O(n) The intersperse function takes a Char and a ByteString and `intersperses' that Char between the elements of the ByteString. It is analogous to the intersperse function on Lists.

transpose :: [ByteString] -> [ByteString]

The transpose function transposes the rows and columns of its ByteString argument.

Reducing ByteStrings

foldl :: (a -> Char -> a) -> a -> ByteString -> a

foldl, applied to a binary operator, a starting value (typically the left-identity of the operator), and a ByteString, reduces the ByteString using the binary operator, from left to right.

foldr :: (Char -> a -> a) -> a -> ByteString -> a

foldr, applied to a binary operator, a starting value (typically the right-identity of the operator), and a packed string, reduces the packed string using the binary operator, from right to left.

foldl1 :: (Char -> Char -> Char) -> ByteString -> Char

foldl1 is a variant of foldl that has no starting value argument, and thus must be applied to non-empty ByteStrings.

foldr1 :: (Char -> Char -> Char) -> ByteString -> Char

foldr1 is a variant of foldr that has no starting value argument, and thus must be applied to non-empty ByteStrings

Special folds

concat :: [ByteString] -> ByteString

O(n) Concatenate a list of ByteStrings.

concatMap :: (Char -> ByteString) -> ByteString -> ByteString

Map a function over a ByteString and concatenate the results

any :: (Char -> Bool) -> ByteString -> Bool

Applied to a predicate and a ByteString, any determines if any element of the ByteString satisfies the predicate.

all :: (Char -> Bool) -> ByteString -> Bool

Applied to a predicate and a ByteString, all determines if all elements of the ByteString satisfy the predicate.

maximum :: ByteString -> Char

maximum returns the maximum value from a ByteString

minimum :: ByteString -> Char

minimum returns the minimum value from a ByteString

mapIndexed :: (Int -> Char -> Char) -> ByteString -> ByteString

O(n) map Char functions, provided with the index at each position

Generating and unfolding ByteStrings

replicate :: Int -> Char -> ByteString

O(n) replicate n x is a ByteString of length n with x the value of every element. The following holds:

 replicate w c = unfoldrN w (\u -> Just (u,u)) c

This implementation uses memset(3)

unfoldrN :: Int -> (Char -> Maybe (Char, Char)) -> Char -> ByteString

O(n) The unfoldrN function is analogous to the List 'unfoldr'. unfoldrN builds a ByteString from a seed value. The function takes the element and returns Nothing if it is done producing the ByteString or returns Just (a,b), in which case, a is prepended to the ByteString and b is used as the next element in a recursive call.

To prevent unfoldrN having O(n^2) complexity (as prepending a character to a ByteString is O(n)), this unfoldr requires a maximum final size of the ByteString as an argument. cons can then be implemented in O(1) (i.e. a poke), and the unfoldr itself has linear complexity. The depth of the recursion is limited to this size, but may be less. For lazy, infinite unfoldr, use unfoldr (from List).

Examples:

 unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"

The following equation connects the depth-limited unfoldr to the List unfoldr:

 unfoldrN n == take n $ List.unfoldr

Substrings

Breaking strings

take :: Int -> ByteString -> ByteString

O(1) take n, applied to a ByteString xs, returns the prefix of xs of length n, or xs itself if n > length xs.

drop :: Int -> ByteString -> ByteString

O(1) drop n xs returns the suffix of xs after the first n elements, or empty if n > length xs.

splitAt :: Int -> ByteString -> (ByteString, ByteString)

O(1) splitAt n xs is equivalent to (take n xs, drop n xs).

takeWhile :: (Char -> Bool) -> ByteString -> ByteString

takeWhile, applied to a predicate p and a ByteString xs, returns the longest prefix (possibly empty) of xs of elements that satisfy p.

dropWhile :: (Char -> Bool) -> ByteString -> ByteString

dropWhile p xs returns the suffix remaining after takeWhile p xs.

break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)

break p is equivalent to span (not . p).

span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)

span p xs breaks the ByteString into two segments. It is equivalent to (takeWhile p xs, dropWhile p xs)

spanEnd :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)

spanEnd behaves like span but from the end of the ByteString. We have

 spanEnd (not.isSpace) "x y z" == ("x y ","z")

and

 spanEnd (not . isSpace) ps
    == 
 let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)

Breaking and dropping on specific Chars

breakChar :: Char -> ByteString -> (ByteString, ByteString)

breakChar breaks its ByteString argument at the first occurrence of the specified Char. It is more efficient than break as it is implemented with memchr(3). I.e.

 break (=='c') "abcd" == breakChar 'c' "abcd"

spanChar :: Char -> ByteString -> (ByteString, ByteString)

spanChar breaks its ByteString argument at the first occurrence of a Char other than its argument. It is more efficient than 'span (==)'

 span  (=='c') "abcd" == spanChar 'c' "abcd"

breakFirst :: Char -> ByteString -> Maybe (ByteString, ByteString)

O(n) breakFirst breaks the given ByteString on the first occurrence of the given Char. It behaves like break, except the delimiter is not returned, and Nothing is returned if the delimiter is not in the ByteString. I.e.

 breakFirst 'b' "aabbcc" == Just ("aa","bcc")

 breakFirst c xs ==
 let (x,y) = break (== c) xs 
 in if null y then Nothing else Just (x, drop 1 y)

breakLast :: Char -> ByteString -> Maybe (ByteString, ByteString)

O(n) breakLast behaves like breakFirst, but from the end of the ByteString.

 breakLast ('b') (pack "aabbcc") == Just ("aab","cc")

and the following are equivalent:

 breakLast 'c' "abcdef"
 let (x,y) = break (=='c') (reverse "abcdef") 
 in if null y then Nothing else Just (reverse (drop 1 y), reverse x)

breakSpace :: ByteString -> (ByteString, ByteString)

breakSpace returns the pair of ByteStrings when the argument is broken at the first whitespace byte. I.e.

 break isSpace == breakSpace

dropSpace :: ByteString -> ByteString

dropSpace efficiently returns the ByteString argument with white space Chars removed from the front. It is more efficient than calling dropWhile for removing whitespace. I.e.

 dropWhile isSpace == dropSpace

dropSpaceEnd :: ByteString -> ByteString

dropSpaceEnd efficiently returns the ByteString argument with white space removed from the end. I.e.

 reverse . (dropWhile isSpace) . reverse == dropSpaceEnd

but it is more efficient than using multiple reverses.

Breaking into many substrings

split :: Char -> ByteString -> [ByteString]

O(n) Break a ByteString into pieces separated by the byte argument, consuming the delimiter. I.e.

 split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
 split 'a'  "aXaXaXa"    == ["","X","X","X",""]
 split 'x'  "x"          == ["",""]

and

 join [c] . split c == id
 split == splitWith . (==)

As for all splitting functions in this library, this function does not copy the substrings, it just constructs new ByteStrings that are slices of the original.

splitWith :: (Char -> Bool) -> ByteString -> [ByteString]

O(n) Splits a ByteString into components delimited by separators, where the predicate returns True for a separator element. The resulting components do not contain the separators. Two adjacent separators result in an empty component in the output. eg.

 splitWith (=='a') "aabbaca" == ["","","bb","c",""]

tokens :: (Char -> Bool) -> ByteString -> [ByteString]

Like splitWith, except that sequences of adjacent separators are treated as a single separator. eg.

 tokens (=='a') "aabbaca" == ["bb","c"]

group :: ByteString -> [ByteString]

The group function takes a ByteString and returns a list of ByteStrings such that the concatenation of the result is equal to the argument. Moreover, each sublist in the result contains only equal elements. For example,

 group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]

It is a special case of groupBy, which allows the programmer to supply their own equality test. It is about 40% faster than groupBy (==)

groupBy :: (Char -> Char -> Bool) -> ByteString -> [ByteString]

The groupBy function is the non-overloaded version of group.

Breaking into lines and words

lines :: ByteString -> [ByteString]

lines breaks a ByteString up into a list of ByteStrings at newline Chars. The resulting strings do not contain newlines.

words :: ByteString -> [ByteString]

words breaks a ByteString up into a list of words, which were delimited by Chars representing white space. And

 tokens isSpace = words

unlines :: [ByteString] -> ByteString

unlines is an inverse operation to lines. It joins lines, after appending a terminating newline to each.

unwords :: [ByteString] -> ByteString

The unwords function is analogous to the unlines function, on words.

lines' :: ByteString -> [ByteString]

lines' behaves like lines, in that it breaks a ByteString on newline Chars. However, unlike the Prelude functions, lines' and unlines' correctly reconstruct lines that are missing terminating newline characters. I.e.

 unlines  (lines "a\nb\nc")  == "a\nb\nc\n"
 unlines' (lines' "a\nb\nc") == "a\nb\nc"

Note that this means:

 lines  "a\nb\nc\n" == ["a","b","c"]
 lines' "a\nb\nc\n" == ["a","b","c",""]

unlines' :: [ByteString] -> ByteString

unlines' behaves like unlines, except that it also correctly restores lines that do not have terminating newlines (see the description for lines').

linesCRLF' :: ByteString -> [ByteString]

linesCRLF' behaves like lines', but breaks on (\cr?\lf)

unlinesCRLF' :: [ByteString] -> ByteString

unlinesCRLF' behaves like unlines, except that it also correctly restores lines that do not have terminating newlines (see the description for lines'). Uses CRLF instead of LF.

words' :: ByteString -> [ByteString]

words' behaves like words, with the exception that it produces output on ByteStrings with trailing whitespace that can be correctly inverted by unwords. I.e.

 words  "a b c " == ["a","b","c"]
 words' "a b c " == ["a","b","c",""]

 unwords $ words  "a b c " == "a b c"
 unwords $ words' "a b c " == "a b c "

unwords' :: [ByteString] -> ByteString

unwords' behaves like unwords. It is provided for consistency with the other invertible words and lines functions.

lineIndices :: ByteString -> [Int]

O(n) Indices of newlines. Shorthand for

 elemIndices '\n'

betweenLines

:: ByteString	First line to look for
-> ByteString	Second line to look for
-> ByteString	`ByteString` to look in
-> Maybe ByteString
`betweenLines` returns the ByteString between the two lines given, or Nothing if they do not appear. The returned string is the first and shortest string such that the line before it is the given first line, and the line after it is the given second line.

Joining strings

join :: ByteString -> [ByteString] -> ByteString

O(n) The join function takes a ByteString and a list of ByteStrings and concatenates the list after interspersing the first argument between each element of the list.

joinWithChar :: Char -> ByteString -> ByteString -> ByteString

O(n) joinWithChar. An efficient way to join two ByteStrings with a char. Around 4 times faster than the generalised join.

Indexing ByteStrings

index :: ByteString -> Int -> Char

O(1) ByteString index (subscript) operator, starting from 0.

elemIndex :: Char -> ByteString -> Maybe Int

O(n) The elemIndex function returns the index of the first element in the given ByteString which is equal (by memchr) to the query element, or Nothing if there is no such element.

elemIndexLast :: Char -> ByteString -> Maybe Int

O(n) The elemIndexLast function returns the last index of the element in the given ByteString which is equal to the query element, or Nothing if there is no such element. The following holds:

 elemIndexLast c xs == 
 (-) (length xs - 1) `fmap` elemIndex c (reverse xs)

elemIndices :: Char -> ByteString -> [Int]

O(n) The elemIndices function extends elemIndex, by returning the indices of all elements equal to the query element, in ascending order.

findIndex :: (Char -> Bool) -> ByteString -> Maybe Int

The findIndex function takes a predicate and a ByteString and returns the index of the first element in the ByteString satisfying the predicate.

findIndices :: (Char -> Bool) -> ByteString -> [Int]

The findIndices function extends findIndex, by returning the indices of all elements satisfying the predicate, in ascending order.

count :: Char -> ByteString -> Int

count returns the number of times its argument appears in the ByteString

 count = length . elemIndices

Also

 count '\n' == length . lines

But more efficiently than using length on the intermediate list.

Ordered ByteStrings

sort :: ByteString -> ByteString

O(n) Sort a ByteString efficiently, using counting sort.

Searching ByteStrings

Searching by equality

elem :: Char -> ByteString -> Bool

O(n) elem is the ByteString membership predicate. This implementation uses memchr(3).

notElem :: Char -> ByteString -> Bool

O(n) notElem is the inverse of elem

filterChar :: Char -> ByteString -> ByteString

O(n) A first order equivalent of filter . (==), for the common case of filtering a single Char. It is more efficient to use filterChar in this case.

 filterChar == filter . (==)

filterChar is around 10x faster, and uses much less space, than its filter equivalent

filterNotChar :: Char -> ByteString -> ByteString

O(n) A first order equivalent of filter . (/=), for the common case of filtering a single Char out of a list. It is more efficient to use filterNotChar in this case.

 filterNotChar == filter . (/=)

filterNotChar is around 3x faster, and uses much less space, than its filter equivalent

Searching with a predicate

filter :: (Char -> Bool) -> ByteString -> ByteString

O(n) filter, applied to a predicate and a ByteString, returns a ByteString containing those characters that satisfy the predicate.

find :: (Char -> Bool) -> ByteString -> Maybe Char

O(n) The find function takes a predicate and a ByteString, and returns the first element matching the predicate, or Nothing if there is no such element.

Searching for substrings

isPrefixOf :: ByteString -> ByteString -> Bool

O(n) The isPrefixOf function takes two ByteStrings and returns True iff the first is a prefix of the second.

isSuffixOf :: ByteString -> ByteString -> Bool

O(n) The isSuffixOf function takes two ByteStrings and returns True iff the first is a suffix of the second.

The following holds:

 isSuffixOf x y == reverse x `isPrefixOf` reverse y

However, the real implementation uses memcmp to compare the end of the string only, with no reverse required.

isSubstringOf

:: ByteString	String to search for.
-> ByteString	String to search in.
-> Bool
Check whether one string is a substring of another. `isSubstringOf p s` is equivalent to `not (null (findSubstrings p s))`.

findSubstring

:: ByteString	String to search for.
-> ByteString	String to search in.
-> Maybe Int
Get the first index of a substring in another string, or `Nothing` if the string is not found. `findSubstring p s` is equivalent to `listToMaybe (findSubstrings p s)`.

findSubstrings

:: ByteString	String to search for.
-> ByteString	String to search in.
-> [Int]
Find the indexes of all (possibly overlapping) occurrences of a substring in a string. This function uses the Knuth-Morris-Pratt string matching algorithm.

Zipping and unzipping ByteString

zip :: ByteString -> ByteString -> [(Char, Char)]

O(n) zip takes two ByteStrings and returns a list of corresponding pairs of Chars. If one input ByteString is short, excess elements of the longer ByteString are discarded. This is equivalent to a pair of unpack operations, and so space usage may be large for multi-megabyte ByteStrings

zipWith :: (Char -> Char -> a) -> ByteString -> ByteString -> [a]

zipWith generalises zip by zipping with the function given as the first argument, instead of a tupling function. For example, zipWith (+) is applied to two ByteStrings to produce the list of corresponding sums.

unzip :: [(Char, Char)] -> (ByteString, ByteString)

unzip transforms a list of pairs of Chars into a pair of ByteStrings. Note that this performs two pack operations.

Unchecked access

unsafeHead :: ByteString -> Char

A variety of head for non-empty ByteStrings. unsafeHead omits the check for the empty case, which is good for performance, but there is an obligation on the programmer to provide a proof that the ByteString is non-empty.

unsafeTail :: ByteString -> ByteString

A variety of tail for non-empty ByteStrings. unsafeTail omits the check for the empty case. As with unsafeHead, the programmer must provide a separate proof that the ByteString is non-empty.

unsafeIndex :: ByteString -> Int -> Char

Unsafe ByteString index (subscript) operator, starting from 0, returning a Char. This omits the bounds check, which means there is an accompanying obligation on the programmer to ensure the bounds are checked in some other way.

w2c :: Word8 -> Char

Conversion between Word8 and Char. Should compile to a no-op.

c2w :: Char -> Word8

Unsafe conversion between Char and Word8. This is a no-op and silently truncates to 8 bits Chars > '\255'. It is provided as convenience for ByteString construction.

Reading from ByteStrings

readInt :: ByteString -> Maybe (Int, ByteString)

readInt skips any whitespace at the beginning of its argument, and reads an Int from the beginning of the ByteString. If there is no integer at the beginning of the string, it returns Nothing, otherwise it just returns the int read, and the rest of the string.

unsafeReadInt :: ByteString -> Maybe (Int, ByteString)

unsafeReadInt is like readInt, but requires a null terminated ByteString. It avoids a copy if this is the case. It returns the Int read, if any, and the rest of the string.

Copying ByteStrings

copy :: ByteString -> ByteString

O(n) Make a copy of the ByteString with its own storage. This is mainly useful to allow the rest of the data pointed to by the ByteString to be garbage collected, for example if a large string has been read in, and only a small part of it is needed in the rest of the program.

I/O with ByteStrings

Standard input and output

getLine :: IO ByteString

getLine, read a line from stdin.

getContents :: IO ByteString

getContents. Equivalent to hGetContents stdin

putStr :: ByteString -> IO ()

Write a ByteString to stdout

putStrLn :: ByteString -> IO ()

Write a ByteString to stdout, appending a newline byte

Files

readFile :: FilePath -> IO ByteString

Read an entire file directly into a ByteString. This is far more efficient than reading the characters into a String and then using pack. It also may be more efficient than opening the file and reading it using hGet.

writeFile :: FilePath -> ByteString -> IO ()

Write a ByteString to a file.

I/O with Handles

getArgs :: IO [ByteString]

A ByteString equivalent for getArgs. More efficient for large argument lists

hGetLine :: Handle -> IO ByteString

hGetLine. read a ByteString from a handle

hGetNonBlocking :: Handle -> Int -> IO ByteString

hGetNonBlocking is identical to hGet, except that it will never block waiting for data to become available, instead it returns only whatever data is available.

hGetContents :: Handle -> IO ByteString

Read entire handle contents into a ByteString.

As with hGet, the string representation in the file is assumed to be ISO-8859-1.

hGet :: Handle -> Int -> IO ByteString

Read a ByteString directly from the specified Handle. This is far more efficient than reading the characters into a String and then using pack.

hPut :: Handle -> ByteString -> IO ()

Outputs a ByteString to the specified Handle.

Low level construction

For constructors from foreign language types see Data.ByteString

packAddress :: Addr# -> ByteString

O(n) Pack a null-terminated sequence of bytes, pointed to by an Addr# (an arbitrary machine address assumed to point outside the garbage-collected heap) into a ByteString. A much faster way to create an Addr# is with an unboxed string literal, than to pack a boxed string. An unboxed string literal is compiled to a static char [] by GHC. Establishing the length of the string requires a call to strlen(3), so the Addr# must point to a null-terminated buffer (as is the case with string# literals in GHC). Use unsafePackAddress if you know the length of the string statically.

An example:

 literalFS = packAddress "literal"#

unsafePackAddress :: Int -> Addr# -> ByteString

O(1) unsafePackAddress provides constant-time construction of ByteStrings -- which is ideal for string literals. It packs a null-terminated sequence of bytes into a ByteString, given a raw Addr to the string, and the length of the string. Make sure the length is correct, otherwise use the safer packAddress (where the length will be calculated once at runtime).

Utilities (needed for array fusion)

unpackList :: ByteString -> [Word8]

noAL :: NoAL

No accumulator

data NoAL

Data type for accumulators which can be ignored. The rewrite rules rely on the fact that no bottoms of this type are ever constructed; hence, we can assume (_ :: NoAL) `seq` x = x.

loopArr :: (ByteString, acc) -> ByteString

Projection functions that are fusion friendly (as in, we determine when they are inlined)

loopAcc :: (ByteString, acc) -> acc

loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2)

loopU

:: (acc -> Word8 -> (acc, Maybe Word8))	mapping & folding, once per elem
-> acc	initial acc value
-> ByteString	input ByteString
-> (ByteString, acc)
Iteration over ByteStrings

mapEFL :: (Word8 -> Word8) -> NoAL -> Word8 -> (NoAL, Maybe Word8)

Special forms of loop arguments

These are common special cases for the three function arguments of gen and loop; we give them special names to make it easier to trigger RULES applying in the special cases represented by these arguments. The INLINE [1] makes sure that these functions are only inlined in the last two simplifier phases.
In the case where the accumulator is not needed, it is better to always explicitly return a value `()', rather than just copy the input to the output, as the former gives GHC better local information.

Element function expressing a mapping only

filterEFL :: (Word8 -> Bool) -> NoAL -> Word8 -> (NoAL, Maybe Word8)

Element function implementing a filter function only

foldEFL :: (acc -> Word8 -> acc) -> acc -> Word8 -> (acc, Maybe Word8)

Element function expressing a reduction only

fuseEFL :: (a1 -> Word8 -> (a1, Maybe Word8)) -> (a2 -> Word8 -> (a2, Maybe Word8)) -> (a1, a2) -> Word8 -> ((a1, a2), Maybe Word8)

Fuse to flat loop functions

filterF :: (Char -> Bool) -> ByteString -> ByteString

O(n) filterF is a non-fuseable version of filter, that may be around 2x faster for some one-shot applications.

mapF :: (Char -> Char) -> ByteString -> ByteString

O(n) Like map, but not fuseable. The benefit is that it is slightly faster for one-shot cases.

Produced by Haddock version 0.7