Haskell Hierarchical Libraries (base package)ContentsIndex
Data.ByteString.Char8
Contents
The ByteString type
Introducing and eliminating ByteStrings
Basic interface
Special ByteStrings
Transforming ByteStrings
Reducing ByteStrings
Special folds
Generating and unfolding ByteStrings
Substrings
Breaking strings
Breaking and dropping on specific Chars
Breaking into many substrings
Breaking into lines and words
Joining strings
Indexing ByteStrings
Ordered ByteStrings
Searching ByteStrings
Searching by equality
Searching with a predicate
Searching for substrings
Zipping and unzipping ByteString
Unchecked access
Reading from ByteStrings
Copying ByteStrings
I/O with ByteStrings
Standard input and output
Files
I/O with Handles
Low level construction
Utilities (needed for array fusion)
Description

Manipulate ByteStrings using Char operations. All Chars will be truncated to 8 bits. It can be expected that these functions will run at identical speeds to their Word8 equivalents in Data.ByteString.

More specifically these byte strings are taken to be in the subset of Unicode covered by code points 0-255. This covers Unicode Basic Latin, Latin-1 Supplement and C0+C1 Controls.

See:

This module is intended to be imported qualified, to avoid name clashes with Prelude functions. eg.

 import qualified Data.ByteString.Char8 as B
Synopsis
data ByteString = PS !(ForeignPtr Word8) !Int !Int
empty :: ByteString
packChar :: Char -> ByteString
pack :: String -> ByteString
unpack :: ByteString -> [Char]
cons :: Char -> ByteString -> ByteString
snoc :: ByteString -> Char -> ByteString
null :: ByteString -> Bool
length :: ByteString -> Int
head :: ByteString -> Char
tail :: ByteString -> ByteString
last :: ByteString -> Char
init :: ByteString -> ByteString
append :: ByteString -> ByteString -> ByteString
inits :: ByteString -> [ByteString]
tails :: ByteString -> [ByteString]
elems :: ByteString -> [ByteString]
map :: (Char -> Char) -> ByteString -> ByteString
reverse :: ByteString -> ByteString
intersperse :: Char -> ByteString -> ByteString
transpose :: [ByteString] -> [ByteString]
foldl :: (a -> Char -> a) -> a -> ByteString -> a
foldr :: (Char -> a -> a) -> a -> ByteString -> a
foldl1 :: (Char -> Char -> Char) -> ByteString -> Char
foldr1 :: (Char -> Char -> Char) -> ByteString -> Char
concat :: [ByteString] -> ByteString
concatMap :: (Char -> ByteString) -> ByteString -> ByteString
any :: (Char -> Bool) -> ByteString -> Bool
all :: (Char -> Bool) -> ByteString -> Bool
maximum :: ByteString -> Char
minimum :: ByteString -> Char
mapIndexed :: (Int -> Char -> Char) -> ByteString -> ByteString
replicate :: Int -> Char -> ByteString
unfoldrN :: Int -> (Char -> Maybe (Char, Char)) -> Char -> ByteString
take :: Int -> ByteString -> ByteString
drop :: Int -> ByteString -> ByteString
splitAt :: Int -> ByteString -> (ByteString, ByteString)
takeWhile :: (Char -> Bool) -> ByteString -> ByteString
dropWhile :: (Char -> Bool) -> ByteString -> ByteString
break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
spanEnd :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
breakChar :: Char -> ByteString -> (ByteString, ByteString)
spanChar :: Char -> ByteString -> (ByteString, ByteString)
breakFirst :: Char -> ByteString -> Maybe (ByteString, ByteString)
breakLast :: Char -> ByteString -> Maybe (ByteString, ByteString)
breakSpace :: ByteString -> (ByteString, ByteString)
dropSpace :: ByteString -> ByteString
dropSpaceEnd :: ByteString -> ByteString
split :: Char -> ByteString -> [ByteString]
splitWith :: (Char -> Bool) -> ByteString -> [ByteString]
tokens :: (Char -> Bool) -> ByteString -> [ByteString]
group :: ByteString -> [ByteString]
groupBy :: (Char -> Char -> Bool) -> ByteString -> [ByteString]
lines :: ByteString -> [ByteString]
words :: ByteString -> [ByteString]
unlines :: [ByteString] -> ByteString
unwords :: [ByteString] -> ByteString
lines' :: ByteString -> [ByteString]
unlines' :: [ByteString] -> ByteString
linesCRLF' :: ByteString -> [ByteString]
unlinesCRLF' :: [ByteString] -> ByteString
words' :: ByteString -> [ByteString]
unwords' :: [ByteString] -> ByteString
lineIndices :: ByteString -> [Int]
betweenLines :: ByteString -> ByteString -> ByteString -> Maybe ByteString
join :: ByteString -> [ByteString] -> ByteString
joinWithChar :: Char -> ByteString -> ByteString -> ByteString
index :: ByteString -> Int -> Char
elemIndex :: Char -> ByteString -> Maybe Int
elemIndexLast :: Char -> ByteString -> Maybe Int
elemIndices :: Char -> ByteString -> [Int]
findIndex :: (Char -> Bool) -> ByteString -> Maybe Int
findIndices :: (Char -> Bool) -> ByteString -> [Int]
count :: Char -> ByteString -> Int
sort :: ByteString -> ByteString
elem :: Char -> ByteString -> Bool
notElem :: Char -> ByteString -> Bool
filterChar :: Char -> ByteString -> ByteString
filterNotChar :: Char -> ByteString -> ByteString
filter :: (Char -> Bool) -> ByteString -> ByteString
find :: (Char -> Bool) -> ByteString -> Maybe Char
isPrefixOf :: ByteString -> ByteString -> Bool
isSuffixOf :: ByteString -> ByteString -> Bool
isSubstringOf :: ByteString -> ByteString -> Bool
findSubstring :: ByteString -> ByteString -> Maybe Int
findSubstrings :: ByteString -> ByteString -> [Int]
zip :: ByteString -> ByteString -> [(Char, Char)]
zipWith :: (Char -> Char -> a) -> ByteString -> ByteString -> [a]
unzip :: [(Char, Char)] -> (ByteString, ByteString)
unsafeHead :: ByteString -> Char
unsafeTail :: ByteString -> ByteString
unsafeIndex :: ByteString -> Int -> Char
w2c :: Word8 -> Char
c2w :: Char -> Word8
readInt :: ByteString -> Maybe (Int, ByteString)
unsafeReadInt :: ByteString -> Maybe (Int, ByteString)
copy :: ByteString -> ByteString
getLine :: IO ByteString
getContents :: IO ByteString
putStr :: ByteString -> IO ()
putStrLn :: ByteString -> IO ()
readFile :: FilePath -> IO ByteString
writeFile :: FilePath -> ByteString -> IO ()
getArgs :: IO [ByteString]
hGetLine :: Handle -> IO ByteString
hGetNonBlocking :: Handle -> Int -> IO ByteString
hGetContents :: Handle -> IO ByteString
hGet :: Handle -> Int -> IO ByteString
hPut :: Handle -> ByteString -> IO ()
packAddress :: Addr# -> ByteString
unsafePackAddress :: Int -> Addr# -> ByteString
unpackList :: ByteString -> [Word8]
noAL :: NoAL
data NoAL
loopArr :: (ByteString, acc) -> ByteString
loopAcc :: (ByteString, acc) -> acc
loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2)
loopU :: (acc -> Word8 -> (acc, Maybe Word8)) -> acc -> ByteString -> (ByteString, acc)
mapEFL :: (Word8 -> Word8) -> NoAL -> Word8 -> (NoAL, Maybe Word8)
filterEFL :: (Word8 -> Bool) -> NoAL -> Word8 -> (NoAL, Maybe Word8)
foldEFL :: (acc -> Word8 -> acc) -> acc -> Word8 -> (acc, Maybe Word8)
fuseEFL :: (a1 -> Word8 -> (a1, Maybe Word8)) -> (a2 -> Word8 -> (a2, Maybe Word8)) -> (a1, a2) -> Word8 -> ((a1, a2), Maybe Word8)
filterF :: (Char -> Bool) -> ByteString -> ByteString
mapF :: (Char -> Char) -> ByteString -> ByteString
The ByteString type
data ByteString

A space-efficient representation of a Word8 vector, supporting many efficient operations. A ByteString contains 8-bit characters only.

Instances of Eq, Ord, Read, Show, Data, Typeable

Constructors
PS !(ForeignPtr Word8) !Int !Int
show/hide Instances
Introducing and eliminating ByteStrings
empty :: ByteString
O(1) The empty ByteString
packChar :: Char -> ByteString
O(1) Convert a Char into a ByteString
pack :: String -> ByteString

O(n) Convert a String into a ByteString

For applications with large numbers of string literals, pack can be a bottleneck. In such cases, consider using packAddress (GHC only).

unpack :: ByteString -> [Char]
O(n) Converts a ByteString to a String.
Basic interface
cons :: Char -> ByteString -> ByteString
O(n) cons is analogous to (:) for lists, but of different complexity, as it requires a memcpy.
snoc :: ByteString -> Char -> ByteString
O(n) Append a Char to the end of a ByteString. Similar to cons, this function performs a memcpy.
null :: ByteString -> Bool
O(1) Test whether a ByteString is empty.
length :: ByteString -> Int
O(1) length returns the length of a ByteString as an Int.
head :: ByteString -> Char
O(1) Extract the first element of a ByteString, which must be non-empty.
tail :: ByteString -> ByteString
O(1) Extract the elements after the head of a ByteString, which must be non-empty.
last :: ByteString -> Char
O(1) Extract the last element of a packed string, which must be non-empty.
init :: ByteString -> ByteString
O(1) Return all the elements of a ByteString except the last one.
append :: ByteString -> ByteString -> ByteString
O(n) Append two ByteStrings
Special ByteStrings
inits :: ByteString -> [ByteString]
O(n) Return all initial segments of the given ByteString, shortest first.
tails :: ByteString -> [ByteString]
O(n) Return all final segments of the given ByteString, longest first.
elems :: ByteString -> [ByteString]
O(n) breaks a ByteString to a list of ByteStrings, one byte each.
Transforming ByteStrings
map :: (Char -> Char) -> ByteString -> ByteString
O(n) map f xs is the ByteString obtained by applying f to each element of xs
reverse :: ByteString -> ByteString
O(n) reverse xs efficiently returns the elements of xs in reverse order.
intersperse :: Char -> ByteString -> ByteString
O(n) The intersperse function takes a Char and a ByteString and `intersperses' that Char between the elements of the ByteString. It is analogous to the intersperse function on Lists.
transpose :: [ByteString] -> [ByteString]
The transpose function transposes the rows and columns of its ByteString argument.
Reducing ByteStrings
foldl :: (a -> Char -> a) -> a -> ByteString -> a
foldl, applied to a binary operator, a starting value (typically the left-identity of the operator), and a ByteString, reduces the ByteString using the binary operator, from left to right.
foldr :: (Char -> a -> a) -> a -> ByteString -> a
foldr, applied to a binary operator, a starting value (typically the right-identity of the operator), and a packed string, reduces the packed string using the binary operator, from right to left.
foldl1 :: (Char -> Char -> Char) -> ByteString -> Char
foldl1 is a variant of foldl that has no starting value argument, and thus must be applied to non-empty ByteStrings.
foldr1 :: (Char -> Char -> Char) -> ByteString -> Char
foldr1 is a variant of foldr that has no starting value argument, and thus must be applied to non-empty ByteStrings
Special folds
concat :: [ByteString] -> ByteString
O(n) Concatenate a list of ByteStrings.
concatMap :: (Char -> ByteString) -> ByteString -> ByteString
Map a function over a ByteString and concatenate the results
any :: (Char -> Bool) -> ByteString -> Bool
Applied to a predicate and a ByteString, any determines if any element of the ByteString satisfies the predicate.
all :: (Char -> Bool) -> ByteString -> Bool
Applied to a predicate and a ByteString, all determines if all elements of the ByteString satisfy the predicate.
maximum :: ByteString -> Char
maximum returns the maximum value from a ByteString
minimum :: ByteString -> Char
minimum returns the minimum value from a ByteString
mapIndexed :: (Int -> Char -> Char) -> ByteString -> ByteString
O(n) map Char functions, provided with the index at each position
Generating and unfolding ByteStrings
replicate :: Int -> Char -> ByteString

O(n) replicate n x is a ByteString of length n with x the value of every element. The following holds:

 replicate w c = unfoldrN w (\u -> Just (u,u)) c

This implementation uses memset(3)

unfoldrN :: Int -> (Char -> Maybe (Char, Char)) -> Char -> ByteString

O(n) The unfoldrN function is analogous to the List 'unfoldr'. unfoldrN builds a ByteString from a seed value. The function takes the element and returns Nothing if it is done producing the ByteString or returns Just (a,b), in which case, a is prepended to the ByteString and b is used as the next element in a recursive call.

To prevent unfoldrN having O(n^2) complexity (as prepending a character to a ByteString is O(n)), this unfoldr requires a maximum final size of the ByteString as an argument. cons can then be implemented in O(1) (i.e. a poke), and the unfoldr itself has linear complexity. The depth of the recursion is limited to this size, but may be less. For lazy, infinite unfoldr, use unfoldr (from List).

Examples:

 unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"

The following equation connects the depth-limited unfoldr to the List unfoldr:

 unfoldrN n == take n $ List.unfoldr
Substrings
Breaking strings
take :: Int -> ByteString -> ByteString
O(1) take n, applied to a ByteString xs, returns the prefix of xs of length n, or xs itself if n > length xs.
drop :: Int -> ByteString -> ByteString
O(1) drop n xs returns the suffix of xs after the first n elements, or empty if n > length xs.
splitAt :: Int -> ByteString -> (ByteString, ByteString)
O(1) splitAt n xs is equivalent to (take n xs, drop n xs).
takeWhile :: (Char -> Bool) -> ByteString -> ByteString
takeWhile, applied to a predicate p and a ByteString xs, returns the longest prefix (possibly empty) of xs of elements that satisfy p.
dropWhile :: (Char -> Bool) -> ByteString -> ByteString
dropWhile p xs returns the suffix remaining after takeWhile p xs.
break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
break p is equivalent to span (not . p).
span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
span p xs breaks the ByteString into two segments. It is equivalent to (takeWhile p xs, dropWhile p xs)
spanEnd :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)

spanEnd behaves like span but from the end of the ByteString. We have

 spanEnd (not.isSpace) "x y z" == ("x y ","z")

and

 spanEnd (not . isSpace) ps
    == 
 let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x) 
Breaking and dropping on specific Chars
breakChar :: Char -> ByteString -> (ByteString, ByteString)

breakChar breaks its ByteString argument at the first occurrence of the specified Char. It is more efficient than break as it is implemented with memchr(3). I.e.

 break (=='c') "abcd" == breakChar 'c' "abcd"
spanChar :: Char -> ByteString -> (ByteString, ByteString)

spanChar breaks its ByteString argument at the first occurrence of a Char other than its argument. It is more efficient than 'span (==)'

 span  (=='c') "abcd" == spanChar 'c' "abcd"
breakFirst :: Char -> ByteString -> Maybe (ByteString, ByteString)

O(n) breakFirst breaks the given ByteString on the first occurrence of the given Char. It behaves like break, except the delimiter is not returned, and Nothing is returned if the delimiter is not in the ByteString. I.e.

 breakFirst 'b' "aabbcc" == Just ("aa","bcc")
 breakFirst c xs ==
 let (x,y) = break (== c) xs 
 in if null y then Nothing else Just (x, drop 1 y)
breakLast :: Char -> ByteString -> Maybe (ByteString, ByteString)

O(n) breakLast behaves like breakFirst, but from the end of the ByteString.

 breakLast ('b') (pack "aabbcc") == Just ("aab","cc")

and the following are equivalent:

 breakLast 'c' "abcdef"
 let (x,y) = break (=='c') (reverse "abcdef") 
 in if null y then Nothing else Just (reverse (drop 1 y), reverse x)
breakSpace :: ByteString -> (ByteString, ByteString)

breakSpace returns the pair of ByteStrings when the argument is broken at the first whitespace byte. I.e.

 break isSpace == breakSpace
dropSpace :: ByteString -> ByteString

dropSpace efficiently returns the ByteString argument with white space Chars removed from the front. It is more efficient than calling dropWhile for removing whitespace. I.e.

 dropWhile isSpace == dropSpace
dropSpaceEnd :: ByteString -> ByteString

dropSpaceEnd efficiently returns the ByteString argument with white space removed from the end. I.e.

 reverse . (dropWhile isSpace) . reverse == dropSpaceEnd

but it is more efficient than using multiple reverses.

Breaking into many substrings
split :: Char -> ByteString -> [ByteString]

O(n) Break a ByteString into pieces separated by the byte argument, consuming the delimiter. I.e.

 split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
 split 'a'  "aXaXaXa"    == ["","X","X","X",""]
 split 'x'  "x"          == ["",""]

and

 join [c] . split c == id
 split == splitWith . (==)

As for all splitting functions in this library, this function does not copy the substrings, it just constructs new ByteStrings that are slices of the original.

splitWith :: (Char -> Bool) -> ByteString -> [ByteString]

O(n) Splits a ByteString into components delimited by separators, where the predicate returns True for a separator element. The resulting components do not contain the separators. Two adjacent separators result in an empty component in the output. eg.

 splitWith (=='a') "aabbaca" == ["","","bb","c",""]
tokens :: (Char -> Bool) -> ByteString -> [ByteString]

Like splitWith, except that sequences of adjacent separators are treated as a single separator. eg.

 tokens (=='a') "aabbaca" == ["bb","c"]
group :: ByteString -> [ByteString]

The group function takes a ByteString and returns a list of ByteStrings such that the concatenation of the result is equal to the argument. Moreover, each sublist in the result contains only equal elements. For example,

 group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]

It is a special case of groupBy, which allows the programmer to supply their own equality test. It is about 40% faster than groupBy (==)

groupBy :: (Char -> Char -> Bool) -> ByteString -> [ByteString]
The groupBy function is the non-overloaded version of group.
Breaking into lines and words
lines :: ByteString -> [ByteString]
lines breaks a ByteString up into a list of ByteStrings at newline Chars. The resulting strings do not contain newlines.
words :: ByteString -> [ByteString]

words breaks a ByteString up into a list of words, which were delimited by Chars representing white space. And

 tokens isSpace = words
unlines :: [ByteString] -> ByteString
unlines is an inverse operation to lines. It joins lines, after appending a terminating newline to each.
unwords :: [ByteString] -> ByteString
The unwords function is analogous to the unlines function, on words.
lines' :: ByteString -> [ByteString]

lines' behaves like lines, in that it breaks a ByteString on newline Chars. However, unlike the Prelude functions, lines' and unlines' correctly reconstruct lines that are missing terminating newline characters. I.e.

 unlines  (lines "a\nb\nc")  == "a\nb\nc\n"
 unlines' (lines' "a\nb\nc") == "a\nb\nc"

Note that this means:

 lines  "a\nb\nc\n" == ["a","b","c"]
 lines' "a\nb\nc\n" == ["a","b","c",""]
unlines' :: [ByteString] -> ByteString
unlines' behaves like unlines, except that it also correctly restores lines that do not have terminating newlines (see the description for lines').
linesCRLF' :: ByteString -> [ByteString]
linesCRLF' behaves like lines', but breaks on (\cr?\lf)
unlinesCRLF' :: [ByteString] -> ByteString
unlinesCRLF' behaves like unlines', in that it also correctly restores lines that do not have terminating newlines (see the description for lines'), but uses CRLF instead of LF.
words' :: ByteString -> [ByteString]

words' behaves like words, with the exception that it produces output on ByteStrings with trailing whitespace that can be correctly inverted by unwords. I.e.

 words  "a b c " == ["a","b","c"]
 words' "a b c " == ["a","b","c",""]
 unwords $ words  "a b c " == "a b c"
 unwords $ words' "a b c " == "a b c "
unwords' :: [ByteString] -> ByteString
unwords' behaves like unwords. It is provided for consistency with the other invertible words and lines functions.
lineIndices :: ByteString -> [Int]

O(n) Indices of newlines. Shorthand for

 elemIndices '\n'
betweenLines
:: ByteString        -- First line to look for
-> ByteString        -- Second line to look for
-> ByteString        -- ByteString to look in
-> Maybe ByteString
betweenLines returns the ByteString between the two lines given, or Nothing if they do not appear. The returned string is the first and shortest string such that the line before it is the given first line, and the line after it is the given second line.
Joining strings
join :: ByteString -> [ByteString] -> ByteString
O(n) The join function takes a ByteString and a list of ByteStrings and concatenates the list after interspersing the first argument between each element of the list.
joinWithChar :: Char -> ByteString -> ByteString -> ByteString
O(n) joinWithChar. An efficient way to join two ByteStrings with a char. Around 4 times faster than the generalised join.
Indexing ByteStrings
index :: ByteString -> Int -> Char
O(1) ByteString index (subscript) operator, starting from 0.
elemIndex :: Char -> ByteString -> Maybe Int
O(n) The elemIndex function returns the index of the first element in the given ByteString which is equal (by memchr) to the query element, or Nothing if there is no such element.
elemIndexLast :: Char -> ByteString -> Maybe Int

O(n) The elemIndexLast function returns the last index of the element in the given ByteString which is equal to the query element, or Nothing if there is no such element. The following holds:

 elemIndexLast c xs == 
 (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
elemIndices :: Char -> ByteString -> [Int]
O(n) The elemIndices function extends elemIndex, by returning the indices of all elements equal to the query element, in ascending order.
findIndex :: (Char -> Bool) -> ByteString -> Maybe Int
The findIndex function takes a predicate and a ByteString and returns the index of the first element in the ByteString satisfying the predicate.
findIndices :: (Char -> Bool) -> ByteString -> [Int]
The findIndices function extends findIndex, by returning the indices of all elements satisfying the predicate, in ascending order.
count :: Char -> ByteString -> Int

count returns the number of times its argument appears in the ByteString

 count = length . elemIndices

Also

 count '\n' == length . lines

But more efficiently than using length on the intermediate list.

Ordered ByteStrings
sort :: ByteString -> ByteString
O(n) Sort a ByteString efficiently, using counting sort.
Searching ByteStrings
Searching by equality
elem :: Char -> ByteString -> Bool
O(n) elem is the ByteString membership predicate. This implementation uses memchr(3).
notElem :: Char -> ByteString -> Bool
O(n) notElem is the inverse of elem
filterChar :: Char -> ByteString -> ByteString

O(n) A first order equivalent of filter . (==), for the common case of filtering a single Char. It is more efficient to use filterChar in this case.

 filterChar == filter . (==)

filterChar is around 10x faster, and uses much less space, than its filter equivalent

filterNotChar :: Char -> ByteString -> ByteString

O(n) A first order equivalent of filter . (/=), for the common case of filtering a single Char out of a list. It is more efficient to use filterNotChar in this case.

 filterNotChar == filter . (/=)

filterNotChar is around 3x faster, and uses much less space, than its filter equivalent

Searching with a predicate
filter :: (Char -> Bool) -> ByteString -> ByteString
O(n) filter, applied to a predicate and a ByteString, returns a ByteString containing those characters that satisfy the predicate.
find :: (Char -> Bool) -> ByteString -> Maybe Char
O(n) The find function takes a predicate and a ByteString, and returns the first element matching the predicate, or Nothing if there is no such element.
Searching for substrings
isPrefixOf :: ByteString -> ByteString -> Bool
O(n) The isPrefixOf function takes two ByteStrings and returns True iff the first is a prefix of the second.
isSuffixOf :: ByteString -> ByteString -> Bool

O(n) The isSuffixOf function takes two ByteStrings and returns True iff the first is a suffix of the second.

The following holds:

 isSuffixOf x y == reverse x `isPrefixOf` reverse y

However, the real implementation uses memcmp to compare the end of the string only, with no reverse required.

isSubstringOf
:: ByteString        -- String to search for.
-> ByteString        -- String to search in.
-> Bool
Check whether one string is a substring of another. isSubstringOf p s is equivalent to not (null (findSubstrings p s)).
findSubstring
:: ByteString        -- String to search for.
-> ByteString        -- String to search in.
-> Maybe Int
Get the first index of a substring in another string, or Nothing if the string is not found. findSubstring p s is equivalent to listToMaybe (findSubstrings p s).
findSubstrings
:: ByteString        -- String to search for.
-> ByteString        -- String to search in.
-> [Int]
Find the indexes of all (possibly overlapping) occurrences of a substring in a string. This function uses the Knuth-Morris-Pratt string matching algorithm.
Zipping and unzipping ByteString
zip :: ByteString -> ByteString -> [(Char, Char)]
O(n) zip takes two ByteStrings and returns a list of corresponding pairs of Chars. If one input ByteString is short, excess elements of the longer ByteString are discarded. This is equivalent to a pair of unpack operations, and so space usage may be large for multi-megabyte ByteStrings
zipWith :: (Char -> Char -> a) -> ByteString -> ByteString -> [a]
zipWith generalises zip by zipping with the function given as the first argument, instead of a tupling function. For example, zipWith (+) is applied to two ByteStrings to produce the list of corresponding sums.
unzip :: [(Char, Char)] -> (ByteString, ByteString)
unzip transforms a list of pairs of Chars into a pair of ByteStrings. Note that this performs two pack operations.
Unchecked access
unsafeHead :: ByteString -> Char
A variety of head for non-empty ByteStrings. unsafeHead omits the check for the empty case, which is good for performance, but there is an obligation on the programmer to provide a proof that the ByteString is non-empty.
unsafeTail :: ByteString -> ByteString
A variety of tail for non-empty ByteStrings. unsafeTail omits the check for the empty case. As with unsafeHead, the programmer must provide a separate proof that the ByteString is non-empty.
unsafeIndex :: ByteString -> Int -> Char
Unsafe ByteString index (subscript) operator, starting from 0, returning a Char. This omits the bounds check, which means there is an accompanying obligation on the programmer to ensure the bounds are checked in some other way.
w2c :: Word8 -> Char
Conversion between Word8 and Char. Should compile to a no-op.
c2w :: Char -> Word8
Unsafe conversion between Char and Word8. This is a no-op and silently truncates to 8 bits Chars > '\255'. It is provided as convenience for ByteString construction.
Reading from ByteStrings
readInt :: ByteString -> Maybe (Int, ByteString)
readInt skips any whitespace at the beginning of its argument, and reads an Int from the beginning of the ByteString. If there is no integer at the beginning of the string, it returns Nothing, otherwise it just returns the int read, and the rest of the string.
unsafeReadInt :: ByteString -> Maybe (Int, ByteString)
unsafeReadInt is like readInt, but requires a null terminated ByteString. It avoids a copy if this is the case. It returns the Int read, if any, and the rest of the string.
Copying ByteStrings
copy :: ByteString -> ByteString
O(n) Make a copy of the ByteString with its own storage. This is mainly useful to allow the rest of the data pointed to by the ByteString to be garbage collected, for example if a large string has been read in, and only a small part of it is needed in the rest of the program.
I/O with ByteStrings
Standard input and output
getLine :: IO ByteString
getLine, read a line from stdin.
getContents :: IO ByteString
getContents. Equivalent to hGetContents stdin
putStr :: ByteString -> IO ()
Write a ByteString to stdout
putStrLn :: ByteString -> IO ()
Write a ByteString to stdout, appending a newline byte
Files
readFile :: FilePath -> IO ByteString
Read an entire file directly into a ByteString. This is far more efficient than reading the characters into a String and then using pack. It also may be more efficient than opening the file and reading it using hGet.
writeFile :: FilePath -> ByteString -> IO ()
Write a ByteString to a file.
I/O with Handles
getArgs :: IO [ByteString]
A ByteString equivalent for getArgs. More efficient for large argument lists
hGetLine :: Handle -> IO ByteString
hGetLine. read a ByteString from a handle
hGetNonBlocking :: Handle -> Int -> IO ByteString
hGetNonBlocking is identical to hGet, except that it will never block waiting for data to become available, instead it returns only whatever data is available.
hGetContents :: Handle -> IO ByteString

Read entire handle contents into a ByteString.

As with hGet, the string representation in the file is assumed to be ISO-8859-1.

hGet :: Handle -> Int -> IO ByteString
Read a ByteString directly from the specified Handle. This is far more efficient than reading the characters into a String and then using pack.
hPut :: Handle -> ByteString -> IO ()
Outputs a ByteString to the specified Handle.
Low level construction
For constructors from foreign language types see Data.ByteString
packAddress :: Addr# -> ByteString

O(n) Pack a null-terminated sequence of bytes, pointed to by an Addr# (an arbitrary machine address assumed to point outside the garbage-collected heap) into a ByteString. Creating a ByteString from an unboxed string literal via its Addr# is much faster than packing a boxed string. An unboxed string literal is compiled to a static char [] by GHC. Establishing the length of the string requires a call to strlen(3), so the Addr# must point to a null-terminated buffer (as is the case with string# literals in GHC). Use unsafePackAddress if you know the length of the string statically.

An example:

 literalFS = packAddress "literal"#
unsafePackAddress :: Int -> Addr# -> ByteString
O(1) unsafePackAddress provides constant-time construction of ByteStrings -- which is ideal for string literals. It packs a null-terminated sequence of bytes into a ByteString, given a raw Addr to the string, and the length of the string. Make sure the length is correct, otherwise use the safer packAddress (where the length will be calculated once at runtime).
Utilities (needed for array fusion)
unpackList :: ByteString -> [Word8]
noAL :: NoAL
No accumulator
data NoAL
Data type for accumulators which can be ignored. The rewrite rules rely on the fact that no bottoms of this type are ever constructed; hence, we can assume (_ :: NoAL) `seq` x = x.
loopArr :: (ByteString, acc) -> ByteString
Projection functions that are fusion friendly (as in, we determine when they are inlined)
loopAcc :: (ByteString, acc) -> acc
loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2)
loopU
:: (acc -> Word8 -> (acc, Maybe Word8))   -- mapping & folding, once per elem
-> acc                                    -- initial acc value
-> ByteString                             -- input ByteString
-> (ByteString, acc)
Iteration over ByteStrings
mapEFL :: (Word8 -> Word8) -> NoAL -> Word8 -> (NoAL, Maybe Word8)

Special forms of loop arguments

  • These are common special cases for the three function arguments of gen and loop; we give them special names to make it easier to trigger RULES applying in the special cases represented by these arguments. The INLINE [1] makes sure that these functions are only inlined in the last two simplifier phases.
  • In the case where the accumulator is not needed, it is better to always explicitly return a value `()', rather than just copy the input to the output, as the former gives GHC better local information.

Element function expressing a mapping only

filterEFL :: (Word8 -> Bool) -> NoAL -> Word8 -> (NoAL, Maybe Word8)
Element function implementing a filter function only
foldEFL :: (acc -> Word8 -> acc) -> acc -> Word8 -> (acc, Maybe Word8)
Element function expressing a reduction only
fuseEFL :: (a1 -> Word8 -> (a1, Maybe Word8)) -> (a2 -> Word8 -> (a2, Maybe Word8)) -> (a1, a2) -> Word8 -> ((a1, a2), Maybe Word8)
Fuse two flat loop functions
filterF :: (Char -> Bool) -> ByteString -> ByteString
O(n) filterF is a non-fuseable version of filter, that may be around 2x faster for some one-shot applications.
mapF :: (Char -> Char) -> ByteString -> ByteString
O(n) Like map, but not fuseable. The benefit is that it is slightly faster for one-shot cases.
Produced by Haddock version 0.7