# Toy compression implementations

### From HaskellWiki

(Difference between revisions)

m |
(New source code! Now comes with examples...) |
||

Line 11: | Line 11: | ||

import Data.List |
import Data.List |
||

+ | import Data.Word -- In case you want it. (Not actually used anywhere!) |
||

+ | |||

+ | chars = [' '..'~'] -- Becuase ' ' = 0x20 and '~' = 0x7F. |
||

-- Run-length encoding |
-- Run-length encoding |
||

− | encode_RLE :: (Eq x) => [x] -> [(Int,x)] |
+ | encode_RLE :: (Eq t) => [t] -> [(Int,t)] |

encode_RLE = map (\xs -> (length xs, head xs)) . groupBy (==) |
encode_RLE = map (\xs -> (length xs, head xs)) . groupBy (==) |
||

− | decode_RLE :: [(Int,x)] -> [x] |
+ | decode_RLE :: [(Int,t)] -> [t] |

decode_RLE = concatMap (uncurry replicate) |
decode_RLE = concatMap (uncurry replicate) |
||

− | -- Limpel-Ziv-Welsh compression (Recommend using [Word8] or [SmallAlpha] for input!) |
+ | -- Limpel-Ziv-Welch encoding |

− | encode_LZW :: (Eq x, Enum x, Bounded x) => [x] -> [Int] |
+ | encode_LZW :: (Eq t) => [t] -> [t] -> [Int] |

− | encode_LZW [] = [] |
+ | encode_LZW _ [] = [] |

− | encode_LZW (x:xs) = work init [x] xs where |
+ | encode_LZW alphabet (x:xs) = work (make alphabet) [x] xs where |

− | init = map (\x -> [x]) $ enumFromTo minBound maxBound |
+ | make = map (\x -> [x]) |

− | work table buffer [] = [maybe undefined id (elemIndex buffer table)] |
+ | work table buffer [] = [maybe undefined id $ elemIndex buffer table] |

work table buffer (x:xs) = |
work table buffer (x:xs) = |
||

let new = buffer ++ [x] |
let new = buffer ++ [x] |
||

Line 35: | Line 38: | ||

Just _ -> work table new xs |
Just _ -> work table new xs |
||

− | -- TODO: Matching decode_LZW function. |
+ | decode_LZW :: [t] -> [Int] -> [t] |

+ | decode_LZW _ [] = [] |
||

+ | decode_LZW alphabet xs = work (length alphabet) (make alphabet) [] xs where |
||

+ | make = map (\x -> [x]) |
||

+ | work _ t _ [] = [] |
||

+ | work n table prev (x:xs) = case x >= n of |
||

+ | True -> error "underflow" -- THIS NEEDS FIXING! |
||

+ | False -> let out = table !! x |
||

+ | in out ++ |
||

+ | if null prev |
||

+ | then work n table out xs |
||

+ | else work (n+1) (table ++ [prev ++ [head out]]) out xs |
||

− | -- TODO: Huffman encoding. |
||

− | |||

− | -- TODO: Arithmetic coding. |
||

</haskell> |
</haskell> |
||

− | It may also be useful to add the following for test purposes: |
+ | Some examples are in order: |

<haskell> |
<haskell> |
||

− | import Data.Word |
+ | > encode_RLE "AAAABBBBDDCCCCEEEGGFFFF" |

− | data SmallAlpha = AA | BB | CC | DD deriving (Show, Eq, Ord, Enum, Bounded) |
+ | [(4,'A'),(4,'B'),(2,'D'),(4,'C'),(3,'E'),(2,'G'),(4,'F')] |

− | parse1 'a' = AA |
||

− | parse1 'b' = BB |
||

− | parse1 'c' = CC |
||

− | parse1 _ = DD -- For safety |
||

− | parse = map parse1 |
+ | > decode_RLE [(4,'A'),(4,'B'),(2,'D'),(4,'C'),(3,'E'),(2,'G'),(4,'F')] |

− | </haskell> |
||

− | Anybody know how to use <hask>newtype</hask> to make a type like <hask>Char</hask> but with <hask>minBound</hask> and <hask>maxBound</hask> much closer together? |
+ | "AAAABBBBDDCCCCEEEGGFFFF" |

+ | |||

+ | |||

+ | > encode_LZW chars "This is just a simple test." |
||

+ | |||

+ | [52,72,73,83,0,97,0,74,85,83,84,0,65,0,83,73,77,80,76,69,0,84,69,104,14] |
||

+ | |||

+ | |||

+ | > decode_LZW chars [52,72,73,83,0,97,0,74,85,83,84,0,65,0,83,73,77,80,76,69,0,84,69,104,14] |
||

+ | |||

+ | "This is just a simple test." |
||

+ | </haskell> |

## Revision as of 20:15, 15 February 2007

This code is provided in the hope that someone might find it interesting/entertaining, and to demonstrate what an excellent programming language Haskell truly is. (A working polymorphic LZW implementation in 10 lines? Try *that* in Java!)

This is 'toy' code. Please don't try to use it to compress multi-GB of data. It has not been thoroughly checked for correctness, and I shudder to think what the time and space complexity would be like! However, it is enlightening and entertaining to see how many algorithms you can implement with a handful of lines...

MathematicalOrchid 16:46, 15 February 2007 (UTC)

module Compression where import Data.List import Data.Word -- In case you want it. (Not actually used anywhere!) chars = [' '..'~'] -- Becuase ' ' = 0x20 and '~' = 0x7F. -- Run-length encoding encode_RLE :: (Eq t) => [t] -> [(Int,t)] encode_RLE = map (\xs -> (length xs, head xs)) . groupBy (==) decode_RLE :: [(Int,t)] -> [t] decode_RLE = concatMap (uncurry replicate) -- Limpel-Ziv-Welch encoding encode_LZW :: (Eq t) => [t] -> [t] -> [Int] encode_LZW _ [] = [] encode_LZW alphabet (x:xs) = work (make alphabet) [x] xs where make = map (\x -> [x]) work table buffer [] = [maybe undefined id $ elemIndex buffer table] work table buffer (x:xs) = let new = buffer ++ [x] in case elemIndex new table of Nothing -> maybe undefined id (elemIndex buffer table) : work (table ++ [new]) [x] xs Just _ -> work table new xs decode_LZW :: [t] -> [Int] -> [t] decode_LZW _ [] = [] decode_LZW alphabet xs = work (length alphabet) (make alphabet) [] xs where make = map (\x -> [x]) work _ t _ [] = [] work n table prev (x:xs) = case x >= n of True -> error "underflow" -- THIS NEEDS FIXING! False -> let out = table !! x in out ++ if null prev then work n table out xs else work (n+1) (table ++ [prev ++ [head out]]) out xs

Some examples are in order:

> encode_RLE "AAAABBBBDDCCCCEEEGGFFFF" [(4,'A'),(4,'B'),(2,'D'),(4,'C'),(3,'E'),(2,'G'),(4,'F')] > decode_RLE [(4,'A'),(4,'B'),(2,'D'),(4,'C'),(3,'E'),(2,'G'),(4,'F')] "AAAABBBBDDCCCCEEEGGFFFF" > encode_LZW chars "This is just a simple test." [52,72,73,83,0,97,0,74,85,83,84,0,65,0,83,73,77,80,76,69,0,84,69,104,14] > decode_LZW chars [52,72,73,83,0,97,0,74,85,83,84,0,65,0,83,73,77,80,76,69,0,84,69,104,14] "This is just a simple test."