Difference between revisions of "Toy compression implementations"

From HaskellWiki
Jump to navigation Jump to search
m
(New source code! Now comes with examples...)
Line 11: Line 11:
   
 
import Data.List
 
import Data.List
  +
import Data.Word -- In case you want it. (Not actually used anywhere!)
  +
  +
chars = [' '..'~'] -- Becuase ' ' = 0x20 and '~' = 0x7F.
   
   
 
-- Run-length encoding
 
-- Run-length encoding
   
encode_RLE :: (Eq x) => [x] -> [(Int,x)]
+
encode_RLE :: (Eq t) => [t] -> [(Int,t)]
 
encode_RLE = map (\xs -> (length xs, head xs)) . groupBy (==)
 
encode_RLE = map (\xs -> (length xs, head xs)) . groupBy (==)
   
decode_RLE :: [(Int,x)] -> [x]
+
decode_RLE :: [(Int,t)] -> [t]
 
decode_RLE = concatMap (uncurry replicate)
 
decode_RLE = concatMap (uncurry replicate)
   
   
 
-- Limpel-Ziv-Welch encoding
-- Limpel-Ziv-Welsh compression (Recommend using [Word8] or [SmallAlpha] for input!)
 
   
encode_LZW :: (Eq x, Enum x, Bounded x) => [x] -> [Int]
+
encode_LZW :: (Eq t) => [t] -> [t] -> [Int]
encode_LZW [] = []
+
encode_LZW _ [] = []
encode_LZW (x:xs) = work init [x] xs where
+
encode_LZW alphabet (x:xs) = work (make alphabet) [x] xs where
init = map (\x -> [x]) $ enumFromTo minBound maxBound
+
make = map (\x -> [x])
work table buffer [] = [maybe undefined id (elemIndex buffer table)]
+
work table buffer [] = [maybe undefined id $ elemIndex buffer table]
 
work table buffer (x:xs) =
 
work table buffer (x:xs) =
 
let new = buffer ++ [x]
 
let new = buffer ++ [x]
Line 35: Line 38:
 
Just _ -> work table new xs
 
Just _ -> work table new xs
   
  +
decode_LZW :: [t] -> [Int] -> [t]
-- TODO: Matching decode_LZW function.
 
  +
decode_LZW _ [] = []
  +
decode_LZW alphabet xs = work (length alphabet) (make alphabet) [] xs where
  +
make = map (\x -> [x])
  +
work _ t _ [] = []
  +
work n table prev (x:xs) = case x >= n of
  +
True -> error "underflow" -- THIS NEEDS FIXING!
  +
False -> let out = table !! x
  +
in out ++
  +
if null prev
  +
then work n table out xs
  +
else work (n+1) (table ++ [prev ++ [head out]]) out xs
   
-- TODO: Huffman encoding.
 
 
-- TODO: Arithmetic coding.
 
 
</haskell>
 
</haskell>
   
  +
Some examples are in order:
It may also be useful to add the following for test purposes:
 
   
 
<haskell>
 
<haskell>
  +
> encode_RLE "AAAABBBBDDCCCCEEEGGFFFF"
import Data.Word
 
   
  +
[(4,'A'),(4,'B'),(2,'D'),(4,'C'),(3,'E'),(2,'G'),(4,'F')]
data SmallAlpha = AA | BB | CC | DD deriving (Show, Eq, Ord, Enum, Bounded)
 
   
parse1 'a' = AA
 
parse1 'b' = BB
 
parse1 'c' = CC
 
parse1 _ = DD -- For safety
 
   
  +
> decode_RLE [(4,'A'),(4,'B'),(2,'D'),(4,'C'),(3,'E'),(2,'G'),(4,'F')]
parse = map parse1
 
</haskell>
 
   
  +
"AAAABBBBDDCCCCEEEGGFFFF"
Anybody know how to use <hask>newtype</hask> to make a type like <hask>Char</hask> but with <hask>minBound</hask> and <hask>maxBound</hask> much closer together?
 
  +
  +
  +
> encode_LZW chars "This is just a simple test."
  +
  +
[52,72,73,83,0,97,0,74,85,83,84,0,65,0,83,73,77,80,76,69,0,84,69,104,14]
  +
  +
  +
> decode_LZW chars [52,72,73,83,0,97,0,74,85,83,84,0,65,0,83,73,77,80,76,69,0,84,69,104,14]
  +
  +
"This is just a simple test."
 
</haskell>

Revision as of 20:15, 15 February 2007


This code is provided in the hope that someone might find it interesting/entertaining, and to demonstrate what an excellent programming language Haskell truly is. (A working polymorphic LZW implementation in 10 lines? Try that in Java!)

This is 'toy' code. Please don't try to use it to compress multi-GB of data. It has not been thoroughly checked for correctness, and I shudder to think what the time and space complexity would be like! However, it is enlightening and entertaining to see how many algorithms you can implement with a handful of lines...

MathematicalOrchid 16:46, 15 February 2007 (UTC)

module Compression where

import Data.List
import Data.Word   -- In case you want it. (Not actually used anywhere!)

chars = [' '..'~']   -- Becuase ' ' = 0x20 and '~' = 0x7F.


-- Run-length encoding

encode_RLE :: (Eq t) => [t] -> [(Int,t)]
encode_RLE = map (\xs -> (length xs, head xs)) . groupBy (==)

decode_RLE :: [(Int,t)] -> [t]
decode_RLE = concatMap (uncurry replicate)


-- Limpel-Ziv-Welch encoding

encode_LZW :: (Eq t) => [t] -> [t] -> [Int]
encode_LZW _        []     = []
encode_LZW alphabet (x:xs) = work (make alphabet) [x] xs where
  make = map (\x -> [x])
  work table buffer []     = [maybe undefined id $ elemIndex buffer table]
  work table buffer (x:xs) =
    let new = buffer ++ [x]
    in  case elemIndex new table of
          Nothing -> maybe undefined id (elemIndex buffer table) : work (table ++ [new]) [x] xs
          Just _  -> work table new xs

decode_LZW :: [t] -> [Int] -> [t]
decode_LZW _        []     = []
decode_LZW alphabet xs = work (length alphabet) (make alphabet) [] xs where
  make = map (\x -> [x])
  work _ t     _    []     = []
  work n table prev (x:xs) = case x >= n of
    True  -> error "underflow"   -- THIS NEEDS FIXING!
    False -> let out = table !! x
             in  out ++
                 if null prev
                   then work n table out xs
                   else work (n+1) (table ++ [prev ++ [head out]]) out xs

Some examples are in order:

> encode_RLE "AAAABBBBDDCCCCEEEGGFFFF"

[(4,'A'),(4,'B'),(2,'D'),(4,'C'),(3,'E'),(2,'G'),(4,'F')]


> decode_RLE [(4,'A'),(4,'B'),(2,'D'),(4,'C'),(3,'E'),(2,'G'),(4,'F')]

"AAAABBBBDDCCCCEEEGGFFFF"


> encode_LZW chars "This is just a simple test."

[52,72,73,83,0,97,0,74,85,83,84,0,65,0,83,73,77,80,76,69,0,84,69,104,14]


> decode_LZW chars [52,72,73,83,0,97,0,74,85,83,84,0,65,0,83,73,77,80,76,69,0,84,69,104,14]

"This is just a simple test."