Anagrams

From HaskellWiki
Revision as of 00:38, 8 October 2006 by DonStewart (talk | contribs) (category)
Jump to navigation Jump to search
module Main (main) where
import System.Environment (getArgs)
import Data.Char (toUpper,isAlpha)
import Data.List (sortBy)
import Control.Monad (liftM)
import qualified Data.ByteString.Char8 as B

data CharCount = CharCount !Char !Int deriving (Show, Eq)
data WordData = WordData B.ByteString [CharCount] deriving (Show, Eq)

main :: IO ()
main = 
 do n <- head `fmap` getArgs
    wl <- liftM makeWordList $ B.readFile n 
    user <- liftM countChars B.getLine 
    case (findAnagrams wl user) of
      [] -> print "No anagrams."
      x  -> B.putStr . B.unlines . map B.unwords $ x 
    
findAnagrams :: [WordData] -> [CharCount] -> [[B.ByteString]]
findAnagrams [] _ = []
findAnagrams lst@((WordData bs cc):rest) qcc =
 case (qcc `minus` cc) of
      Nothing -> remaining
      Just [] -> [bs]:remaining 
      Just x  -> map ((:) bs) (findAnagrams lst x) ++ remaining
 where remaining = findAnagrams rest qcc

minus :: (Monad m) => [CharCount] -> [CharCount] -> m [CharCount]
minus x [] = return x
minus [] _ = fail "can't subtract from empty" 
minus (lft@(CharCount c1 i1):xs) r@((CharCount c2 i2):ys)
  | (c1 == c2) && (i2 == i1) = xs `minus` ys
  | (c1 == c2) && (i2 < i1)  = do rem <- xs `minus` ys
                                  return $! (CharCount c1 (i1 - i2)):rem
  | (c1 == c2) && (i2 > i1)  = fail "right has more chars than left" 
  | (c1 > c2)                = fail "right has chars not in left" 
  | (c1 < c2)                = do rem <- xs `minus` r
                                  return $! lft:rem
  | otherwise                = error "Bad condition"

countChars :: B.ByteString -> [CharCount] 
countChars = map counts . B.group . B.sort . B.map toUpper . B.filter isAlpha 
 where counts x = CharCount (B.unsafeHead x) (B.length x)

makeWordList :: B.ByteString -> [WordData]
makeWordList = map (\w -> WordData w (countChars w)) . sortBy bsLenCmp . B.words
 where bsLenCmp x y = compare (B.length y) (B.length x)