[Haskell-cafe] Re: Why is there no splitBy in the list module?

Robert Dockins robdockins at fastmail.fm
Fri Jul 21 10:07:00 EDT 2006


On Jul 20, 2006, at 1:31 PM, Jon Fairbairn wrote:

> On 2006-07-13 at 10:16BST I wrote:
>> Hooray!  I've been waiting to ask "Why aren't we asking what
>> laws hold for these operations?"
>
> Having thought about this for a bit, I've come up with the
> below. This is intended to give the general idea -- it's not
> polished code, and I'm not at all wedded to the names I've
> used, and it almost certainly should be split up.

[snip an interesting new take on splitting strings]

>
> -- Jón Fairbairn                              Jon.Fairbairn at  
> cl.cam.ac.uk

Inspired by this, I've hacked together my own version, based around  
the ideas of list deforestation.  I've taken some liberties with the  
function names.  In particular, I've split Jon's 'parts' function  
into two pieces, called 'classify' and 'parts'.  Code follows.  I  
haven't tested it a lot, but things seem to work in ghci.  In  
particular I've not tested the RULES.  I have no idea if this is  
better or not in terms of performance, but it seems a little cleaner  
to me.


Rob Dockins

Speak softly and drive a Sherman tank.
Laugh hard; it's a long way to the bank.
           -- TMBG





{-# OPTIONS -fglasgow-exts  #-}
module Parts where

import GHC.Exts
import Data.Char

data PartList a
   = LeftPart  a (PartList a)
   | RightPart a (PartList a)
   | PartNil

foldrParts :: (a -> b -> b) -> (a -> b -> b) -> b -> PartList a -> b
foldrParts l r n (LeftPart  x xs) = l x (foldrParts l r n xs)
foldrParts l r n (RightPart x xs) = r x (foldrParts l r n xs)
foldrParts l r n PartNil = n

buildParts :: (forall b. (a -> b -> b) -> (a -> b -> b) -> b -> b) ->  
PartList a
buildParts g = g LeftPart RightPart PartNil

{-# RULES
"foldrParts/buildParts"  forall l r z (g::forall b. (a->b->b) -> (a- 
 >b->b) -> b -> b) .
          foldrParts l r z (buildParts g) = g l r z
"foldrParts/id"
          foldrParts LeftPart RightPart PartNil = id
   #-}

classify :: (a -> Bool) -> [a] -> PartList a
classify p zs = buildParts (\l r n -> foldr (\x xs -> if p x then r x  
xs else l x xs) n zs)

parts :: PartList a -> PartList [a]
parts = foldrParts fLeft fRight PartNil
where
    fLeft x xs = LeftPart
           (case xs of (LeftPart y _ ) -> x:y; _ -> x:[])
           (case xs of (LeftPart _ ys) ->  ys; _ -> xs)

    fRight x xs = RightPart
           (case xs of (RightPart y _ ) -> x:y; _ -> x:[])
           (case xs of (RightPart _ ys) ->  ys; _ -> xs)

partsSep :: PartList a -> PartList [a]
partsSep = foldrParts fLeft fRight PartNil
where
    fLeft  x xs = RightPart [] xs
    fRight x xs = RightPart
           (case xs of (RightPart y _ ) -> x:y; _ -> x:[])
           (case xs of (RightPart _ ys) ->  ys; _ -> xs)

fromParts :: PartList a -> [a]
fromParts xs = build (\c n -> foldrParts c c n xs)

{-
loose proof, ignore seq...

forall p xs. fromParts (classify p xs)

   === build (\c n -> foldrParts c c n (classify p  
xs)                          (definition of fromParts)
   === build (\c n -> foldrParts c c n (buildParts (\l r n - 
 >                   (definition of classify)
              foldr (\x xs -> if p x then r x xs else l x xs) n xs)))
   === build (\c n -> foldr (\x xs -> if p x then c x xs else c x xs)  
n xs)     (deforestation conjecture)
   === build (\c n -> foldr (\x xs -> c x xs) n  
xs)                             (by indifference on p x)
   === foldr (\x xs -> (:) x xs) []  
xs)                                         (definition of build)
   === foldr (:) []  
xs                                                          (eta  
contraction)
   ===  
xs                                                                       
  (well known)
-}

{-
forall p xs.  concat (fromParts (parts (classify p xs)))

   === concat (fromParts (foldrParts fLeft fRigth PartNil (classify p  
xs)))      (definition of parts)
   === concat (fromParts (foldrParts fLeft fRigth PartNil (buildParts  
(\l r n -> (definition of classify)
              foldr (\x xs -> if p x then r x xs else l x xs) n  
xs)))            (deforestation conjecture)
   === concat (fromParts (foldr
                 (\x xs -> if p x then fRight x xs else fLeft x xs)  
PartNil xs))
   === concat (build (\c n -> foldrParts c c n  
(foldr                            (definition of fromParts)
                 (\x xs -> if p x then fRight x xs else fLeft x xs)  
PartNil xs)))
   === foldr (++) [] (build (\c n -> foldrParts c c n  
(foldr                     (definition of concat)
                 (\x xs -> if p x then fRight x xs else fLeft x xs)  
PartNil xs)))
   === foldrParts (++) (++) []  
(foldr                                            (list deforestation)
                 (\x xs -> if p x then fRight x xs else fLeft x xs)  
PartNil xs)))

	??? I think the rest of the proof can go through via the  
approximation lemma, or maybe some more inlining is sufficient...

   === xs
-}


splitBy :: (a -> Bool) -> [a] -> [[a]]
splitBy p = fromParts . parts . classify p

contiguousParts :: (a -> Bool) -> [a] -> [[a]]
contiguousParts p xs = build (\c n -> foldrParts (\_ x -> x) c n  
(parts (classify p xs)))

segmentsSatisfying :: (a -> Bool) -> [a] -> [[a]]
segmentsSatisfying p = fromParts . partsSep . classify p

splitOn :: Eq a => a -> [a] -> [[a]]
splitOn x = splitBy (==x)

words :: String -> [String]
words = contiguousParts isAlphaNum

lines :: String -> [String]
lines = segmentsSatisfying (/= '\n')



More information about the Libraries mailing list