Personal tools

Collaborative filtering

From HaskellWiki

(Difference between revisions)
Jump to: navigation, search
m
(Add update2)
Line 6: Line 6:
   
 
The predict' function replaces predict.
 
The predict' function replaces predict.
  +
The update2
   
 
<haskell>
 
<haskell>
Line 24: Line 25:
 
-- then so is the (SlopeOne item) type.
 
-- then so is the (SlopeOne item) type.
 
newtype SlopeOne item = SlopeOne (M.Map item (M.Map item (Count,RatingValue)))
 
newtype SlopeOne item = SlopeOne (M.Map item (M.Map item (Count,RatingValue)))
  +
deriving (Show)
  +
  +
-- The SlopeOne' matrix is an unormalized version of SlopeOne
  +
newtype SlopeOne' item = SlopeOne' (M.Map item (M.Map item (Count,RatingValue)))
 
deriving (Show)
 
deriving (Show)
   
 
empty = SlopeOne M.empty
 
empty = SlopeOne M.empty
  +
empty' = SlopeOne' M.empty
   
 
-- This performs a strict addition on pairs made of two nuumeric types
 
-- This performs a strict addition on pairs made of two nuumeric types
 
addT (a,b) (c,d) = let (l,r) = (a+c, b+d) in l `seq` r `seq` (l, r)
 
addT (a,b) (c,d) = let (l,r) = (a+c, b+d) in l `seq` r `seq` (l, r)
  +
   
 
-- There is never an entry for the "diagonal" elements with equal
 
-- There is never an entry for the "diagonal" elements with equal
 
-- items in the pair: (foo,foo) is never in the SlopeOne.
 
-- items in the pair: (foo,foo) is never in the SlopeOne.
 
update :: Ord item => SlopeOne item -> [Rating item] -> SlopeOne item
 
update :: Ord item => SlopeOne item -> [Rating item] -> SlopeOne item
update (SlopeOne matrixIn) usersRatings = SlopeOne . M.map (M.map norm) . foldl' update' matrixIn $ usersRatings
+
update (SlopeOne matrixInNormed) usersRatings =
  +
SlopeOne . M.map (M.map norm) . foldl' update' matrixIn $ usersRatings
 
where update' oldMatrix userRatings =
 
where update' oldMatrix userRatings =
 
foldl' (\oldMatrix (itemPair, rating) -> insert oldMatrix itemPair rating)
 
foldl' (\oldMatrix (itemPair, rating) -> insert oldMatrix itemPair rating)
Line 47: Line 54:
 
outer _ innerMap = M.insertWith' addT item2 newRating innerMap
 
outer _ innerMap = M.insertWith' addT item2 newRating innerMap
 
norm (count,total_rating) = (count, total_rating / fromIntegral count)
 
norm (count,total_rating) = (count, total_rating / fromIntegral count)
  +
un_norm (count,rating) = (count, rating * fromIntegral count)
  +
matrixIn = M.map (M.map un_norm) matrixInNormed
  +
  +
-- This version of update2 makes an unnormalize slopeOne' from each
  +
-- Rating and combines them using Map.union* operations and addT.
  +
update2 :: Ord item => SlopeOne' item -> [Rating item] -> SlopeOne' item
  +
update2 s [] = s
  +
update2 (SlopeOne' matrixIn) usersRatings =
  +
SlopeOne' . M.unionsWith (M.unionWith addT) . (matrixIn:) . map fromRating $ usersRatings
  +
where fromRating userRating = M.mapWithKey expand1 userRating
  +
where expand1 item1 rating1 = M.mapMaybeWithKey expand2 userRating
  +
where expand2 item2 rating2 | item1 == item2 = Nothing
  +
| otherwise = Just (1,rating1 - rating2)
 
 
 
predict :: Ord a => SlopeOne a -> Rating a -> Rating a
 
predict :: Ord a => SlopeOne a -> Rating a -> Rating a
Line 65: Line 85:
 
in M.filter (\norm_rating -> norm_rating > 0) normM
 
in M.filter (\norm_rating -> norm_rating > 0) normM
   
+
-- This is a modified version of predict. It also expect the
predict' :: Ord a => SlopeOne a -> Rating a -> Rating a
+
-- unnormalized SlopeOne' but this is a small detail
predict' (SlopeOne matrixIn) userRatings = M.mapMaybeWithKey calcItem matrixIn
+
predict' :: Ord a => SlopeOne' a -> Rating a -> Rating a
  +
predict' (SlopeOne' matrixIn) userRatings = M.mapMaybeWithKey calcItem matrixIn
 
where calcItem item1 innerMap | M.member item1 userRatings = Nothing
 
where calcItem item1 innerMap | M.member item1 userRatings = Nothing
 
| M.null combined = Nothing
 
| M.null combined = Nothing
Line 76: Line 96:
 
norm_rating = total_rating / fromIntegral total_count
 
norm_rating = total_rating / fromIntegral total_count
 
weight (count,rating) user_rating =
 
weight (count,rating) user_rating =
(count,fromIntegral count * (rating + user_rating))
+
(count,rating + fromIntegral count * user_rating)
   
 
freqs (SlopeOne matrixIn) userRatings =
 
freqs (SlopeOne matrixIn) userRatings =
Line 93: Line 113:
 
]
 
]
   
matrix = update empty userData
 
 
userInfo = M.fromList [("squid", 0.4)]
 
userInfo = M.fromList [("squid", 0.4)]
predictions = predict matrix userInfo
+
predictions' = predict' matrix userInfo
+
predictions = predict (update empty userData) userInfo
  +
  +
predictions' = predict' (update2 empty' userData) userInfo
 
</haskell>
 
</haskell>

Revision as of 19:21, 28 August 2007

This page was added to discuss different versions of the code for collaborative filtering at Bryan's blog.

Chris' version

I renamed the variables and then reorganized the code a bit.

The predict' function replaces predict. The update2

module WeightedSlopeOne (Rating, SlopeOne, empty, predict, update) where
 
import Data.List (foldl',foldl1')
import qualified Data.Map as M
 
-- The item type is a polymorphic parameter.  Since it goes into a Map
-- it must be able to be compared, so item must be an instance of Ord.
type Count = Int
type RatingValue = Double
-- The Rating is the known (item,Rating) information for a particular "user"
type Rating item = M.Map item RatingValue
 
-- The SlopeOne matrix is indexed by pairs of items and is implmeneted
-- as a sparse map of maps.  If the item type is an instance of Show
-- then so is the (SlopeOne item) type.
newtype SlopeOne item = SlopeOne (M.Map item (M.Map item (Count,RatingValue)))
  deriving (Show)
 
-- The SlopeOne' matrix is an unormalized version of SlopeOne
newtype SlopeOne' item = SlopeOne' (M.Map item (M.Map item (Count,RatingValue)))
  deriving (Show)
 
empty = SlopeOne M.empty
empty' = SlopeOne' M.empty
 
-- This performs a strict addition on pairs made of two nuumeric types
addT (a,b) (c,d) = let (l,r) = (a+c, b+d) in l `seq` r `seq` (l, r)
 
 
-- There is never an entry for the "diagonal" elements with equal
-- items in the pair: (foo,foo) is never in the SlopeOne.
update :: Ord item => SlopeOne item -> [Rating item] -> SlopeOne item
update (SlopeOne matrixInNormed) usersRatings =
    SlopeOne . M.map (M.map norm) . foldl' update' matrixIn $ usersRatings
  where update' oldMatrix userRatings =
          foldl' (\oldMatrix (itemPair, rating) -> insert oldMatrix itemPair rating)
                 oldMatrix itemCombos
          where itemCombos = [ ((item1, item2), (1, rating1 - rating2)) 
                             | (item1, rating1) <- ratings
                             , (item2, rating2) <- ratings
                             , item1 /= item2]
                ratings = M.toList userRatings
        insert outerMap (item1, item2) newRating = M.insertWith' outer item1 newOuterEntry outerMap
          where newOuterEntry = M.singleton item2 newRating
                outer _ innerMap = M.insertWith' addT item2 newRating innerMap
        norm (count,total_rating) = (count, total_rating / fromIntegral count)
        un_norm (count,rating) = (count, rating * fromIntegral count)
        matrixIn = M.map (M.map un_norm) matrixInNormed
 
-- This version of update2 makes an unnormalize slopeOne' from each
-- Rating and combines them using Map.union* operations and addT.
update2 :: Ord item => SlopeOne' item -> [Rating item] -> SlopeOne' item
update2 s [] = s
update2 (SlopeOne' matrixIn) usersRatings =
    SlopeOne' . M.unionsWith (M.unionWith addT) . (matrixIn:) . map fromRating $ usersRatings
  where fromRating userRating = M.mapWithKey expand1 userRating
          where expand1 item1 rating1 = M.mapMaybeWithKey expand2 userRating
                  where expand2 item2 rating2 | item1 == item2 = Nothing
                                              | otherwise = Just (1,rating1 - rating2)
 
predict :: Ord a => SlopeOne a -> Rating a -> Rating a
predict (SlopeOne matrixIn) userRatings =
  let freqM = foldl' insert M.empty
                     [ (item1,found_rating,user_rating)
                     | (item1,innerMap) <- M.assocs matrixIn
                     , M.notMember item1 userRatings
                     , (user_item, user_rating) <- M.toList userRatings
                     , item1 /= user_item
                     , found_rating <- M.lookup user_item innerMap
                     ]
      insert oldM (item1,found_rating,user_rating) =
        let (count,norm_rating) = found_rating
            total_rating = fromIntegral count * (norm_rating + user_rating)
        in M.insertWith' addT item1 (count,total_rating) oldM
      normM = M.map (\(count, total_rating) -> total_rating / fromIntegral count) freqM
  in M.filter (\norm_rating -> norm_rating > 0) normM
 
-- This is a modified version of predict.  It also expect the
-- unnormalized SlopeOne' but this is a small detail
predict' :: Ord a => SlopeOne' a -> Rating a -> Rating a
predict' (SlopeOne' matrixIn) userRatings = M.mapMaybeWithKey calcItem matrixIn
  where calcItem item1 innerMap | M.member item1 userRatings = Nothing
                                | M.null combined = Nothing
                                | norm_rating <= 0 = Nothing
                                | otherwise = Just norm_rating
          where combined = M.intersectionWith weight innerMap userRatings
                (total_count,total_rating) = foldl1' addT (M.elems combined)
                norm_rating = total_rating / fromIntegral total_count
        weight (count,rating) user_rating =
          (count,rating + fromIntegral count *  user_rating)
 
freqs (SlopeOne matrixIn) userRatings =
  let freqs = [ (item1,item,rating,find (item1,item))
              | item1 <- M.keys matrixIn
              , (item, rating) <- M.toList userRatings]
      find (item1,item) = M.findWithDefault (0,0) item (matrixIn M.! item1)
  in freqs
 
userData :: [Rating String]
userData = map M.fromList [
 [("squid", 1.0), ("cuttlefish", 0.5), ("octopus", 0.2)],
 [("squid", 1.0), ("octopus", 0.5), ("nautilus", 0.2)],
 [("squid", 0.2), ("octopus", 1.0), ("cuttlefish", 0.4), ("nautilus", 0.4)],
 [("cuttlefish", 0.9), ("octopus", 0.4), ("nautilus", 0.5)]
 ]
 
userInfo = M.fromList [("squid", 0.4)]
 
predictions = predict (update empty userData) userInfo
 
predictions' = predict' (update2 empty' userData) userInfo