Collaborative filtering
From HaskellWiki
(Difference between revisions)
m |
(Add update2) |
||
| Line 6: | Line 6: | ||
The predict' function replaces predict. | The predict' function replaces predict. | ||
| + | The update2 | ||
<haskell> | <haskell> | ||
| Line 24: | Line 25: | ||
-- then so is the (SlopeOne item) type. | -- then so is the (SlopeOne item) type. | ||
newtype SlopeOne item = SlopeOne (M.Map item (M.Map item (Count,RatingValue))) | newtype SlopeOne item = SlopeOne (M.Map item (M.Map item (Count,RatingValue))) | ||
| + | deriving (Show) | ||
| + | |||
| + | -- The SlopeOne' matrix is an unormalized version of SlopeOne | ||
| + | newtype SlopeOne' item = SlopeOne' (M.Map item (M.Map item (Count,RatingValue))) | ||
deriving (Show) | deriving (Show) | ||
empty = SlopeOne M.empty | empty = SlopeOne M.empty | ||
| + | empty' = SlopeOne' M.empty | ||
-- This performs a strict addition on pairs made of two nuumeric types | -- This performs a strict addition on pairs made of two nuumeric types | ||
addT (a,b) (c,d) = let (l,r) = (a+c, b+d) in l `seq` r `seq` (l, r) | addT (a,b) (c,d) = let (l,r) = (a+c, b+d) in l `seq` r `seq` (l, r) | ||
| + | |||
-- There is never an entry for the "diagonal" elements with equal | -- There is never an entry for the "diagonal" elements with equal | ||
-- items in the pair: (foo,foo) is never in the SlopeOne. | -- items in the pair: (foo,foo) is never in the SlopeOne. | ||
update :: Ord item => SlopeOne item -> [Rating item] -> SlopeOne item | update :: Ord item => SlopeOne item -> [Rating item] -> SlopeOne item | ||
| - | update (SlopeOne | + | update (SlopeOne matrixInNormed) usersRatings = |
| + | SlopeOne . M.map (M.map norm) . foldl' update' matrixIn $ usersRatings | ||
where update' oldMatrix userRatings = | where update' oldMatrix userRatings = | ||
foldl' (\oldMatrix (itemPair, rating) -> insert oldMatrix itemPair rating) | foldl' (\oldMatrix (itemPair, rating) -> insert oldMatrix itemPair rating) | ||
| Line 47: | Line 55: | ||
outer _ innerMap = M.insertWith' addT item2 newRating innerMap | outer _ innerMap = M.insertWith' addT item2 newRating innerMap | ||
norm (count,total_rating) = (count, total_rating / fromIntegral count) | norm (count,total_rating) = (count, total_rating / fromIntegral count) | ||
| + | un_norm (count,rating) = (count, rating * fromIntegral count) | ||
| + | matrixIn = M.map (M.map un_norm) matrixInNormed | ||
| + | |||
| + | -- This version of update2 makes an unnormalize slopeOne' from each | ||
| + | -- Rating and combines them using Map.union* operations and addT. | ||
| + | update2 :: Ord item => SlopeOne' item -> [Rating item] -> SlopeOne' item | ||
| + | update2 s [] = s | ||
| + | update2 (SlopeOne' matrixIn) usersRatings = | ||
| + | SlopeOne' . M.unionsWith (M.unionWith addT) . (matrixIn:) . map fromRating $ usersRatings | ||
| + | where fromRating userRating = M.mapWithKey expand1 userRating | ||
| + | where expand1 item1 rating1 = M.mapMaybeWithKey expand2 userRating | ||
| + | where expand2 item2 rating2 | item1 == item2 = Nothing | ||
| + | | otherwise = Just (1,rating1 - rating2) | ||
predict :: Ord a => SlopeOne a -> Rating a -> Rating a | predict :: Ord a => SlopeOne a -> Rating a -> Rating a | ||
| Line 65: | Line 86: | ||
in M.filter (\norm_rating -> norm_rating > 0) normM | in M.filter (\norm_rating -> norm_rating > 0) normM | ||
| - | + | -- This is a modified version of predict. It also expect the | |
| - | predict' :: Ord a => SlopeOne a -> Rating a -> Rating a | + | -- unnormalized SlopeOne' but this is a small detail |
| - | predict' (SlopeOne matrixIn) userRatings = M.mapMaybeWithKey calcItem matrixIn | + | predict' :: Ord a => SlopeOne' a -> Rating a -> Rating a |
| + | predict' (SlopeOne' matrixIn) userRatings = M.mapMaybeWithKey calcItem matrixIn | ||
where calcItem item1 innerMap | M.member item1 userRatings = Nothing | where calcItem item1 innerMap | M.member item1 userRatings = Nothing | ||
| M.null combined = Nothing | | M.null combined = Nothing | ||
| Line 76: | Line 98: | ||
norm_rating = total_rating / fromIntegral total_count | norm_rating = total_rating / fromIntegral total_count | ||
weight (count,rating) user_rating = | weight (count,rating) user_rating = | ||
| - | (count,fromIntegral count * | + | (count,rating + fromIntegral count * user_rating) |
freqs (SlopeOne matrixIn) userRatings = | freqs (SlopeOne matrixIn) userRatings = | ||
| Line 93: | Line 115: | ||
] | ] | ||
| - | |||
userInfo = M.fromList [("squid", 0.4)] | userInfo = M.fromList [("squid", 0.4)] | ||
| - | predictions = predict | + | |
| - | predictions' = predict' | + | predictions = predict (update empty userData) userInfo |
| + | |||
| + | predictions' = predict' (update2 empty' userData) userInfo | ||
</haskell> | </haskell> | ||
Revision as of 19:21, 28 August 2007
This page was added to discuss different versions of the code for collaborative filtering at Bryan's blog.
Chris' version
I renamed the variables and then reorganized the code a bit.
The predict' function replaces predict. The update2
module WeightedSlopeOne (Rating, SlopeOne, empty, predict, update) where import Data.List (foldl',foldl1') import qualified Data.Map as M -- The item type is a polymorphic parameter. Since it goes into a Map -- it must be able to be compared, so item must be an instance of Ord. type Count = Int type RatingValue = Double -- The Rating is the known (item,Rating) information for a particular "user" type Rating item = M.Map item RatingValue -- The SlopeOne matrix is indexed by pairs of items and is implmeneted -- as a sparse map of maps. If the item type is an instance of Show -- then so is the (SlopeOne item) type. newtype SlopeOne item = SlopeOne (M.Map item (M.Map item (Count,RatingValue))) deriving (Show) -- The SlopeOne' matrix is an unormalized version of SlopeOne newtype SlopeOne' item = SlopeOne' (M.Map item (M.Map item (Count,RatingValue))) deriving (Show) empty = SlopeOne M.empty empty' = SlopeOne' M.empty -- This performs a strict addition on pairs made of two nuumeric types addT (a,b) (c,d) = let (l,r) = (a+c, b+d) in l `seq` r `seq` (l, r) -- There is never an entry for the "diagonal" elements with equal -- items in the pair: (foo,foo) is never in the SlopeOne. update :: Ord item => SlopeOne item -> [Rating item] -> SlopeOne item update (SlopeOne matrixInNormed) usersRatings = SlopeOne . M.map (M.map norm) . foldl' update' matrixIn $ usersRatings where update' oldMatrix userRatings = foldl' (\oldMatrix (itemPair, rating) -> insert oldMatrix itemPair rating) oldMatrix itemCombos where itemCombos = [ ((item1, item2), (1, rating1 - rating2)) | (item1, rating1) <- ratings , (item2, rating2) <- ratings , item1 /= item2] ratings = M.toList userRatings insert outerMap (item1, item2) newRating = M.insertWith' outer item1 newOuterEntry outerMap where newOuterEntry = M.singleton item2 newRating outer _ innerMap = M.insertWith' addT item2 newRating innerMap norm (count,total_rating) = (count, total_rating / fromIntegral count) un_norm (count,rating) = (count, rating * fromIntegral count) matrixIn = M.map (M.map un_norm) matrixInNormed -- This version of update2 makes an unnormalize slopeOne' from each -- Rating and combines them using Map.union* operations and addT. update2 :: Ord item => SlopeOne' item -> [Rating item] -> SlopeOne' item update2 s [] = s update2 (SlopeOne' matrixIn) usersRatings = SlopeOne' . M.unionsWith (M.unionWith addT) . (matrixIn:) . map fromRating $ usersRatings where fromRating userRating = M.mapWithKey expand1 userRating where expand1 item1 rating1 = M.mapMaybeWithKey expand2 userRating where expand2 item2 rating2 | item1 == item2 = Nothing | otherwise = Just (1,rating1 - rating2) predict :: Ord a => SlopeOne a -> Rating a -> Rating a predict (SlopeOne matrixIn) userRatings = let freqM = foldl' insert M.empty [ (item1,found_rating,user_rating) | (item1,innerMap) <- M.assocs matrixIn , M.notMember item1 userRatings , (user_item, user_rating) <- M.toList userRatings , item1 /= user_item , found_rating <- M.lookup user_item innerMap ] insert oldM (item1,found_rating,user_rating) = let (count,norm_rating) = found_rating total_rating = fromIntegral count * (norm_rating + user_rating) in M.insertWith' addT item1 (count,total_rating) oldM normM = M.map (\(count, total_rating) -> total_rating / fromIntegral count) freqM in M.filter (\norm_rating -> norm_rating > 0) normM -- This is a modified version of predict. It also expect the -- unnormalized SlopeOne' but this is a small detail predict' :: Ord a => SlopeOne' a -> Rating a -> Rating a predict' (SlopeOne' matrixIn) userRatings = M.mapMaybeWithKey calcItem matrixIn where calcItem item1 innerMap | M.member item1 userRatings = Nothing | M.null combined = Nothing | norm_rating <= 0 = Nothing | otherwise = Just norm_rating where combined = M.intersectionWith weight innerMap userRatings (total_count,total_rating) = foldl1' addT (M.elems combined) norm_rating = total_rating / fromIntegral total_count weight (count,rating) user_rating = (count,rating + fromIntegral count * user_rating) freqs (SlopeOne matrixIn) userRatings = let freqs = [ (item1,item,rating,find (item1,item)) | item1 <- M.keys matrixIn , (item, rating) <- M.toList userRatings] find (item1,item) = M.findWithDefault (0,0) item (matrixIn M.! item1) in freqs userData :: [Rating String] userData = map M.fromList [ [("squid", 1.0), ("cuttlefish", 0.5), ("octopus", 0.2)], [("squid", 1.0), ("octopus", 0.5), ("nautilus", 0.2)], [("squid", 0.2), ("octopus", 1.0), ("cuttlefish", 0.4), ("nautilus", 0.4)], [("cuttlefish", 0.9), ("octopus", 0.4), ("nautilus", 0.5)] ] userInfo = M.fromList [("squid", 0.4)] predictions = predict (update empty userData) userInfo predictions' = predict' (update2 empty' userData) userInfo
