trying to tie the knot

oleg@pobox.com oleg@pobox.com
Fri, 12 Apr 2002 19:09:03 -0700 (PDT)


Hello!

Hal Daume III wrote:
[description of a parsing problem that involves forward references]

Forward references is the problem. To properly solve it, you have to
find a fixpoint. The best way to avoid hitting the bottom is to make
sure that the fixpoint combinator is applied to a function. Hence the
solution:

type TreeDictLate = [(String,DecisionTreeLate)] -- lookup for subtrees
newtype DecisionTreeLate = DTL (TreeDictLate -> DecisionTree)

ft (DTL late_tree) st = late_tree st

readDecisionTree :: String -> DecisionTree
readDecisionTree s = 
  let (_, wholeTreeLate, subTrees) 
              = readDecisionTree' False [] (filter (/=[]) (lines s))
  in ft wholeTreeLate subTrees

The function readDecisionTree' will return a delayed decision tree: a
function that _will_ yield the decision tree when it is applied to the
forest dictionary. The forest dictionary is itself an assoc list of
tree labels and _late_ decision trees.

Now the test "readDecisionTree $ unlines simpleDT3" passes as well,
and gives the reasonable result:

simpleDT3 = [
   "isArgument0 = t: u (33.0/1.4)",
   "isArgument0 = f:",
   "|   isArgument1 = f :[S1]",
   "|   isArgument1 = t:",
   "|   |   isRecursive1 = t: s (945.0/39.8)",
   "|   |   isRecursive1 = f: u (2.0/1.0)",
   "",
   "Subtree [S1]",
   "",
   "localDefCount <= 15 : u (281.0/1.4)",
   "localDefCount > 15 : s (139.0/11.8)"]


DecisionTree> readDecisionTree $ unlines simpleDT3

Test "isArgument0" "=" "t" (Value "u" 33.0 1.4)
  (Test "isArgument0" "=" "f"
    (Test "isArgument1" "=" "f" 
      (Test "localDefCount" "<=" "15" (Value "u" 281.0 1.4) 
				      (Value "s" 139.0 11.8))
      (Test "isArgument1" "=" "t"
        (Test "isRecursive1" "=" "t" (Value "s" 945.0 39.8)
                                     (Value "u" 2.0 1.0))
        (Value "" 0.0 0.0)))
     (Value "" 0.0 0.0)) 

which seems reasonable.

And even the following passes:
simpleDT4 = [
   "isArgument0 = t: u (33.0/1.4)",
   "isArgument0 = f:",
   "|   isArgument1 = f :[S1]",
   "|   isArgument1 = t :[S2]",
   "",
   "Subtree [S1]",
   "",
   "localDefCount <= 15 : [S2]",
   "localDefCount > 15 : s (139.0/11.8)",
   "",
   "Subtree [S2]",
   "",
   "ll <= 15 : u (2.0/1.4)",
   "ll > 15 : s (1.0/11.8)"]
readDecisionTree $ unlines simpleDT4
[skipped]

The code enclosed. BTW, it seemed the original code had a few bugs.

module DecisionTree where

import IO
import List

data DecisionTree = Test String String String DecisionTree DecisionTree | 
     Value String Double Double
     deriving (Show, Eq, Ord, Read)

type TreeDictLate = [(String,DecisionTreeLate)] -- lookup for subtrees
newtype DecisionTreeLate = DTL (TreeDictLate -> DecisionTree)

ft (DTL late_tree) st = late_tree st

readDecisionTree :: String -> DecisionTree
readDecisionTree s = 
  let (_, wholeTreeLate, subTrees) 
              = readDecisionTree' False [] (filter (/=[]) (lines s))
  in ft wholeTreeLate subTrees

readDecisionTree' :: Bool -> TreeDictLate -> [String] -> ([String],  DecisionTreeLate, TreeDictLate)

readDecisionTree' _ subTrees [] = ([], DTL $ \st -> Value "" 0 0, subTrees)

readDecisionTree' areValue subTrees (x:xs) =
   let (lineDepth, lineType, values') = readLine x
       (subTreesX,xs1) = if xs /= [] && "Subtree" `isPrefixOf` head xs
                         then readSubTrees subTrees xs
		         else (subTrees,xs)
       (xs',   lhs,   subTrees')   = readDecisionTree' False subTreesX  xs1
       (xs'' , rhs,   subTrees'')  = readDecisionTree' False subTrees' xs'
       (xs''', other, subTrees''') = readDecisionTree' True  subTreesX  xs1
       values = values' ++ ["0.0"]
   in  if lineType   -- are we a value
       then if areValue
            then (xs1,    DTL $ \st->Value (values !! 3) (read (values !! 4)) (read (values !! 5)), subTreesX)
	    else (xs''', DTL $ \st->Test (values !! 0) (values !! 1) (values !! 2) (Value (values !! 3) (read (values !! 4)) (read (values !! 5))) (ft other st), subTrees''')
       else if '[' == head (last values')   -- are we a subtree?
            then (xs'', DTL $ \st-> 
	                let (Just dt) = lookup (last values') st
		        in Test (values !! 0) (values !! 1) (values !!2) (ft dt st) (ft lhs st), subTrees')
	    else (xs'', DTL $ \st->Test (values !! 0) (values !! 1) (values !! 2) (ft lhs st) (ft rhs st), subTrees'')

readSubTrees subTrees [] = (subTrees,[])

readSubTrees subTrees (x:xs)
	| "Subtree" `isPrefixOf` x =
	       let name = (words x) !! 1
		   treeDef = takeWhile (\x -> not ("Subtree" `isPrefixOf` x)) xs
		   rest    = dropWhile (\x -> not ("Subtree" `isPrefixOf` x)) xs
		   (_, thisDT, _) = readDecisionTree' False subTrees treeDef
	       in  readSubTrees ((name,thisDT):subTrees) rest
	| otherwise = (subTrees,(x:xs))

readLine :: String -> (Int,Bool,[String])  -- True = Value, False = Test
readLine s = (length (elemIndices '|' s), ')' `elem` s, vals)
   where vals = words $ 
	        map (\x -> if x `elem` ":()/" then ' ' else x) $
		dropWhile (`elem` "| ") s

simpleDT = 
   ["localDefCountSum <= 4 : p (101.0/6.0)",
   "localDefCountSum > 4 : u (7.0)"]

simpleDT2 = [
  "isArgument0 = t: u (33.0/1.4)",
  "isArgument0 = f:",
  "|   isArgument1 = f: u (9.0/1.3)",
  "|   isArgument1 = t:",
  "|   |   isRecursive1 = t: s (945.0/39.8)",
  "|   |   isRecursive1 = f: u (2.0/1.0)"]

{-
Test "isArgument0" "=" "t" 
(Value "u" 33.0 1.4) 
(Test "isArgument0" "=" "f" 
(Test "isArgument1" "=" "f" 
(Value "u" 9.0 1.3) 
(Test "isArgument1" "=" "t" 
(Test "isRecursive1" "=" "t" 
(Value "s" 945.0 39.8) 
(Value "u" 2.0 1.0)) 
(Value "" 0.0 0.0))) 
(Value "" 0.0 0.0))
-}

simpleDT3 = [
   "isArgument0 = t: u (33.0/1.4)",
   "isArgument0 = f:",
   "|   isArgument1 = f :[S1]",
   "|   isArgument1 = t:",
   "|   |   isRecursive1 = t: s (945.0/39.8)",
   "|   |   isRecursive1 = f: u (2.0/1.0)",
   "",
   "Subtree [S1]",
   "",
   "localDefCount <= 15 : u (281.0/1.4)",
   "localDefCount > 15 : s (139.0/11.8)"]

simpleDT4 = [
   "isArgument0 = t: u (33.0/1.4)",
   "isArgument0 = f:",
   "|   isArgument1 = f :[S1]",
   "|   isArgument1 = t :[S2]",
   "",
   "Subtree [S1]",
   "",
   "localDefCount <= 15 : [S2]",
   "localDefCount > 15 : s (139.0/11.8)",
   "",
   "Subtree [S2]",
   "",
   "ll <= 15 : u (2.0/1.4)",
   "ll > 15 : s (1.0/11.8)"]

--readDecisionTree $ unlines simpleDT