[commit: ghc] master: Tweak the lexer: In particular, improve notFollowedBy and friends (c250f93)
Ian Lynagh
igloo at earth.li
Tue May 15 03:34:16 CEST 2012
Repository : ssh://darcs.haskell.org//srv/darcs/ghc
On branch : master
http://hackage.haskell.org/trac/ghc/changeset/c250f93bd38c7d8f6453dd79dd9951f9a02bf5a7
>---------------------------------------------------------------
commit c250f93bd38c7d8f6453dd79dd9951f9a02bf5a7
Author: Ian Lynagh <igloo at earth.li>
Date: Tue May 15 00:16:59 2012 +0100
Tweak the lexer: In particular, improve notFollowedBy and friends
We were hitting a problem when reading the LANGUAGE/OPTIONS pragmas
from GHC.TypeLits, where the buffer ended "{-". The rules for the
start-comment lexeme check that "{-" is not followed by "#", but the
test returned False when there was no next character. Therefore we
were lexing this as as an open-curly lexeme (only consuming the "{",
and not reaching the end of the buffer),
which meant the options parser think that it had reached the end of
the options.
Now we correctly lex as "{-".
>---------------------------------------------------------------
compiler/parser/Lexer.x | 21 ++++++++++++++-------
1 files changed, 14 insertions(+), 7 deletions(-)
diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x
index 378a25c..e40f7b2 100644
--- a/compiler/parser/Lexer.x
+++ b/compiler/parser/Lexer.x
@@ -766,13 +766,17 @@ pop_and act span buf len = do _ <- popLexState
nextCharIs :: StringBuffer -> (Char -> Bool) -> Bool
nextCharIs buf p = not (atEnd buf) && p (currentChar buf)
+{-# INLINE nextCharIsNot #-}
+nextCharIsNot :: StringBuffer -> (Char -> Bool) -> Bool
+nextCharIsNot buf p = not (nextCharIs buf p)
+
notFollowedBy :: Char -> AlexAccPred Int
notFollowedBy char _ _ _ (AI _ buf)
- = nextCharIs buf (/=char)
+ = nextCharIsNot buf (== char)
notFollowedBySymbol :: AlexAccPred Int
notFollowedBySymbol _ _ _ (AI _ buf)
- = nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~")
+ = nextCharIsNot buf (`elem` "!#$%&*+./<=>?@\\^|-~")
-- We must reject doc comments as being ordinary comments everywhere.
-- In some cases the doc comment will be selected as the lexeme due to
@@ -782,13 +786,16 @@ notFollowedBySymbol _ _ _ (AI _ buf)
isNormalComment :: AlexAccPred Int
isNormalComment bits _ _ (AI _ buf)
| haddockEnabled bits = notFollowedByDocOrPragma
- | otherwise = nextCharIs buf (/='#')
+ | otherwise = nextCharIsNot buf (== '#')
where
notFollowedByDocOrPragma
- = not $ spaceAndP buf (`nextCharIs` (`elem` "|^*$#"))
+ = afterOptionalSpace buf (\b -> nextCharIsNot b (`elem` "|^*$#"))
-spaceAndP :: StringBuffer -> (StringBuffer -> Bool) -> Bool
-spaceAndP buf p = p buf || nextCharIs buf (==' ') && p (snd (nextChar buf))
+afterOptionalSpace :: StringBuffer -> (StringBuffer -> Bool) -> Bool
+afterOptionalSpace buf p
+ = if nextCharIs buf (== ' ')
+ then p (snd (nextChar buf))
+ else p buf
atEOL :: AlexAccPred Int
atEOL _ _ _ (AI _ buf) = atEnd buf || currentChar buf == '\n'
@@ -2341,7 +2348,7 @@ dispatch_pragmas prags span buf len = case Map.lookup (clean_pragma (lexemeToStr
known_pragma :: Map String Action -> AlexAccPred Int
known_pragma prags _ _ len (AI _ buf) = (isJust $ Map.lookup (clean_pragma (lexemeToString (offsetBytes (- len) buf) len)) prags)
- && (nextCharIs buf (\c -> not (isAlphaNum c || c == '_')))
+ && (nextCharIsNot buf (\c -> isAlphaNum c || c == '_'))
clean_pragma :: String -> String
clean_pragma prag = canon_ws (map toLower (unprefix prag))
More information about the Cvs-ghc
mailing list