File searching

Simon Marlow simonmar@microsoft.com
Thu, 12 Dec 2002 11:14:28 -0000


This is a multi-part message in MIME format.

------=_NextPart_000_0169_01C2A1CF.A487CA60
Content-Type: text/plain;
	charset="us-ascii"
Content-Transfer-Encoding: 7bit

Hi Folks,

I've attached a Haskell module that describes GHC's file searching
strategy in the form of a simplified implementation, which serves as a
precise description of what GHC actually does.  I'm hoping we can use
this as a starting point from which to discuss extensions.  I've already
added two extensions (see the source for details).

The simulation is complete, in that it covers what happens with
packages, what happens when you name a source file on the command-line,
what happens with files named in a ':load' command in GHCi, and so on.
All the various flags affecting GHC's search behaviour can also be
tweaked.

Enjoy...

Cheers,
	Simon

------=_NextPart_000_0169_01C2A1CF.A487CA60
Content-Type: application/octet-stream;
	name="search.hs"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: attachment;
	filename="search.hs"

module GHCSearch (findModule, explicitFilename) where=0A=
=0A=
import Data.Maybe=0A=
import Data.Char=0A=
=0A=
-- This source file embodies GHC's file searching strategy.  There are=0A=
-- two entry points:=0A=
--=0A=
--   * findModule takes a module name and returns a list of =
(path,location)=0A=
--     pairs: if the file named in the first component exists, then the=0A=
--     location in the second component is used for that module.  It is =
an=0A=
--     error (currently unchecked by GHC) for more than one of the =
locations=0A=
--     to be valid.=0A=
--=0A=
--   * explicitFilenmae takes a string passed as an argument on the GHC=0A=
--     command-line (either one-shot or --make), or an argument from the=0A=
--     :load command in GHC.  It returns a list of the same form as =
findModule.=0A=
--=0A=
-- This module is basically a standalone version of =0A=
-- ghc/compiler/main/Finder.lhs in the compiler itself.=0A=
=0A=
-- Extensions relative to GHC:=0A=
--    - for a module A.B.C, we search for files named <patch>/A.B.C.<ext>=0A=
--	as well as <path>/A/B/C.<ext>.  This applies both to package and=0A=
--	home locations.=0A=
--=0A=
--    - the .hi file for an explicitly-named source file is now the=0A=
--	full module name rather than just the last component.=0A=
--=0A=
=0A=
-- Tunables:=0A=
search_paths =3D [".", "D1","D2"]  	-- modified by the -i option=0A=
src_sufs     =3D [ ".hs", ".lhs" ]	-- (not tunable in GHC)=0A=
hi_suf       =3D ".hi" 			-- or value of -hisuf flag=0A=
obj_suf      =3D ".o"			-- or value of -osuf flag=0A=
hi_dir       =3D Nothing 			-- set to (Just d) for -hidir d.=0A=
obj_dir	     =3D Nothing  		-- set to (Just d) for -odir d.=0A=
=0A=
-- For each package, we list the package name and its import path.=0A=
packages :: [(String, [FilePath])]=0A=
packages =3D [=0A=
  ("package1", ["P1"]),=0A=
  ("package1", ["P2"])=0A=
  ]=0A=
=0A=
package_hi_suf =3D ".hi"	-- not affected by the -hisuf flag, but changed=0A=
			-- by -prof to ".p_hi", and similarly for other "ways".=0A=
=0A=
type Module =3D String=0A=
=0A=
-- Information about the files associated with a module.=0A=
data ModLocation=0A=
  =3D ModLocation {=0A=
	locSource :: Maybe FilePath,=0A=
		-- the source, if it exists.  Modules in a package don't=0A=
		-- normally have source files.=0A=
=0A=
	locHi     :: FilePath,=0A=
		-- The interface file.  This might not exist (eg. in GHCi=0A=
		-- we don't normally write out interface files), but if we=0A=
		-- do create one, this is where it goes.=0A=
=0A=
	locObj    :: Maybe FilePath=0A=
		-- The object file.  Again, this might not exist, but if=0A=
		-- it does exist, its timestamp is used in deciding whether=0A=
		-- to recompile or not.=0A=
		-- =0A=
		-- Modules in a package don't normally have separate object=0A=
		-- files, so in that case this field is set to Nothing.=0A=
  }=0A=
  deriving Show=0A=
=0A=
=0A=
findModule :: Module -> [(FilePath,ModLocation)]=0A=
findModule mod =3D home_locs mod ++ package_locs mod=0A=
=0A=
explicitFilename :: FilePath -> [(FilePath,ModLocation)]=0A=
explicitFilename str=0A=
  | looks_like_a_module       =3D findModule str=0A=
  | looks_like_a_src_filename =3D [mk_src_loc (Just module_name) p m ext]=0A=
  | otherwise                 =3D []=0A=
  where =0A=
	looks_like_a_module =3D isUpper (head str) && all isModId (tail str)=0A=
	isModId c =3D isAlphaNum c || c =3D=3D '\''=0A=
=0A=
        (p,m,e) =3D splitFilename3 str  =0A=
	ext =3D '.':e=0A=
=0A=
	looks_like_a_src_filename =3D ext `elem` src_sufs=0A=
	module_name =3D getModuleName str=0A=
=0A=
-- Should return the module name for a given source file by peeking in=0A=
-- the file.  XXX bogus for now.=0A=
getModuleName str =3D "A.B.C"=0A=
=0A=
-- The possible locations in the "home package" for a given module=0A=
home_locs mod =3D =0A=
  [ mk_src_loc Nothing p m e=0A=
  | p <- search_paths, =0A=
    m <- [mod, dots_to_slashes mod],=0A=
    e <- src_sufs=0A=
  ]=0A=
  where dots_to_slashes =3D map f=0A=
	  where f '.' =3D '/'=0A=
		f c   =3D c=0A=
=0A=
-- The possible locations in other packages for a given module=0A=
package_locs mod =3D=0A=
  [ mk_hi_loc p m hi_suf=0A=
  | (pkg_name, paths) <- packages,=0A=
    p <- paths,=0A=
    m <- [mod, dots_to_slashes mod]=0A=
  ]=0A=
  where dots_to_slashes =3D map f=0A=
	  where f '.' =3D '/'=0A=
		f c   =3D c=0A=
=0A=
-- Make a ModLocation given the name of the source file.=0A=
mk_src_loc=0A=
	:: Maybe Module		-- Just mod <=3D> use mod as the basename for=0A=
				-- the .hi file.=0A=
	-> String		-- The directory containing the source file=0A=
	-> String		-- The basename of the source file=0A=
	-> String		-- The suffix of the source file=0A=
	-> (FilePath, ModLocation)=0A=
mk_src_loc maybe_mod path basename suf =3D =0A=
    (src_file, =0A=
     ModLocation {=0A=
	locSource =3D Just src_file,=0A=
	locHi     =3D hi_file,=0A=
	locObj    =3D Just obj_file=0A=
     }=0A=
    )=0A=
 where src_file =3D path ++ '/':basename ++ suf=0A=
=0A=
       hi_file  =3D hi_path ++ '/':hi_basename ++ hi_suf=0A=
       hi_path =0A=
	  | isJust hi_dir =3D fromJust hi_dir=0A=
	  | otherwise     =3D path=0A=
       hi_basename =0A=
	  | isJust maybe_mod =3D fromJust maybe_mod=0A=
	  | otherwise        =3D basename=0A=
=0A=
       obj_file =3D obj_path ++ '/':basename ++ obj_suf=0A=
       obj_path =0A=
	  | isJust obj_dir =3D fromJust obj_dir=0A=
	  | otherwise      =3D path=0A=
=0A=
mk_hi_loc path basename suf =3D=0A=
     (hi_file,=0A=
      ModLocation {=0A=
	locSource =3D Nothing,=0A=
	locHi	  =3D hi_file,=0A=
	locObj    =3D Nothing=0A=
      }=0A=
     )=0A=
  where hi_file =3D path ++ '/':basename ++ package_hi_suf=0A=
=0A=
-- =
-------------------------------------------------------------------------=
----=0A=
-- Utils=0A=
=0A=
type Suffix =3D String=0A=
=0A=
-- "foo/bar/xyzzy.ext" -> ("foo/bar", "xyzzy", ".ext")=0A=
splitFilename3 :: String -> (String,String,Suffix)=0A=
splitFilename3 str=0A=
   =3D let (dir, rest) =3D split_longest_prefix str (=3D=3D'/')=0A=
	 (name, ext) =3D splitFilename rest=0A=
	 real_dir | null dir  =3D "."=0A=
		  | otherwise =3D dir=0A=
     in  (real_dir, name, ext)=0A=
=0A=
splitFilename :: String -> (String,Suffix)=0A=
splitFilename f =3D split_longest_prefix f (=3D=3D'.')=0A=
=0A=
-- split a string at the last character where 'pred' is True,=0A=
-- returning a pair of strings. The first component holds the string=0A=
-- up (but not including) the last character for which 'pred' returned=0A=
-- True, the second whatever comes after (but also not including the=0A=
-- last character).=0A=
--=0A=
-- If 'pred' returns False for all characters in the string, the original=0A=
-- string is returned in the second component (and the first one is just=0A=
-- empty).=0A=
split_longest_prefix :: String -> (Char -> Bool) -> (String,String)=0A=
split_longest_prefix s pred=0A=
  =3D case pre of=0A=
	[]      -> ([], reverse suf)=0A=
	(_:pre) -> (reverse pre, reverse suf)=0A=
  where (suf,pre) =3D break pred (reverse s)=0A=
=0A=

------=_NextPart_000_0169_01C2A1CF.A487CA60--