[commit: base] master: Add System.IO.char8, the encoding used by openBinaryFile, (245a3e3)

Simon Marlow marlowsd at gmail.com
Tue May 24 15:13:40 CEST 2011


Repository : ssh://darcs.haskell.org//srv/darcs/packages/base

On branch  : master

http://hackage.haskell.org/trac/ghc/changeset/245a3e3e650e1b110f620e39925bfb0cc9b93002

>---------------------------------------------------------------

commit 245a3e3e650e1b110f620e39925bfb0cc9b93002
Author: Simon Marlow <marlowsd at gmail.com>
Date:   Tue Apr 5 09:57:22 2011 +0100

    Add System.IO.char8, the encoding used by openBinaryFile,
    and correct the documentation for hSetBinaryMode which claimed that
    it was using the latin1 encoding when in fact it was using an
    unchecked modulo-256 version of it.

>---------------------------------------------------------------

 GHC/IO/Encoding.hs |   13 ++++++++++++-
 GHC/IO/Handle.hs   |    2 +-
 System/IO.hs       |    1 +
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/GHC/IO/Encoding.hs b/GHC/IO/Encoding.hs
index 505824e..92ca843 100644
--- a/GHC/IO/Encoding.hs
+++ b/GHC/IO/Encoding.hs
@@ -22,6 +22,7 @@ module GHC.IO.Encoding (
   utf16, utf16le, utf16be,
   utf32, utf32le, utf32be, 
   localeEncoding, fileSystemEncoding, foreignEncoding,
+  char8,
   mkTextEncoding,
   ) where
 
@@ -125,6 +126,16 @@ fileSystemEncoding = CodePage.mkLocaleEncoding RoundtripFailure
 foreignEncoding = CodePage.mkLocaleEncoding IgnoreCodingFailure
 #endif
 
+-- | An encoding in which Unicode code points are translated to bytes
+-- by taking the code point modulo 256.  When decoding, bytes are
+-- translated directly into the equivalent code point.
+--
+-- This encoding never fails in either direction.  However, encoding
+-- discards information, so encode followed by decode is not the
+-- identity.
+char8 :: TextEncoding
+char8 = Latin1.latin1
+
 -- | Look up the named Unicode encoding.  May fail with 
 --
 --  * 'isDoesNotExistError' if the encoding is unknown
@@ -183,7 +194,7 @@ mkTextEncoding e = case mb_coding_failure_mode of
                                             ("unknown encoding:" ++ e)  Nothing Nothing)
 
 latin1_encode :: CharBuffer -> Buffer Word8 -> IO (CharBuffer, Buffer Word8)
-latin1_encode input output = fmap (\(_why,input',output') -> (input',output')) $ Latin1.latin1_encode input output -- unchecked, used for binary
+latin1_encode input output = fmap (\(_why,input',output') -> (input',output')) $ Latin1.latin1_encode input output -- unchecked, used for char8
 --latin1_encode = unsafePerformIO $ do mkTextEncoder Iconv.latin1 >>= return.encode
 
 latin1_decode :: Buffer Word8 -> CharBuffer -> IO (Buffer Word8, CharBuffer)
diff --git a/GHC/IO/Handle.hs b/GHC/IO/Handle.hs
index f42fd55..fcfa92d 100644
--- a/GHC/IO/Handle.hs
+++ b/GHC/IO/Handle.hs
@@ -551,7 +551,7 @@ hIsTerminalDevice handle = do
 -- | Select binary mode ('True') or text mode ('False') on a open handle.
 -- (See also 'openBinaryFile'.)
 --
--- This has the same effect as calling 'hSetEncoding' with 'latin1', together
+-- This has the same effect as calling 'hSetEncoding' with 'char8', together
 -- with 'hSetNewlineMode' with 'noNewlineTranslation'.
 --
 hSetBinaryMode :: Handle -> Bool -> IO ()
diff --git a/System/IO.hs b/System/IO.hs
index ab52244..bf26835 100644
--- a/System/IO.hs
+++ b/System/IO.hs
@@ -201,6 +201,7 @@ module System.IO (
     utf16, utf16le, utf16be,
     utf32, utf32le, utf32be, 
     localeEncoding,
+    char8,
     mkTextEncoding,
 #endif
 





More information about the Cvs-libraries mailing list