-{--
+{--
This is a script to generate the necessary tables to support Windows code page
-encoding/decoding.
+encoding/decoding.
License: see libraries/base/LICENSE
tables required for the CJK double-byte codepages are too large to be
statically linked into every executable. We plan to add support for them once
GHC is able to produce Windows DLLs.
-
---}
+--}
module Main where
main :: IO ()
main = do
moduleName:outFile:files <- getArgs
- sbes <- mapM readMapAndIx files
+ let badFiles = -- These fail with an error like
+ -- MakeTable: Enum.toEnum{Word8}: tag (33088) is outside of bounds (0,255)
+ -- I have no idea what's going on, so for now we just
+ -- skip them.
+ ["CPs/CP932.TXT",
+ "CPs/CP936.TXT",
+ "CPs/CP949.TXT",
+ "CPs/CP950.TXT"]
+ let files' = filter (`notElem` badFiles) files
+ sbes <- mapM readMapAndIx files'
+ putStrLn "Writing output"
withBinaryFile outFile WriteMode $ flip hPutStr
- $ unlines $ makeTableFile moduleName files sbes
+ $ unlines $ makeTableFile moduleName files' sbes
where
readMapAndIx f = do
+ putStrLn ("Reading " ++ f)
m <- readMap f
return (codePageNum f, m)
readCharHex s = if c > fromEnum (maxBound :: Word16)
then error "Can't handle non-BMP character."
else toEnum c
- where c = readHex' s
+ where c = readHex' s
-------------------------------------------
++ [" ]"]
mkTableEntry (i,m) = " (" ++ show i ++ ", " ++ makeSBE m ++ " )"
blockSizeText = ["blockBitSize :: Int", "blockBitSize = " ++ show blockBitSize]
-
+
makeSBE :: Map.Map Word8 Char -> String
makeSBE m = unlines
[ "SingleByteCP {"
, " decoderArray = " ++ mkConvArray es
- , " , encoderArray = " ++ mkCompactArray (swapMap m)
+ , " , encoderArray = " ++ mkCompactArray (swapMap m)
, " }"
]
where
type CompressState b = (Map.Map Int [b], Map.Map [b] Int)
-- each entry in the list corresponds to a block of size n.
-compress :: (Bounded a, Enum a, Ord a, Bounded b, Ord b) => Int -> Map.Map a b
+compress :: (Bounded a, Enum a, Ord a, Bounded b, Ord b) => Int -> Map.Map a b
-> ([Int], CompressState b)
compress n ms = runState (mapM lookupOrAdd chunks) (Map.empty, Map.empty)
where
-- Static parts of the generated module.
languageDirectives :: [String]
-languageDirectives = ["{-# LANGUAGE MagicHash #-}"]
+languageDirectives = ["{-# LANGUAGE CPP, MagicHash, NoImplicitPrelude #-}"]
firstComment :: [FilePath] -> [String]
firstComment files = map ("-- " ++) $
[ "Do not edit this file directly!"
- , "It was generated by the MakeTable.hs script using the following files:"
+ , "It was generated by the MakeTable.hs script using the files below."
+ , "To regenerate it, run \"make\" in ../../../../codepages/"
+ , ""
+ , "Files:"
] ++ map takeFileName files
theImports :: [String]
theImports = map ("import " ++ )
- ["GHC.Prim", "GHC.Base", "GHC.Word", "GHC.Num"]
+ ["GHC.Prim", "GHC.Base", "GHC.Word"]
theTypes :: [String]
theTypes = [ "data ConvArray a = ConvArray Addr#"
instance Embed Word8 where
mkHex = showHex'
-
+
instance Embed Word16 where
mkHex = repDualByte
showHex' s = "\\x" ++ showHex s ""
repDualByte :: Enum c => c -> String
-repDualByte c
+repDualByte c
| n >= 2^(16::Int) = error "value is too high!"
-- NOTE : this assumes little-endian architecture. But we're only using this on Windows,
-- so it's probably OK.