X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=codepages%2FMakeTable.hs;h=951274e8a434100e2da6b7378d56939ea315af26;hb=ff563091fe7552d348011770042ea94aac85f82f;hp=af123df4dddecef4fa40f182618c6b2a9997a25c;hpb=de76ff60453a0a57a9a942e4f0130ec2eca921f0;p=ghc-base.git diff --git a/codepages/MakeTable.hs b/codepages/MakeTable.hs index af123df..951274e 100644 --- a/codepages/MakeTable.hs +++ b/codepages/MakeTable.hs @@ -1,6 +1,6 @@ -{-- +{-- This is a script to generate the necessary tables to support Windows code page -encoding/decoding. +encoding/decoding. License: see libraries/base/LICENSE @@ -14,8 +14,7 @@ Currently, this script only supports single-byte encodings, since the lookup tables required for the CJK double-byte codepages are too large to be statically linked into every executable. We plan to add support for them once GHC is able to produce Windows DLLs. - ---} +--} module Main where @@ -33,11 +32,22 @@ import Control.Exception(evaluate) main :: IO () main = do moduleName:outFile:files <- getArgs - sbes <- mapM readMapAndIx files + let badFiles = -- These fail with an error like + -- MakeTable: Enum.toEnum{Word8}: tag (33088) is outside of bounds (0,255) + -- I have no idea what's going on, so for now we just + -- skip them. + ["CPs/CP932.TXT", + "CPs/CP936.TXT", + "CPs/CP949.TXT", + "CPs/CP950.TXT"] + let files' = filter (`notElem` badFiles) files + sbes <- mapM readMapAndIx files' + putStrLn "Writing output" withBinaryFile outFile WriteMode $ flip hPutStr - $ unlines $ makeTableFile moduleName files sbes + $ unlines $ makeTableFile moduleName files' sbes where readMapAndIx f = do + putStrLn ("Reading " ++ f) m <- readMap f return (codePageNum f, m) @@ -70,7 +80,7 @@ readCharHex :: String -> Char readCharHex s = if c > fromEnum (maxBound :: Word16) then error "Can't handle non-BMP character." else toEnum c - where c = readHex' s + where c = readHex' s ------------------------------------------- @@ -90,13 +100,13 @@ makeTableFile moduleName files maps = concat ++ [" ]"] mkTableEntry (i,m) = " (" ++ show i ++ ", " ++ makeSBE m ++ " )" blockSizeText = ["blockBitSize :: Int", "blockBitSize = " ++ show blockBitSize] - + makeSBE :: Map.Map Word8 Char -> String makeSBE m = unlines [ "SingleByteCP {" , " decoderArray = " ++ mkConvArray es - , " , encoderArray = " ++ mkCompactArray (swapMap m) + , " , encoderArray = " ++ mkCompactArray (swapMap m) , " }" ] where @@ -167,7 +177,7 @@ mkCompactArray m = unlines [ type CompressState b = (Map.Map Int [b], Map.Map [b] Int) -- each entry in the list corresponds to a block of size n. -compress :: (Bounded a, Enum a, Ord a, Bounded b, Ord b) => Int -> Map.Map a b +compress :: (Bounded a, Enum a, Ord a, Bounded b, Ord b) => Int -> Map.Map a b -> ([Int], CompressState b) compress n ms = runState (mapM lookupOrAdd chunks) (Map.empty, Map.empty) where @@ -196,12 +206,15 @@ languageDirectives = ["{-# LANGUAGE MagicHash #-}"] firstComment :: [FilePath] -> [String] firstComment files = map ("-- " ++) $ [ "Do not edit this file directly!" - , "It was generated by the MakeTable.hs script using the following files:" + , "It was generated by the MakeTable.hs script using the files below." + , "To regenerate it, run \"make\" in ../../../../codepages/" + , "" + , "Files:" ] ++ map takeFileName files theImports :: [String] theImports = map ("import " ++ ) - ["GHC.Prim", "GHC.Base", "GHC.Word", "GHC.Num"] + ["GHC.Prim", "GHC.Base", "GHC.Word"] theTypes :: [String] theTypes = [ "data ConvArray a = ConvArray Addr#" @@ -226,7 +239,7 @@ class (Ord a, Enum a, Bounded a, Show a) => Embed a where instance Embed Word8 where mkHex = showHex' - + instance Embed Word16 where mkHex = repDualByte @@ -241,7 +254,7 @@ showHex' :: Integral a => a -> String showHex' s = "\\x" ++ showHex s "" repDualByte :: Enum c => c -> String -repDualByte c +repDualByte c | n >= 2^(16::Int) = error "value is too high!" -- NOTE : this assumes little-endian architecture. But we're only using this on Windows, -- so it's probably OK.