From: simonm Date: Mon, 2 Feb 1998 17:35:59 +0000 (+0000) Subject: [project @ 1998-02-02 17:27:26 by simonm] X-Git-Tag: Approx_2487_patches~1016 X-Git-Url: http://git.megacz.com/?a=commitdiff_plain;h=28139aea50376444d56f43f0914291348a51a7e7;p=ghc-hetmet.git [project @ 1998-02-02 17:27:26 by simonm] Library re-organisation: All libraries now live under ghc/lib, which has the following structure: ghc/lib/std -- all prelude files (libHS.a) ghc/lib/std/cbits ghc/lib/exts -- standard Hugs/GHC extensions (libHSexts.a) -- available with '-fglasgow-exts' ghc/lib/posix -- POSIX library (libHSposix.a) ghc/lib/posix/cbits -- available with '-syslib posix' ghc/lib/misc -- used to be hslibs/ghc (libHSmisc.a) ghc/lib/misc/cbits -- available with '-syslib misc' ghc/lib/concurrent -- Concurrent libraries (libHSconc.a) -- available with '-concurrent' Also, several non-standard prelude modules had their names changed to begin with 'Prel' to reduce namespace pollution. Addr ==> PrelAddr (Addr interface available in 'exts') ArrBase ==> PrelArr CCall ==> PrelCCall (CCall interface available in 'exts') ConcBase ==> PrelConc GHCerr ==> PrelErr Foreign ==> PrelForeign (Foreign interface available in 'exts') GHC ==> PrelGHC IOHandle ==> PrelHandle IOBase ==> PrelIOBase GHCmain ==> PrelMain STBase ==> PrelST Unsafe ==> PrelUnsafe UnsafeST ==> PrelUnsafeST --- diff --git a/ghc/compiler/basicTypes/IdUtils.lhs b/ghc/compiler/basicTypes/IdUtils.lhs index fa75ed4..7307caa 100644 --- a/ghc/compiler/basicTypes/IdUtils.lhs +++ b/ghc/compiler/basicTypes/IdUtils.lhs @@ -16,7 +16,7 @@ import StdIdInfo import Name ( mkWiredInIdName, Name ) import PrimOp ( primOpInfo, tagOf_PrimOp, primOp_str, PrimOpInfo(..), PrimOpResultInfo(..), PrimOp ) -import PrelMods ( gHC__ ) +import PrelMods ( pREL_GHC ) import Type ( mkForAllTys, mkFunTy, mkFunTys, mkTyVarTy, mkTyConApp ) import TysWiredIn ( boolTy ) import Unique ( mkPrimOpIdUnique ) @@ -57,7 +57,7 @@ primOpName op = name where key = mkPrimOpIdUnique (IBOX(tagOf_PrimOp prim_op)) - name = mkWiredInIdName key gHC__ occ_name the_id + name = mkWiredInIdName key pREL_GHC occ_name the_id the_id = mkPrimitiveId name ty prim_op \end{code} diff --git a/ghc/compiler/basicTypes/Name.lhs b/ghc/compiler/basicTypes/Name.lhs index e01e8c0..9951f98 100644 --- a/ghc/compiler/basicTypes/Name.lhs +++ b/ghc/compiler/basicTypes/Name.lhs @@ -61,7 +61,6 @@ import CStrings ( identToC, modnameToC, cSEP ) import CmdLineOpts ( opt_PprStyle_All, opt_OmitInterfacePragmas, opt_EnsureSplittableC ) import BasicTypes ( Module, IfaceFlavour(..), moduleString, pprModule ) -import PrelMods ( gHC__ ) import Lex ( isLexSym, isLexConId ) import SrcLoc ( noSrcLoc, mkBuiltinSrcLoc, SrcLoc ) import Unique ( pprUnique, showUnique, Unique, Uniquable(..) ) diff --git a/ghc/compiler/prelude/PrelInfo.lhs b/ghc/compiler/prelude/PrelInfo.lhs index 21c2ea2..f4f3cab 100644 --- a/ghc/compiler/prelude/PrelInfo.lhs +++ b/ghc/compiler/prelude/PrelInfo.lhs @@ -338,18 +338,18 @@ to write them all down in one place. prelude_primop op = qual (modAndOcc (primOpName op)) intTyCon_RDR = qual (modAndOcc intTyCon) -ioTyCon_RDR = tcQual (iO_BASE, SLIT("IO")) -ioDataCon_RDR = varQual (iO_BASE, SLIT("IO")) -ioOkDataCon_RDR = varQual (iO_BASE, SLIT("IOok")) +ioTyCon_RDR = tcQual (pREL_IO_BASE, SLIT("IO")) +ioDataCon_RDR = varQual (pREL_IO_BASE, SLIT("IO")) +ioOkDataCon_RDR = varQual (pREL_IO_BASE, SLIT("IOok")) orderingTyCon_RDR = tcQual (pREL_BASE, SLIT("Ordering")) rationalTyCon_RDR = tcQual (pREL_NUM, SLIT("Rational")) ratioTyCon_RDR = tcQual (pREL_NUM, SLIT("Ratio")) ratioDataCon_RDR = varQual (pREL_NUM, SLIT(":%")) -byteArrayTyCon_RDR = tcQual (aRR_BASE, SLIT("ByteArray")) -mutableByteArrayTyCon_RDR = tcQual (aRR_BASE, SLIT("MutableByteArray")) +byteArrayTyCon_RDR = tcQual (pREL_ARR, SLIT("ByteArray")) +mutableByteArrayTyCon_RDR = tcQual (pREL_ARR, SLIT("MutableByteArray")) -allClass_RDR = tcQual (gHC__, SLIT("All")) +allClass_RDR = tcQual (pREL_GHC, SLIT("All")) eqClass_RDR = tcQual (pREL_BASE, SLIT("Eq")) ordClass_RDR = tcQual (pREL_BASE, SLIT("Ord")) evalClass_RDR = tcQual (pREL_BASE, SLIT("Eval")) @@ -369,8 +369,8 @@ realFracClass_RDR = tcQual (pREL_NUM, SLIT("RealFrac")) realFloatClass_RDR = tcQual (pREL_NUM, SLIT("RealFloat")) readClass_RDR = tcQual (pREL_READ, SLIT("Read")) ixClass_RDR = tcQual (iX, SLIT("Ix")) -ccallableClass_RDR = tcQual (gHC__, SLIT("CCallable")) -creturnableClass_RDR = tcQual (gHC__, SLIT("CReturnable")) +ccallableClass_RDR = tcQual (pREL_GHC, SLIT("CCallable")) +creturnableClass_RDR = tcQual (pREL_GHC, SLIT("CReturnable")) fromInt_RDR = varQual (pREL_BASE, SLIT("fromInt")) fromInteger_RDR = varQual (pREL_BASE, SLIT("fromInteger")) @@ -431,8 +431,8 @@ plus_RDR = varQual (pREL_BASE, SLIT("+")) times_RDR = varQual (pREL_BASE, SLIT("*")) mkInt_RDR = varQual (pREL_BASE, SLIT("I#")) -error_RDR = varQual (gHC_ERR, SLIT("error")) -assert_RDR = varQual (gHC_ERR, SLIT("assert__")) +error_RDR = varQual (pREL_ERR, SLIT("error")) +assert_RDR = varQual (pREL_ERR, SLIT("assert__")) eqH_Char_RDR = prelude_primop CharEqOp ltH_Char_RDR = prelude_primop CharLtOp diff --git a/ghc/compiler/prelude/PrelMods.lhs b/ghc/compiler/prelude/PrelMods.lhs index 287a378..9153a0b 100644 --- a/ghc/compiler/prelude/PrelMods.lhs +++ b/ghc/compiler/prelude/PrelMods.lhs @@ -12,13 +12,9 @@ defined here so as to avod \begin{code} module PrelMods ( - gHC__, pRELUDE, pREL_BASE, - pREL_READ , pREL_NUM, pREL_LIST, - pREL_TUP , pACKED_STRING, cONC_BASE, - iO_BASE , mONAD, rATIO, iX, - sT_BASE , aRR_BASE, fOREIGN, mAIN, - gHC_MAIN , gHC_ERR, - cCALL , aDDR + pREL_GHC, pRELUDE, mONAD, rATIO, iX, mAIN, pREL_MAIN, pREL_ERR, + pREL_BASE, pREL_NUM, pREL_LIST, pREL_TUP, pREL_ADDR, pREL_READ, + pREL_PACK, pREL_CONC, pREL_IO_BASE, pREL_ST, pREL_ARR, pREL_FOREIGN ) where #include "HsVersions.h" @@ -27,32 +23,33 @@ import BasicTypes( Module ) \end{code} \begin{code} -gHC__, pRELUDE, pREL_BASE, pREL_NUM, pREL_LIST, pREL_TUP :: Module -pACKED_STRING, cONC_BASE, iO_BASE, mONAD, rATIO, iX :: Module -sT_BASE, aRR_BASE, fOREIGN, mAIN, gHC_MAIN, gHC_ERR :: Module +pREL_GHC, pRELUDE, mONAD, rATIO, iX, mAIN, pREL_MAIN, pREL_ERR :: Module +pREL_BASE, pREL_NUM, pREL_LIST, pREL_TUP, pREL_ADDR, pREL_READ :: Module +pREL_PACK, pREL_CONC, pREL_IO_BASE, pREL_ST, pREL_ARR, pREL_FOREIGN :: Module -gHC__ = SLIT("GHC") -- Primitive types and values pRELUDE = SLIT("Prelude") +pREL_GHC = SLIT("PrelGHC") -- Primitive types and values pREL_BASE = SLIT("PrelBase") pREL_READ = SLIT("PrelRead") pREL_NUM = SLIT("PrelNum") pREL_LIST = SLIT("PrelList") pREL_TUP = SLIT("PrelTup") -pACKED_STRING= SLIT("PackBase") -cONC_BASE = SLIT("ConcBase") -iO_BASE = SLIT("IOBase") +pREL_PACK = SLIT("PrelPack") +pREL_CONC = SLIT("PrelConc") +pREL_IO_BASE = SLIT("PrelIOBase") +pREL_ST = SLIT("PrelST") +pREL_ARR = SLIT("PrelArr") +pREL_FOREIGN = SLIT("PrelForeign") +pREL_CCALL = SLIT("PrelCCall") +pREL_ADDR = SLIT("PrelAddr") +pREL_ERR = SLIT("PrelErr") + mONAD = SLIT("Monad") rATIO = SLIT("Ratio") iX = SLIT("Ix") -sT_BASE = SLIT("STBase") -aRR_BASE = SLIT("ArrBase") -fOREIGN = SLIT("Foreign") -cCALL = SLIT("CCall") -aDDR = SLIT("Addr") +pREL_MAIN = SLIT("PrelMain") mAIN = SLIT("Main") -gHC_MAIN = SLIT("GHCmain") -gHC_ERR = SLIT("GHCerr") \end{code} diff --git a/ghc/compiler/prelude/PrelVals.lhs b/ghc/compiler/prelude/PrelVals.lhs index 513cec4..9f6930b 100644 --- a/ghc/compiler/prelude/PrelVals.lhs +++ b/ghc/compiler/prelude/PrelVals.lhs @@ -80,10 +80,10 @@ pc_bottoming_Id key mod name ty -- these "bottom" out, no matter what their arguments eRROR_ID - = pc_bottoming_Id errorIdKey gHC_ERR SLIT("error") errorTy + = pc_bottoming_Id errorIdKey pREL_ERR SLIT("error") errorTy generic_ERROR_ID u n - = pc_bottoming_Id u gHC_ERR n errorTy + = pc_bottoming_Id u pREL_ERR n errorTy pAT_ERROR_ID = generic_ERROR_ID patErrorIdKey SLIT("patError") @@ -99,11 +99,11 @@ nO_METHOD_BINDING_ERROR_ID = generic_ERROR_ID noMethodBindingErrorIdKey SLIT("noMethodBindingError") aBSENT_ERROR_ID - = pc_bottoming_Id absentErrorIdKey gHC_ERR SLIT("absentErr") + = pc_bottoming_Id absentErrorIdKey pREL_ERR SLIT("absentErr") (mkSigmaTy [openAlphaTyVar] [] openAlphaTy) pAR_ERROR_ID - = pcMiscPrelId parErrorIdKey gHC_ERR SLIT("parError") + = pcMiscPrelId parErrorIdKey pREL_ERR SLIT("parError") (mkSigmaTy [openAlphaTyVar] [] openAlphaTy) noIdInfo openAlphaTy = mkTyVarTy openAlphaTyVar @@ -121,7 +121,7 @@ decide that the second argument is strict, evaluate that first (!!), and make a jolly old mess. \begin{code} tRACE_ID - = pcMiscPrelId traceIdKey iO_BASE SLIT("trace") traceTy + = pcMiscPrelId traceIdKey pREL_IO_BASE SLIT("trace") traceTy (noIdInfo `addSpecInfo` pcGenerateSpecs traceIdKey tRACE_ID noIdInfo traceTy) where traceTy = mkSigmaTy [alphaTyVar] [] (mkFunTys [mkListTy charTy, alphaTy] alphaTy) @@ -135,33 +135,33 @@ tRACE_ID \begin{code} packStringForCId - = pcMiscPrelId packCStringIdKey{-ToDo:rename-} pACKED_STRING SLIT("packCString#") + = pcMiscPrelId packCStringIdKey{-ToDo:rename-} pREL_PACK SLIT("packCString#") (mkFunTys [stringTy] byteArrayPrimTy) noIdInfo -------------------------------------------------------------------- unpackCStringId - = pcMiscPrelId unpackCStringIdKey pACKED_STRING SLIT("unpackCString#") + = pcMiscPrelId unpackCStringIdKey pREL_PACK SLIT("unpackCString#") (mkFunTys [addrPrimTy{-a char *-}] stringTy) noIdInfo -- Andy says: -- (FunTy addrPrimTy{-a char *-} stringTy) (noIdInfo `addInfo` exactArity 1) -- but I don't like wired-in IdInfos (WDP) unpackCString2Id -- for cases when a string has a NUL in it - = pcMiscPrelId unpackCString2IdKey pACKED_STRING SLIT("unpackNBytes#") + = pcMiscPrelId unpackCString2IdKey pREL_PACK SLIT("unpackNBytes#") (mkFunTys [addrPrimTy{-a char *-}, intPrimTy{-length-}] stringTy) noIdInfo -------------------------------------------------------------------- unpackCStringAppendId - = pcMiscPrelId unpackCStringAppendIdKey pACKED_STRING SLIT("unpackAppendCString#") + = pcMiscPrelId unpackCStringAppendIdKey pREL_PACK SLIT("unpackAppendCString#") (mkFunTys [addrPrimTy{-a "char *" pointer-},stringTy] stringTy) ((noIdInfo {-LATER:`addUnfoldInfo` mkMagicUnfolding unpackCStringAppendIdKey-}) `addArityInfo` exactArity 2) unpackCStringFoldrId - = pcMiscPrelId unpackCStringFoldrIdKey pACKED_STRING SLIT("unpackFoldrCString#") + = pcMiscPrelId unpackCStringFoldrIdKey pREL_PACK SLIT("unpackFoldrCString#") (mkSigmaTy [alphaTyVar] [] (mkFunTys [addrPrimTy{-a "char *" pointer-}, mkFunTys [charTy, alphaTy] alphaTy, @@ -481,7 +481,7 @@ noFollowId = pcMiscPrelId noFollowIdKey cONC_BASE SLIT("noFollow") nasty as-is, change it back to a literal (@Literal@). \begin{code} realWorldPrimId - = pcMiscPrelId realWorldPrimIdKey gHC__ SLIT("realWorld#") + = pcMiscPrelId realWorldPrimIdKey pREL_GHC SLIT("realWorld#") realWorldStatePrimTy noIdInfo \end{code} @@ -498,7 +498,7 @@ voidId = pc_bottoming_Id voidIdKey pREL_BASE SLIT("void") voidTy \begin{code} buildId - = pcMiscPrelId buildIdKey gHC_ERR SLIT("build") buildTy + = pcMiscPrelId buildIdKey pREL_ERR SLIT("build") buildTy ((((noIdInfo {-LATER:`addUnfoldInfo` mkMagicUnfolding buildIdKey-}) `addStrictnessInfo` mkStrictnessInfo [WwStrict] False) @@ -543,7 +543,7 @@ mkBuild ty tv c n g expr \begin{code} augmentId - = pcMiscPrelId augmentIdKey gHC_ERR SLIT("augment") augmentTy + = pcMiscPrelId augmentIdKey pREL_ERR SLIT("augment") augmentTy (((noIdInfo {-LATER:`addUnfoldInfo` mkMagicUnfolding augmentIdKey-}) `addStrictnessInfo` mkStrictnessInfo [WwStrict,WwLazy False] False) diff --git a/ghc/compiler/prelude/PrimOp.lhs b/ghc/compiler/prelude/PrimOp.lhs index 84af9e0..60050db 100644 --- a/ghc/compiler/prelude/PrimOp.lhs +++ b/ghc/compiler/prelude/PrimOp.lhs @@ -1822,8 +1822,8 @@ pprPrimOp other_op = getPprStyle $ \ sty -> if codeStyle sty then -- For C just print the primop itself identToC str - else if ifaceStyle sty then -- For interfaces Print it qualified with GHC. - ptext SLIT("GHC.") <> ptext str + else if ifaceStyle sty then -- For interfaces Print it qualified with PrelGHC. + ptext SLIT("PrelGHC.") <> ptext str else -- Unqualified is good enough ptext str where diff --git a/ghc/compiler/prelude/TysPrim.lhs b/ghc/compiler/prelude/TysPrim.lhs index 660b2a5..37ad832 100644 --- a/ghc/compiler/prelude/TysPrim.lhs +++ b/ghc/compiler/prelude/TysPrim.lhs @@ -18,7 +18,7 @@ import TyCon ( mkPrimTyCon, mkDataTyCon, TyCon ) import BasicTypes ( NewOrData(..), RecFlag(..) ) import Type ( mkTyConApp, mkTyConTy, mkTyVarTys, Type ) import TyVar ( GenTyVar(..), alphaTyVars ) -import PrelMods ( gHC__ ) +import PrelMods ( pREL_GHC ) import Unique \end{code} @@ -40,7 +40,7 @@ pcPrimTyCon :: Unique{-TyConKey-} -> FAST_STRING -> Int -> PrimRep -> TyCon pcPrimTyCon key str arity primrep = the_tycon where - name = mkWiredInTyConName key gHC__ str the_tycon + name = mkWiredInTyConName key pREL_GHC str the_tycon the_tycon = mkPrimTyCon name arity primrep @@ -132,7 +132,7 @@ voidTyCon = mk_no_constr_tycon voidTyConKey SLIT("Void") mk_no_constr_tycon key str = the_tycon where - name = mkWiredInTyConName key gHC__ str the_tycon + name = mkWiredInTyConName key pREL_GHC str the_tycon the_tycon = mkDataTyCon name mkBoxedTypeKind [] -- No tyvars [] -- No context diff --git a/ghc/compiler/prelude/TysWiredIn.lhs b/ghc/compiler/prelude/TysWiredIn.lhs index 2f78305..7f1d624 100644 --- a/ghc/compiler/prelude/TysWiredIn.lhs +++ b/ghc/compiler/prelude/TysWiredIn.lhs @@ -235,15 +235,15 @@ min_int = toInteger minInt \begin{code} wordTy = mkTyConTy wordTyCon -wordTyCon = pcNonRecDataTyCon wordTyConKey fOREIGN SLIT("Word") [] [wordDataCon] -wordDataCon = pcDataCon wordDataConKey fOREIGN SLIT("W#") [] [] [wordPrimTy] wordTyCon +wordTyCon = pcNonRecDataTyCon wordTyConKey pREL_FOREIGN SLIT("Word") [] [wordDataCon] +wordDataCon = pcDataCon wordDataConKey pREL_FOREIGN SLIT("W#") [] [] [wordPrimTy] wordTyCon \end{code} \begin{code} addrTy = mkTyConTy addrTyCon -addrTyCon = pcNonRecDataTyCon addrTyConKey aDDR SLIT("Addr") [] [addrDataCon] -addrDataCon = pcDataCon addrDataConKey aDDR SLIT("A#") [] [] [addrPrimTy] addrTyCon +addrTyCon = pcNonRecDataTyCon addrTyConKey pREL_ADDR SLIT("Addr") [] [addrDataCon] +addrDataCon = pcDataCon addrDataConKey pREL_ADDR SLIT("A#") [] [] [addrPrimTy] addrTyCon \end{code} \begin{code} @@ -264,29 +264,29 @@ doubleDataCon = pcDataCon doubleDataConKey pREL_BASE SLIT("D#") [] [] [doublePri mkStateTy ty = mkTyConApp stateTyCon [ty] realWorldStateTy = mkStateTy realWorldTy -- a common use -stateTyCon = pcNonRecDataTyCon stateTyConKey sT_BASE SLIT("State") alpha_tyvar [stateDataCon] +stateTyCon = pcNonRecDataTyCon stateTyConKey pREL_ST SLIT("State") alpha_tyvar [stateDataCon] stateDataCon - = pcDataCon stateDataConKey sT_BASE SLIT("S#") + = pcDataCon stateDataConKey pREL_ST SLIT("S#") alpha_tyvar [] [mkStatePrimTy alphaTy] stateTyCon \end{code} \begin{code} stablePtrTyCon - = pcNonRecDataTyCon stablePtrTyConKey fOREIGN SLIT("StablePtr") + = pcNonRecDataTyCon stablePtrTyConKey pREL_FOREIGN SLIT("StablePtr") alpha_tyvar [stablePtrDataCon] where stablePtrDataCon - = pcDataCon stablePtrDataConKey fOREIGN SLIT("StablePtr") + = pcDataCon stablePtrDataConKey pREL_FOREIGN SLIT("StablePtr") alpha_tyvar [] [mkStablePtrPrimTy alphaTy] stablePtrTyCon \end{code} \begin{code} foreignObjTyCon - = pcNonRecDataTyCon foreignObjTyConKey fOREIGN SLIT("ForeignObj") + = pcNonRecDataTyCon foreignObjTyConKey pREL_FOREIGN SLIT("ForeignObj") [] [foreignObjDataCon] where foreignObjDataCon - = pcDataCon foreignObjDataConKey fOREIGN SLIT("ForeignObj") + = pcDataCon foreignObjDataConKey pREL_FOREIGN SLIT("ForeignObj") [] [] [foreignObjPrimTy] foreignObjTyCon \end{code} @@ -346,118 +346,118 @@ We fish one of these \tr{StateAnd#} things with \begin{code} stateAndPtrPrimTyCon - = pcNonRecDataTyCon stateAndPtrPrimTyConKey sT_BASE SLIT("StateAndPtr#") + = pcNonRecDataTyCon stateAndPtrPrimTyConKey pREL_ST SLIT("StateAndPtr#") alpha_beta_tyvars [stateAndPtrPrimDataCon] stateAndPtrPrimDataCon - = pcDataCon stateAndPtrPrimDataConKey sT_BASE SLIT("StateAndPtr#") + = pcDataCon stateAndPtrPrimDataConKey pREL_ST SLIT("StateAndPtr#") alpha_beta_tyvars [] [mkStatePrimTy alphaTy, betaTy] stateAndPtrPrimTyCon stateAndCharPrimTyCon - = pcNonRecDataTyCon stateAndCharPrimTyConKey sT_BASE SLIT("StateAndChar#") + = pcNonRecDataTyCon stateAndCharPrimTyConKey pREL_ST SLIT("StateAndChar#") alpha_tyvar [stateAndCharPrimDataCon] stateAndCharPrimDataCon - = pcDataCon stateAndCharPrimDataConKey sT_BASE SLIT("StateAndChar#") + = pcDataCon stateAndCharPrimDataConKey pREL_ST SLIT("StateAndChar#") alpha_tyvar [] [mkStatePrimTy alphaTy, charPrimTy] stateAndCharPrimTyCon stateAndIntPrimTyCon - = pcNonRecDataTyCon stateAndIntPrimTyConKey sT_BASE SLIT("StateAndInt#") + = pcNonRecDataTyCon stateAndIntPrimTyConKey pREL_ST SLIT("StateAndInt#") alpha_tyvar [stateAndIntPrimDataCon] stateAndIntPrimDataCon - = pcDataCon stateAndIntPrimDataConKey sT_BASE SLIT("StateAndInt#") + = pcDataCon stateAndIntPrimDataConKey pREL_ST SLIT("StateAndInt#") alpha_tyvar [] [mkStatePrimTy alphaTy, intPrimTy] stateAndIntPrimTyCon stateAndWordPrimTyCon - = pcNonRecDataTyCon stateAndWordPrimTyConKey sT_BASE SLIT("StateAndWord#") + = pcNonRecDataTyCon stateAndWordPrimTyConKey pREL_ST SLIT("StateAndWord#") alpha_tyvar [stateAndWordPrimDataCon] stateAndWordPrimDataCon - = pcDataCon stateAndWordPrimDataConKey sT_BASE SLIT("StateAndWord#") + = pcDataCon stateAndWordPrimDataConKey pREL_ST SLIT("StateAndWord#") alpha_tyvar [] [mkStatePrimTy alphaTy, wordPrimTy] stateAndWordPrimTyCon stateAndAddrPrimTyCon - = pcNonRecDataTyCon stateAndAddrPrimTyConKey sT_BASE SLIT("StateAndAddr#") + = pcNonRecDataTyCon stateAndAddrPrimTyConKey pREL_ST SLIT("StateAndAddr#") alpha_tyvar [stateAndAddrPrimDataCon] stateAndAddrPrimDataCon - = pcDataCon stateAndAddrPrimDataConKey sT_BASE SLIT("StateAndAddr#") + = pcDataCon stateAndAddrPrimDataConKey pREL_ST SLIT("StateAndAddr#") alpha_tyvar [] [mkStatePrimTy alphaTy, addrPrimTy] stateAndAddrPrimTyCon stateAndStablePtrPrimTyCon - = pcNonRecDataTyCon stateAndStablePtrPrimTyConKey fOREIGN SLIT("StateAndStablePtr#") + = pcNonRecDataTyCon stateAndStablePtrPrimTyConKey pREL_FOREIGN SLIT("StateAndStablePtr#") alpha_beta_tyvars [stateAndStablePtrPrimDataCon] stateAndStablePtrPrimDataCon - = pcDataCon stateAndStablePtrPrimDataConKey fOREIGN SLIT("StateAndStablePtr#") + = pcDataCon stateAndStablePtrPrimDataConKey pREL_FOREIGN SLIT("StateAndStablePtr#") alpha_beta_tyvars [] [mkStatePrimTy alphaTy, mkTyConApp stablePtrPrimTyCon [betaTy]] stateAndStablePtrPrimTyCon stateAndForeignObjPrimTyCon - = pcNonRecDataTyCon stateAndForeignObjPrimTyConKey fOREIGN SLIT("StateAndForeignObj#") + = pcNonRecDataTyCon stateAndForeignObjPrimTyConKey pREL_FOREIGN SLIT("StateAndForeignObj#") alpha_tyvar [stateAndForeignObjPrimDataCon] stateAndForeignObjPrimDataCon - = pcDataCon stateAndForeignObjPrimDataConKey fOREIGN SLIT("StateAndForeignObj#") + = pcDataCon stateAndForeignObjPrimDataConKey pREL_FOREIGN SLIT("StateAndForeignObj#") alpha_tyvar [] [mkStatePrimTy alphaTy, mkTyConTy foreignObjPrimTyCon] stateAndForeignObjPrimTyCon stateAndFloatPrimTyCon - = pcNonRecDataTyCon stateAndFloatPrimTyConKey sT_BASE SLIT("StateAndFloat#") + = pcNonRecDataTyCon stateAndFloatPrimTyConKey pREL_ST SLIT("StateAndFloat#") alpha_tyvar [stateAndFloatPrimDataCon] stateAndFloatPrimDataCon - = pcDataCon stateAndFloatPrimDataConKey sT_BASE SLIT("StateAndFloat#") + = pcDataCon stateAndFloatPrimDataConKey pREL_ST SLIT("StateAndFloat#") alpha_tyvar [] [mkStatePrimTy alphaTy, floatPrimTy] stateAndFloatPrimTyCon stateAndDoublePrimTyCon - = pcNonRecDataTyCon stateAndDoublePrimTyConKey sT_BASE SLIT("StateAndDouble#") + = pcNonRecDataTyCon stateAndDoublePrimTyConKey pREL_ST SLIT("StateAndDouble#") alpha_tyvar [stateAndDoublePrimDataCon] stateAndDoublePrimDataCon - = pcDataCon stateAndDoublePrimDataConKey sT_BASE SLIT("StateAndDouble#") + = pcDataCon stateAndDoublePrimDataConKey pREL_ST SLIT("StateAndDouble#") alpha_tyvar [] [mkStatePrimTy alphaTy, doublePrimTy] stateAndDoublePrimTyCon \end{code} \begin{code} stateAndArrayPrimTyCon - = pcNonRecDataTyCon stateAndArrayPrimTyConKey aRR_BASE SLIT("StateAndArray#") + = pcNonRecDataTyCon stateAndArrayPrimTyConKey pREL_ARR SLIT("StateAndArray#") alpha_beta_tyvars [stateAndArrayPrimDataCon] stateAndArrayPrimDataCon - = pcDataCon stateAndArrayPrimDataConKey aRR_BASE SLIT("StateAndArray#") + = pcDataCon stateAndArrayPrimDataConKey pREL_ARR SLIT("StateAndArray#") alpha_beta_tyvars [] [mkStatePrimTy alphaTy, mkArrayPrimTy betaTy] stateAndArrayPrimTyCon stateAndMutableArrayPrimTyCon - = pcNonRecDataTyCon stateAndMutableArrayPrimTyConKey aRR_BASE SLIT("StateAndMutableArray#") + = pcNonRecDataTyCon stateAndMutableArrayPrimTyConKey pREL_ARR SLIT("StateAndMutableArray#") alpha_beta_tyvars [stateAndMutableArrayPrimDataCon] stateAndMutableArrayPrimDataCon - = pcDataCon stateAndMutableArrayPrimDataConKey aRR_BASE SLIT("StateAndMutableArray#") + = pcDataCon stateAndMutableArrayPrimDataConKey pREL_ARR SLIT("StateAndMutableArray#") alpha_beta_tyvars [] [mkStatePrimTy alphaTy, mkMutableArrayPrimTy alphaTy betaTy] stateAndMutableArrayPrimTyCon stateAndByteArrayPrimTyCon - = pcNonRecDataTyCon stateAndByteArrayPrimTyConKey aRR_BASE SLIT("StateAndByteArray#") + = pcNonRecDataTyCon stateAndByteArrayPrimTyConKey pREL_ARR SLIT("StateAndByteArray#") alpha_tyvar [stateAndByteArrayPrimDataCon] stateAndByteArrayPrimDataCon - = pcDataCon stateAndByteArrayPrimDataConKey aRR_BASE SLIT("StateAndByteArray#") + = pcDataCon stateAndByteArrayPrimDataConKey pREL_ARR SLIT("StateAndByteArray#") alpha_tyvar [] [mkStatePrimTy alphaTy, byteArrayPrimTy] stateAndByteArrayPrimTyCon stateAndMutableByteArrayPrimTyCon - = pcNonRecDataTyCon stateAndMutableByteArrayPrimTyConKey aRR_BASE SLIT("StateAndMutableByteArray#") + = pcNonRecDataTyCon stateAndMutableByteArrayPrimTyConKey pREL_ARR SLIT("StateAndMutableByteArray#") alpha_tyvar [stateAndMutableByteArrayPrimDataCon] stateAndMutableByteArrayPrimDataCon - = pcDataCon stateAndMutableByteArrayPrimDataConKey aRR_BASE SLIT("StateAndMutableByteArray#") + = pcDataCon stateAndMutableByteArrayPrimDataConKey pREL_ARR SLIT("StateAndMutableByteArray#") alpha_tyvar [] [mkStatePrimTy alphaTy, mkTyConApp mutableByteArrayPrimTyCon alpha_ty] stateAndMutableByteArrayPrimTyCon stateAndSynchVarPrimTyCon - = pcNonRecDataTyCon stateAndSynchVarPrimTyConKey cONC_BASE SLIT("StateAndSynchVar#") + = pcNonRecDataTyCon stateAndSynchVarPrimTyConKey pREL_CONC SLIT("StateAndSynchVar#") alpha_beta_tyvars [stateAndSynchVarPrimDataCon] stateAndSynchVarPrimDataCon - = pcDataCon stateAndSynchVarPrimDataConKey cONC_BASE SLIT("StateAndSynchVar#") + = pcDataCon stateAndSynchVarPrimDataConKey pREL_CONC SLIT("StateAndSynchVar#") alpha_beta_tyvars [] [mkStatePrimTy alphaTy, mkSynchVarPrimTy alphaTy betaTy] stateAndSynchVarPrimTyCon \end{code} @@ -512,9 +512,9 @@ type of runST. \begin{code} mkStateTransformerTy s a = mkTyConApp stTyCon [s, a] -stTyCon = pcNonRecNewTyCon stTyConKey sT_BASE SLIT("ST") alpha_beta_tyvars [stDataCon] +stTyCon = pcNonRecNewTyCon stTyConKey pREL_ST SLIT("ST") alpha_beta_tyvars [stDataCon] -stDataCon = pcDataCon stDataConKey sT_BASE SLIT("ST") +stDataCon = pcDataCon stDataConKey pREL_ST SLIT("ST") alpha_beta_tyvars [] [ty] stTyCon where ty = mkFunTy (mkStatePrimTy alphaTy) (mkSTretTy alphaTy betaTy) @@ -522,10 +522,10 @@ stDataCon = pcDataCon stDataConKey sT_BASE SLIT("ST") mkSTretTy alpha beta = mkTyConApp stRetTyCon [alpha,beta] stRetTyCon - = pcNonRecDataTyCon stRetTyConKey sT_BASE SLIT("STret") + = pcNonRecDataTyCon stRetTyConKey pREL_ST SLIT("STret") alpha_beta_tyvars [stRetDataCon] stRetDataCon - = pcDataCon stRetDataConKey sT_BASE SLIT("STret") + = pcDataCon stRetDataConKey pREL_ST SLIT("STret") alpha_beta_tyvars [] [mkStatePrimTy alphaTy, betaTy] stRetTyCon \end{code} diff --git a/ghc/compiler/typecheck/TcInstDcls.lhs b/ghc/compiler/typecheck/TcInstDcls.lhs index 8406ff6..900d97f 100644 --- a/ghc/compiler/typecheck/TcInstDcls.lhs +++ b/ghc/compiler/typecheck/TcInstDcls.lhs @@ -780,7 +780,7 @@ nonBoxedPrimCCallErr clas inst_ty ppr inst_ty]) omittedMethodWarn sel_id clas - = sep [ptext SLIT("Warning: no explicit method nor default method for") <+> quotes (ppr sel_id), + = sep [ptext SLIT("No explicit method nor default method for") <+> quotes (ppr sel_id), ptext SLIT("in an instance declaration for") <+> quotes (ppr clas)] {- diff --git a/ghc/compiler/typecheck/TcModule.lhs b/ghc/compiler/typecheck/TcModule.lhs index 1855672..d216314 100644 --- a/ghc/compiler/typecheck/TcModule.lhs +++ b/ghc/compiler/typecheck/TcModule.lhs @@ -52,7 +52,7 @@ import Class ( Class, classSelIds, classTyCon ) import Type ( mkTyConApp, mkSynTy, Type ) import TyVar ( emptyTyVarEnv ) import TysWiredIn ( unitTy ) -import PrelMods ( gHC_MAIN, mAIN ) +import PrelMods ( pREL_MAIN, mAIN ) import PrelInfo ( main_NAME, ioTyCon_NAME ) import Unify ( unifyTauTy ) import UniqFM ( lookupUFM_Directly, lookupWithDefaultUFM_Directly, diff --git a/ghc/compiler/types/TyCon.lhs b/ghc/compiler/types/TyCon.lhs index 530af85..a2048a5 100644 --- a/ghc/compiler/types/TyCon.lhs +++ b/ghc/compiler/types/TyCon.lhs @@ -53,7 +53,7 @@ import Maybes import Name ( Name, nameUnique, mkWiredInTyConName, NamedThing(getName) ) import Unique ( Unique, funTyConKey, Uniquable(..) ) import PrimRep ( PrimRep(..), isFollowableRep ) -import PrelMods ( gHC__, pREL_TUP, pREL_BASE ) +import PrelMods ( pREL_GHC, pREL_TUP, pREL_BASE ) import Lex ( mkTupNameStr ) import SrcLoc ( SrcLoc, mkBuiltinSrcLoc ) import Util ( nOfThem, isIn ) @@ -122,7 +122,7 @@ data TyCon \begin{code} mkFunTyCon = FunTyCon -mkFunTyConName = mkWiredInTyConName funTyConKey gHC__ SLIT("->") FunTyCon +mkFunTyConName = mkWiredInTyConName funTyConKey pREL_GHC SLIT("->") FunTyCon mkSpecTyCon = SpecTyCon mkTupleTyCon = TupleTyCon diff --git a/ghc/driver/ghc.lprl b/ghc/driver/ghc.lprl index 92b6eee..8ed8c07 100644 --- a/ghc/driver/ghc.lprl +++ b/ghc/driver/ghc.lprl @@ -2786,7 +2786,7 @@ arg: while($_ = $Args[0]) { /^-syslib(.*)/ && do { local($syslib) = &grab_arg_arg(*Args,'-syslib',$1); print STDERR "$Pgm: no such system library (-syslib): $syslib\n", - $Status++ unless $syslib =~ /^(hbc|ghc|posix|contrib)$/; + $Status++ unless $syslib =~ /^(exts|misc|posix)$/; # # The posix library is a `special' in that it relies on diff --git a/ghc/lib/Makefile b/ghc/lib/Makefile index 133fcde..27144d0 100644 --- a/ghc/lib/Makefile +++ b/ghc/lib/Makefile @@ -10,106 +10,8 @@ TOP = .. include $(TOP)/mk/boilerplate.mk -WAYS=$(GhcLibWays) +# posix must be before misc. -ifeq "$(way)" "" -SUBDIRS = cbits -else -SUBDIRS= -endif - -#----------------------------------------------------------------------------- -# Setting the standard variables -# - -LIB_DIRS = ghc required glaExts concurrent - -LIBRARY = libHS$(_way).a -HS_SRCS = $(foreach d, $(LIB_DIRS), $(wildcard $(d)/*.lhs)) -HS_OBJS = $(HS_SRCS:.lhs=.$(way_)o) -LIBOBJS = $(HS_OBJS) -HS_IFACES= $(HS_SRCS:.lhs=.$(way_)hi) ghc/GHC.$(way_)hi - - -#----------------------------------------------------------------------------- -# Setting the GHC compile options - -SRC_HC_OPTS += -recomp -cpp -fglasgow-exts -fvia-C -Rghc-timing $(GhcLibHcOpts) - -# -# Profiling options -WAY_p_HC_OPTS += -GPrelude -WAY_mr_HC_OPTS += -GPrelude - -# -# Object and interface files have suffixes tagged with their ways -# -ifneq "$(way)" "" -SRC_HC_OPTS += -hisuf $(way_)hi -endif - -# per-module flags -ghc/ArrBase_HC_OPTS += -monly-2-regs -glaExts/PackedString_HC_OPTS += -monly-3-regs -required/Directory_HC_OPTS += -monly-3-regs -concurrent/Parallel_HC_OPTS += -fglasgow-exts -required/Time_HC_OPTS += -monly-3-regs -H16m - -# Far too much heap is needed to compile PrelNum with -O at the -# moment, but there you go.. -ghc/PrelNum_HC_OPTS += -H30m -# Note: this option has to go in the Makefile rather than in an -# OPTIONS line in the source file. The reason being that we want -# to override the SRC_HC_OPTS of -O, and anything option coming -# from the Makefile overrides what's in OPTIONS lines. (mumble_HC_OPTS -# does override SRC_HC_OPTS settings) -ghc/Unsafe_HC_OPTS += -Onot - -ghc/PrelBase_HC_OPTS += -H12m -ghc/PrelRead_HC_OPTS += -H12m -ghc/PrelTup_HC_OPTS += -H12m -ghc/ArrBase_HC_OPTS += -H8m -ghc/IOHandle_HC_OPTS += -H12m -required/Time_HC_OPTS += -H8m -required/Complex_HC_OPTS += -H10m -required/IO_HC_OPTS += -H12m -glaExts/Int_HC_OPTS += -H8m -glaExts/Word_HC_OPTS += -H8m - -#----------------------------------------------------------------------------- -# Dependency generation - -SRC_MKDEPENDHS_OPTS += -ighc:required:glaExts:concurrent -I$(GHC_INCLUDE_DIR) - -#----------------------------------------------------------------------------- -# Rules - -ghc/GHC.$(way_)hi : ghc/GHC.hi-boot - cp $< $@ - -boot :: ghc/GHC.hi $(foreach way, $(WAYS), ghc/GHC.$(way)_hi) - -#----------------------------------------------------------------------------- -# Installation; need to install .hi files as well as libraries -# -# The interface files are put inside the $(libdir), since they -# might (potentially) be platform specific.. -# -# Note: we use `override' here to ignore the setting of datadir -# which may have been set on the command-line..naughty, as it -# prevents `datadir' from being used from the command-line. -# This only applies to binary-distributions, though.n - -ifeq "$(BIN_DIST)" "1" -override datadir:=$(libdir)/imports -else -datadir:=$(libdir)/imports -endif - -# -# Files to install from here -# -INSTALL_LIBS += $(LIBRARY) -INSTALL_DATAS += $(HS_IFACES) +SUBDIRS = std exts posix misc concurrent include $(TOP)/mk/target.mk diff --git a/ghc/lib/concurrent/Channel.lhs b/ghc/lib/concurrent/Channel.lhs index e21bca1..6ae4ac8 100644 --- a/ghc/lib/concurrent/Channel.lhs +++ b/ghc/lib/concurrent/Channel.lhs @@ -27,9 +27,9 @@ module Channel ) where import Prelude -import ConcBase -import STBase -import Unsafe ( unsafeInterleaveIO ) +import PrelConc +import PrelST +import PrelUnsafe ( unsafeInterleaveIO ) \end{code} A channel is represented by two @MVar@s keeping track of the two ends diff --git a/ghc/lib/concurrent/ChannelVar.lhs b/ghc/lib/concurrent/ChannelVar.lhs index a60b830..5a7db0e 100644 --- a/ghc/lib/concurrent/ChannelVar.lhs +++ b/ghc/lib/concurrent/ChannelVar.lhs @@ -19,7 +19,7 @@ module ChannelVar ) where import Prelude -import ConcBase +import PrelConc \end{code} @MVars@ provide the basic mechanisms for synchronising access to a shared diff --git a/ghc/lib/concurrent/Concurrent.lhs b/ghc/lib/concurrent/Concurrent.lhs index c715a1e..b0169c2 100644 --- a/ghc/lib/concurrent/Concurrent.lhs +++ b/ghc/lib/concurrent/Concurrent.lhs @@ -19,7 +19,7 @@ module Concurrent ( module Semaphore, module Merge, module SampleVar, - module ConcBase + module PrelConc ) where import Parallel @@ -28,5 +28,5 @@ import Channel import Semaphore import Merge import SampleVar -import ConcBase +import PrelConc \end{code} diff --git a/ghc/lib/concurrent/Makefile b/ghc/lib/concurrent/Makefile new file mode 100644 index 0000000..5b3c39e --- /dev/null +++ b/ghc/lib/concurrent/Makefile @@ -0,0 +1,62 @@ +# +# Makefile for hslibs subdir +# +TOP = ../.. +include $(TOP)/mk/boilerplate.mk + +WAYS=$(GhcLibWays) + +#----------------------------------------------------------------------------- +# Setting the standard variables +# + +LIBRARY = libHSconc$(_way).a +HS_SRCS = $(wildcard *.lhs) +HS_OBJS = $(HS_SRCS:.lhs=.$(way_)o) +LIBOBJS = $(HS_OBJS) +HS_IFACES= $(HS_SRCS:.lhs=.$(way_)hi) GHC.$(way_)hi + + +#----------------------------------------------------------------------------- +# Setting the GHC compile options + +SRC_HC_OPTS += -recomp -cpp -fglasgow-exts -fvia-C -Rghc-timing $(GhcLibHcOpts) + +# +# Profiling options +WAY_p_HC_OPTS += -GPrelude +WAY_mr_HC_OPTS += -GPrelude + +# +# Object and interface files have suffixes tagged with their ways +# +ifneq "$(way)" "" +SRC_HC_OPTS += -hisuf $(way_)hi +endif + +Parallel_HC_OPTS += -fglasgow-exts + +#----------------------------------------------------------------------------- +# Dependency generation + +SRC_MKDEPENDHS_OPTS += -I$(GHC_INCLUDE_DIR) + +#----------------------------------------------------------------------------- +# Installation; need to install .hi files as well as libraries +# +# The interface files are put inside the $(libdir), since they +# might (potentially) be platform specific.. +# +# override is used here because for binary distributions, datadir is +# set on the command line. sigh. +# +override datadir:=$(libdir)/imports/concurrent + +# +# Files to install from here +# +INSTALL_LIBS += $(LIBRARY) +INSTALL_DATAS += $(HS_IFACES) + +include $(TOP)/mk/target.mk + diff --git a/ghc/lib/concurrent/Merge.lhs b/ghc/lib/concurrent/Merge.lhs index a108a24..a036a05 100644 --- a/ghc/lib/concurrent/Merge.lhs +++ b/ghc/lib/concurrent/Merge.lhs @@ -15,9 +15,9 @@ module Merge ) where import Semaphore -import ConcBase -import Unsafe ( unsafeInterleaveIO ) -import IOBase +import PrelConc +import PrelUnsafe ( unsafeInterleaveIO ) +import PrelIOBase max_buff_size = 1 diff --git a/ghc/lib/concurrent/Parallel.lhs b/ghc/lib/concurrent/Parallel.lhs index 4a6eacb..2089219 100644 --- a/ghc/lib/concurrent/Parallel.lhs +++ b/ghc/lib/concurrent/Parallel.lhs @@ -10,12 +10,12 @@ module Parallel (par, seq -- re-exported #endif ) where -import ConcBase ( par ) +import PrelConc ( par ) #if defined(__GRANSIM__) import PrelBase -import GHCerr ( parError ) -import GHC ( parGlobal#, parLocal#, parAt#, parAtAbs#, parAtRel#, parAtForNow# ) +import PrelErr ( parError ) +import PrelGHC ( parGlobal#, parLocal#, parAt#, parAtAbs#, parAtRel#, parAtForNow# ) {-# INLINE parGlobal #-} {-# INLINE parLocal #-} diff --git a/ghc/lib/concurrent/SampleVar.lhs b/ghc/lib/concurrent/SampleVar.lhs index 7f1061b..53199a6 100644 --- a/ghc/lib/concurrent/SampleVar.lhs +++ b/ghc/lib/concurrent/SampleVar.lhs @@ -29,7 +29,7 @@ module SampleVar ) where -import ConcBase +import PrelConc type SampleVar a diff --git a/ghc/lib/concurrent/Semaphore.lhs b/ghc/lib/concurrent/Semaphore.lhs index 363c936..9fc8a0b 100644 --- a/ghc/lib/concurrent/Semaphore.lhs +++ b/ghc/lib/concurrent/Semaphore.lhs @@ -23,7 +23,7 @@ module Semaphore ) where -import ConcBase +import PrelConc \end{code} General semaphores are also implemented readily in terms of shared diff --git a/ghc/lib/exts/Addr.lhs b/ghc/lib/exts/Addr.lhs new file mode 100644 index 0000000..28822b6 --- /dev/null +++ b/ghc/lib/exts/Addr.lhs @@ -0,0 +1,11 @@ +% +% (c) The AQUA Project, Glasgow University, 1994-1996 +% + +\section[Addr]{Module @Addr@} + +\begin{code} +module Addr ( module PrelAddr ) where + +import PrelAddr +\end{code} diff --git a/ghc/lib/glaExts/Bits.lhs b/ghc/lib/exts/Bits.lhs similarity index 100% rename from ghc/lib/glaExts/Bits.lhs rename to ghc/lib/exts/Bits.lhs diff --git a/ghc/lib/glaExts/ByteArray.lhs b/ghc/lib/exts/ByteArray.lhs similarity index 98% rename from ghc/lib/glaExts/ByteArray.lhs rename to ghc/lib/exts/ByteArray.lhs index d6326dc..d74c728 100644 --- a/ghc/lib/glaExts/ByteArray.lhs +++ b/ghc/lib/exts/ByteArray.lhs @@ -31,7 +31,7 @@ module ByteArray ) where -import ArrBase +import PrelArr import Ix import Foreign (Word) import Addr diff --git a/ghc/lib/exts/CCall.lhs b/ghc/lib/exts/CCall.lhs new file mode 100644 index 0000000..3eb0e68 --- /dev/null +++ b/ghc/lib/exts/CCall.lhs @@ -0,0 +1,11 @@ +% +% (c) The AQUA Project, Glasgow University, 1994-1996 +% + +\section[CCall]{Module @CCall@} + +\begin{code} +module CCall ( module PrelCCall ) where + +import PrelCCall +\end{code} diff --git a/ghc/lib/exts/Foreign.lhs b/ghc/lib/exts/Foreign.lhs new file mode 100644 index 0000000..5f38db6 --- /dev/null +++ b/ghc/lib/exts/Foreign.lhs @@ -0,0 +1,11 @@ +% +% (c) The AQUA Project, Glasgow University, 1994-1996 +% + +\section[Foreign]{Module @Foreign@} + +\begin{code} +module Foreign ( module PrelForeign ) where + +import PrelForeign +\end{code} diff --git a/ghc/lib/glaExts/GlaExts.lhs b/ghc/lib/exts/GlaExts.lhs similarity index 94% rename from ghc/lib/glaExts/GlaExts.lhs rename to ghc/lib/exts/GlaExts.lhs index 98d48bd..b00e591 100644 --- a/ghc/lib/glaExts/GlaExts.lhs +++ b/ghc/lib/exts/GlaExts.lhs @@ -54,19 +54,19 @@ module GlaExts trace, Lift(..), - -- and finally, all the unboxed primops of GHC! - module GHC + -- and finally, all the unboxed primops of PrelGHC! + module PrelGHC ) where -import GHC -import STBase -import IOExts +import PrelGHC import PrelBase +import PrelST +import IOExts +import PrelIOBase import ByteArray import MutableArray import Monad -import IOBase import Foreign type PrimIO a = IO a diff --git a/ghc/lib/glaExts/IOExts.lhs b/ghc/lib/exts/IOExts.lhs similarity index 95% rename from ghc/lib/glaExts/IOExts.lhs rename to ghc/lib/exts/IOExts.lhs index 9f108b1..51ef0fc 100644 --- a/ghc/lib/glaExts/IOExts.lhs +++ b/ghc/lib/exts/IOExts.lhs @@ -35,11 +35,11 @@ module IOExts \begin{code} import PrelBase -import IOBase -import STBase -import Unsafe -import GHC -import ArrBase +import PrelIOBase +import PrelST +import PrelUnsafe +import PrelArr +import PrelGHC import Ix reallyUnsafePtrEq a b = diff --git a/ghc/lib/glaExts/Int.lhs b/ghc/lib/exts/Int.lhs similarity index 99% rename from ghc/lib/glaExts/Int.lhs rename to ghc/lib/exts/Int.lhs index 5fa62e6..78c694f 100644 --- a/ghc/lib/glaExts/Int.lhs +++ b/ghc/lib/exts/Int.lhs @@ -7,8 +7,6 @@ This code is largely copied from the Hugs library of the same name. \begin{code} -{-# OPTIONS -fno-implicit-prelude #-} - ----------------------------------------------------------------------------- -- Signed Integers -- Suitable for use with Hugs 1.4 on 32 bit systems. @@ -33,9 +31,8 @@ import PrelBase import PrelNum import PrelRead import Ix -import GHCerr ( error ) import Bits -import GHC +import PrelGHC import CCall ----------------------------------------------------------------------------- diff --git a/ghc/lib/glaExts/LazyST.lhs b/ghc/lib/exts/LazyST.lhs similarity index 84% rename from ghc/lib/glaExts/LazyST.lhs rename to ghc/lib/exts/LazyST.lhs index d18b716..51fb06a 100644 --- a/ghc/lib/glaExts/LazyST.lhs +++ b/ghc/lib/exts/LazyST.lhs @@ -30,15 +30,14 @@ module LazyST ( ) where import qualified ST -import qualified STBase -import ArrBase -import qualified UnsafeST ( unsafeInterleaveST ) +import qualified PrelST +import PrelArr import PrelBase ( Eq(..), Int, Bool, ($), ()(..) ) import Monad import Ix -import GHC +import PrelGHC -newtype ST s a = ST (STBase.State s -> (a,STBase.State s)) +newtype ST s a = ST (PrelST.State s -> (a,PrelST.State s)) instance Monad (ST s) where @@ -55,7 +54,7 @@ instance Monad (ST s) where -- ToDo: un-inline this, it could cause problems... runST :: (All s => ST s a) -> a -runST st = case st of ST st -> let (r,_) = st (STBase.S# realWorld#) in r +runST st = case st of ST st -> let (r,_) = st (PrelST.S# realWorld#) in r \end{code} %********************************************************* @@ -104,17 +103,17 @@ thawSTArray arr = freezeSTArray (STArray arr) = strictToLazyST (freezeArray arr) unsafeFreezeSTArray (STArray arr) = strictToLazyST (unsafeFreezeArray arr) -strictToLazyST :: STBase.ST s a -> ST s a -strictToLazyST (STBase.ST m) = ST $ \s -> +strictToLazyST :: PrelST.ST s a -> ST s a +strictToLazyST (PrelST.ST m) = ST $ \s -> let - STBase.S# s# = s - STBase.STret s2# r = m s# + PrelST.S# s# = s + PrelST.STret s2# r = m s# in - (r, STBase.S# s2#) + (r, PrelST.S# s2#) -lazyToStrictST :: ST s a -> STBase.ST s a -lazyToStrictST (ST m) = STBase.ST $ \s -> - case (m (STBase.S# s)) of (a, STBase.S# s') -> STBase.STret s' a +lazyToStrictST :: ST s a -> PrelST.ST s a +lazyToStrictST (ST m) = PrelST.ST $ \s -> + case (m (PrelST.S# s)) of (a, PrelST.S# s') -> PrelST.STret s' a unsafeInterleaveST :: ST s a -> ST s a unsafeInterleaveST = strictToLazyST . ST.unsafeInterleaveST . lazyToStrictST diff --git a/ghc/lib/exts/Makefile b/ghc/lib/exts/Makefile new file mode 100644 index 0000000..11c33c3 --- /dev/null +++ b/ghc/lib/exts/Makefile @@ -0,0 +1,67 @@ +################################################################################# +# +# ghc/lib/Makefile +# +# Makefile for building the GHC Prelude libraries umpteen ways +# +# +################################################################################# + +TOP = ../.. +include $(TOP)/mk/boilerplate.mk + +WAYS=$(GhcLibWays) + +#----------------------------------------------------------------------------- +# Setting the standard variables +# + +LIBRARY = libHSexts$(_way).a +HS_SRCS = $(wildcard *.lhs) +HS_OBJS = $(HS_SRCS:.lhs=.$(way_)o) +LIBOBJS = $(HS_OBJS) +HS_IFACES= $(HS_SRCS:.lhs=.$(way_)hi) + +#----------------------------------------------------------------------------- +# Setting the GHC compile options + +SRC_HC_OPTS += -recomp -cpp -fglasgow-exts -fvia-C -Rghc-timing $(GhcLibHcOpts) + +# +# Profiling options +WAY_p_HC_OPTS += -GPrelude +WAY_mr_HC_OPTS += -GPrelude + +# +# Object and interface files have suffixes tagged with their ways +# +ifneq "$(way)" "" +SRC_HC_OPTS += -hisuf $(way_)hi +endif + +Int_HC_OPTS += -H8m +Word_HC_OPTS += -H8m + +#----------------------------------------------------------------------------- +# Dependency generation + +SRC_MKDEPENDHS_OPTS += -i../std -I$(GHC_INCLUDE_DIR) + +#----------------------------------------------------------------------------- +# Installation; need to install .hi files as well as libraries +# +# The interface files are put inside the $(libdir), since they +# might (potentially) be platform specific.. +# +# override is used here because for binary distributions, datadir is +# set on the command line. sigh. +# +override datadir:=$(libdir)/imports/exts + +# +# Files to install from here +# +INSTALL_LIBS += $(LIBRARY) +INSTALL_DATAS += $(HS_IFACES) + +include $(TOP)/mk/target.mk diff --git a/ghc/lib/glaExts/MutableArray.lhs b/ghc/lib/exts/MutableArray.lhs similarity index 99% rename from ghc/lib/glaExts/MutableArray.lhs rename to ghc/lib/exts/MutableArray.lhs index eead4b6..7a45059 100644 --- a/ghc/lib/glaExts/MutableArray.lhs +++ b/ghc/lib/exts/MutableArray.lhs @@ -55,7 +55,7 @@ module MutableArray ) where -import ArrBase +import PrelArr import ST import Ix diff --git a/ghc/lib/glaExts/NumExts.lhs b/ghc/lib/exts/NumExts.lhs similarity index 100% rename from ghc/lib/glaExts/NumExts.lhs rename to ghc/lib/exts/NumExts.lhs diff --git a/ghc/lib/glaExts/ST.lhs b/ghc/lib/exts/ST.lhs similarity index 97% rename from ghc/lib/glaExts/ST.lhs rename to ghc/lib/exts/ST.lhs index 1df58dc..2777fe4 100644 --- a/ghc/lib/glaExts/ST.lhs +++ b/ghc/lib/exts/ST.lhs @@ -28,9 +28,9 @@ module ST ( ) where -import ArrBase -import UnsafeST -import STBase +import PrelArr +import PrelUnsafeST +import PrelST import PrelBase ( Eq(..), Int, Bool, ($), ()(..) ) import Monad import Ix diff --git a/ghc/lib/glaExts/Word.lhs b/ghc/lib/exts/Word.lhs similarity index 99% rename from ghc/lib/glaExts/Word.lhs rename to ghc/lib/exts/Word.lhs index 9e4f7dd..4cb94e8 100644 --- a/ghc/lib/glaExts/Word.lhs +++ b/ghc/lib/exts/Word.lhs @@ -8,7 +8,6 @@ interface, types and operations over unsigned, sized quantities. \begin{code} -{-# OPTIONS -fno-implicit-prelude #-} module Word ( Word8 -- all abstract. , Word16 -- instances: Eq, Ord @@ -34,9 +33,8 @@ import PrelBase import PrelNum import PrelRead import Ix -import GHCerr ( error ) import Bits -import GHC +import PrelGHC import CCall ----------------------------------------------------------------------------- diff --git a/ghc/lib/misc/BSD.lhs b/ghc/lib/misc/BSD.lhs new file mode 100644 index 0000000..fae2966 --- /dev/null +++ b/ghc/lib/misc/BSD.lhs @@ -0,0 +1,451 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1995-1997 +% +\section[BSD]{Misc BSD bindings} + +The @BSD@ module defines Haskell bindings to network programming +functionality that is only provided by BSD-style APIs. + +\begin{code} +{-# OPTIONS -#include "cbits/ghcSockets.h" #-} + +module BSD ( + + HostName, + getHostName, -- :: IO HostName + + ServiceName, + getServiceByName, -- :: ServiceName -> IO ServiceEntry + getServicePortNumber, -- :: ServiceName -> IO PortNumber + + ServiceEntry(..), + getServiceEntry, -- :: IO ServiceEntry + setServiceEntry, -- :: Bool -> IO () + endServiceEntry, -- :: IO () + getServiceEntries, -- :: Bool -> IO [ServiceEntry] + + ProtocolName, + ProtocolEntry(..), + getProtocolByName, -- :: ProtocolName -> IO ProtocolEntry + getProtocolByNumber, -- :: ProtocolNumber -> IO ProtcolEntry + + setProtocolEntry, -- :: Bool -> IO () + getProtocolEntry, -- :: IO ProtocolEntry + endProtocolEntry, -- :: IO () + getProtocolEntries, -- :: Bool -> IO [ProtocolEntry] + + PortNumber, + getProtocolNumber, -- :: ProtocolName -> ProtocolNumber + + HostEntry(..), + getHostByName, -- :: HostName -> IO HostEntry + getHostByAddr, -- :: HostAddress -> Family -> IO HostEntry + + setHostEntry, -- :: Bool -> IO () + getHostEntry, -- :: IO HostEntry + endHostEntry, -- :: IO () + getHostEntries, -- :: Bool -> IO [HostEntry] + + NetworkName, + NetworkAddr, + NetworkEntry(..), + getNetworkByName, -- :: NetworkName -> IO NetworkEntry + getNetworkByAddr, -- :: NetworkAddr -> Family -> IO NetworkEntry + setNetworkEntry, -- :: Bool -> IO () + getNetworkEntry, -- :: IO NetworkEntry + endNetworkEntry, -- :: IO () + getNetworkEntries -- :: Bool -> IO [NetworkEntry] + +) where + + +import GlaExts + +import PrelIOBase + +import Foreign -- Addr.. +import PackedString ( byteArrayToPS, unpackPS ) + +import PosixUtil ( strcpy, unvectorize ) +import SocketPrim + +\end{code} + + +%*************************************************************************** +%* * +\subsection[BSD-DBTypes]{Service, Protocol \& Host Database Types} +%* * +%*************************************************************************** + +\begin{code} +type HostName = String +type ProtocolName = String +type ProtocolNumber = Int +type ServiceName = String +type PortNumber = Int + +data ProtocolEntry = + ProtocolEntry + ProtocolName -- Official Name + [ProtocolName] -- aliases + Int -- Protocol Number + +data ServiceEntry = + ServiceEntry + ServiceName -- Official Name + [ServiceName] -- aliases + PortNumber -- Port Number + ProtocolName -- Protocol + +data HostEntry = + HostEntry + HostName -- Official Name + [HostName] -- aliases + Family -- Host Type (currently AF_INET) + [HostAddress] -- Set of Network Addresses + +\end{code} + +%*************************************************************************** +%* * +\subsection[BSD-DBAccess]{Service, Protocol Host Database Access} +%* * +%*************************************************************************** + +Calling @getServiceByName@ for a given service and protocol returns the +systems service entry. This should be used to find the port numbers +for standard protocols such as SMTP and FTP. The remaining three +functions should be used for browsing the service database +sequentially. + +Calling @setServiceEntry@ with \tr{True} indicates that the service +database should be left open between calls to @getServiceEntry@. To +close the database a call to @endServiceEntry@ is required. This +database file is usually stored in the file /etc/services. + +\begin{code} +getServiceByName :: ServiceName -- Service Name + -> ProtocolName -- Protocol Name + -> IO ServiceEntry -- Service Entry +getServiceByName name proto = do + ptr <- _ccall_ getservbyname name proto + if ptr == ``NULL'' + then fail (IOError Nothing NoSuchThing "no such service entry") + else unpackServiceEntry ptr + +getServiceByPort :: PortNumber -> + ProtocolName -> + IO ServiceEntry +getServiceByPort port proto = do + ptr <- _ccall_ getservbyport port proto + if ptr == ``NULL'' + then fail (IOError Nothing NoSuchThing "no such service entry") + else unpackServiceEntry ptr + +getServicePortNumber :: ServiceName -> IO PortNumber +getServicePortNumber name = do + (ServiceEntry _ _ port _) <- getServiceByName name "tcp" + return port + +getServiceEntry :: IO ServiceEntry +getServiceEntry = do + ptr <- _ccall_ getservent + if ptr == ``NULL'' + then fail (IOError Nothing NoSuchThing "no such service entry") + else unpackServiceEntry ptr + +setServiceEntry :: Bool -> IO () +setServiceEntry flg = _ccall_ setservent stayOpen + where stayOpen = if flg then 1 else 0 + +endServiceEntry :: IO () +endServiceEntry = _ccall_ endservent + +getServiceEntries :: Bool -> IO [ServiceEntry] +getServiceEntries stayOpen = do + setServiceEntry stayOpen + getEntries (getServiceEntry) (endServiceEntry) + +\end{code} + +The following relate directly to the corresponding \tr{UNIX} {C} calls for +returning the protocol entries. The protocol entry is represented by +the Haskell type @ProtocolEntry@. + +As for @setServiceEntry@ above, calling @setProtocolEntry@. +determines whether or not the protocol database file, usually +\tr{/etc/protocols}, is to be kept open between calls of +@getProtocolEntry@. Similarly, + +\begin{code} +getProtocolByName :: ProtocolName -> IO ProtocolEntry +getProtocolByNumber :: PortNumber -> IO ProtocolEntry +getProtocolNumber :: ProtocolName -> IO ProtocolNumber + +setProtocolEntry :: Bool -> IO () -- Keep DB Open ? +getProtocolEntry :: IO ProtocolEntry -- Next Protocol Entry from DB +endProtocolEntry :: IO () +getProtocolEntries :: Bool -> IO [ProtocolEntry] +\end{code} + +\begin{code} +--getProtocolByName :: ProtocolName -> IO ProtocolEntry +getProtocolByName name = do + ptr <- _ccall_ getprotobyname name + if (ptr == ``NULL'' ) + then fail (IOError Nothing NoSuchThing "no such protocol entry") + else unpackProtocolEntry ptr + +--getProtocolByNumber :: PortNumber -> IO ProtocolEntry +getProtocolByNumber num = do + ptr <- _ccall_ getprotobynumber num + if ptr == ``NULL'' + then fail (IOError Nothing NoSuchThing "no such protocol entry") + else unpackProtocolEntry ptr + +--getProtocolNumber :: ProtocolName -> IO ProtocolNumber +getProtocolNumber proto = do + (ProtocolEntry _ _ num) <- getProtocolByName proto + return num + +--getProtocolEntry :: IO ProtocolEntry -- Next Protocol Entry from DB +getProtocolEntry = do + ptr <- _ccall_ getprotoent + if ptr == ``NULL'' + then fail (IOError Nothing NoSuchThing "no such protocol entry") + else unpackProtocolEntry ptr + +--setProtocolEntry :: Bool -> IO () -- Keep DB Open ? +setProtocolEntry flg = _ccall_ setprotoent v + where v = if flg then 1 else 0 + +--endProtocolEntry :: IO () +endProtocolEntry = _ccall_ endprotoent + +--getProtocolEntries :: Bool -> IO [ProtocolEntry] +getProtocolEntries stayOpen = do + setProtocolEntry stayOpen + getEntries (getProtocolEntry) (endProtocolEntry) + +\end{code} + +\begin{code} +getHostByName :: HostName -> IO HostEntry +getHostByName name = do + ptr <- _ccall_ gethostbyname name + if ptr == ``NULL'' + then fail (IOError Nothing NoSuchThing "no such host entry") + else unpackHostEntry ptr + +getHostByAddr :: Family -> HostAddress -> IO HostEntry +getHostByAddr family addr = do + ptr <- _casm_ ``struct in_addr addr; + addr.s_addr = htonl(%0); + %r = gethostbyaddr ((char*)&addr, sizeof(struct in_addr), %1);'' + addr + (packFamily family) + if ptr == ``NULL'' + then fail (IOError Nothing NoSuchThing "no such host entry") + else unpackHostEntry ptr + +getHostEntry :: IO HostEntry +getHostEntry = do + ptr <- _ccall_ gethostent + if ptr == ``NULL'' + then fail (IOError Nothing NoSuchThing "unable to retrieve host entry") + else unpackHostEntry ptr + +setHostEntry :: Bool -> IO () +setHostEntry flg = _ccall_ sethostent v + where v = if flg then 1 else 0 + +endHostEntry :: IO () +endHostEntry = _ccall_ endhostent + +getHostEntries :: Bool -> IO [HostEntry] +getHostEntries stayOpen = do + setHostEntry stayOpen + getEntries (getHostEntry) (endHostEntry) + +\end{code} + +%*************************************************************************** +%* * +\subsection[BSD-Network]{Accessing network information} +%* * +%*************************************************************************** + +Same set of access functions as for accessing host,protocol and service +system info, this time for the types of networks supported. + +\begin{code} +type NetworkAddr = Word +type NetworkName = String + +data NetworkEntry = + NetworkEntry + NetworkName -- official name + [NetworkName] -- aliases + Family -- type + NetworkAddr + +getNetworkByName :: NetworkName -> IO NetworkEntry +getNetworkByName name = do + ptr <- _ccall_ getnetbyname name + if ptr == ``NULL'' + then fail (IOError Nothing NoSuchThing "no such network entry") + else unpackNetworkEntry ptr + +getNetworkByAddr :: NetworkAddr -> Family -> IO NetworkEntry +getNetworkByAddr addr family = do + ptr <- _casm_ ``long naddr = htonl(%0); + %r = getnetbyaddr (naddr, (int)%1);'' + addr + (packFamily family) + if ptr == ``NULL'' + then fail (IOError Nothing NoSuchThing "no such network entry") + else unpackNetworkEntry ptr + +getNetworkEntry :: IO NetworkEntry +getNetworkEntry = do + ptr <- _ccall_ getnetent + if ptr == ``NULL'' + then fail (IOError Nothing NoSuchThing "no more network entries") + else unpackNetworkEntry ptr + +setNetworkEntry :: Bool -> IO () +setNetworkEntry flg = _ccall_ setnetent v + where v = if flg then 1 else 0 + +endNetworkEntry :: IO () +endNetworkEntry = _ccall_ endnetent + +getNetworkEntries :: Bool -> IO [NetworkEntry] +getNetworkEntries stayOpen = do + setNetworkEntry stayOpen + getEntries (getNetworkEntry) (endNetworkEntry) + +\end{code} + +%*************************************************************************** +%* * +\subsection[BSD-Misc]{Miscellaneous Functions} +%* * +%*************************************************************************** + +Calling @getHostName@ returns the standard host name for the current +processor, as set at boot time. + +\begin{code} +getHostName :: IO HostName +getHostName = do + ptr <- stToIO (newCharArray (0,256)) + rc <- _casm_ ``%r=gethostname(%0, 256);'' ptr + ba <- stToIO (unsafeFreezeByteArray ptr) + if rc == -1 + then fail (userError "getHostName: unable to determine host name") + else return (unpackPS (byteArrayToPS ba)) +\end{code} + +Helper function used by the exported functions that provides a +Haskellised view of the enumerator functions: + +\begin{code} +getEntries :: IO a -- read + -> IO () -- at end + -> IO [a] +getEntries getOne atEnd = loop + where + loop = + catch (do { v <- getOne; vs <- loop ; return (v:vs) }) + (\ _ -> do { atEnd; return [] } ) +\end{code} + + +\begin{verbatim} + struct servent { + char *s_name; /* official name of service */ + char **s_aliases; /* alias list */ + int s_port; /* port service resides at */ + char *s_proto; /* protocol to use */ + }; + + The members of this structure are: + s_name The official name of the service. + s_aliases A zero terminated list of alternate + names for the service. + s_port The port number at which the ser- + vice resides. Port numbers are + returned in network short byte + order. + s_proto The name of the protocol to use + when contacting the service. +\end{verbatim} + +\begin{code} +unpackServiceEntry :: Addr -> PrimIO ServiceEntry +unpackServiceEntry ptr = do + str <- _casm_ ``%r = ((struct servent*)%0)->s_name;'' ptr + name <- strcpy str + alias <- _casm_ ``%r = ((struct servent*)%0)->s_aliases;'' ptr + aliases <- unvectorize alias 0 + -- Note: port numbers are represented as ints in (struct servent), but + -- inet port numbers are 16-bit, hence the use of ntohs() rather than ntohl() + port <- _casm_ ``%r = (int)ntohs((int)(((struct servent*)%0)->s_port));'' ptr + str <- _casm_ ``%r = (char *)((struct servent*)%0)->s_proto;'' ptr + proto <- strcpy str + return (ServiceEntry name aliases port proto) + +------------------------------------------------------------------------------- + +unpackProtocolEntry :: Addr -> IO ProtocolEntry +unpackProtocolEntry ptr = do + str <- _casm_ ``%r = ((struct protoent*)%0)->p_name;'' ptr + name <- strcpy str + alias <- _casm_ ``%r = ((struct protoent*)%0)->p_aliases;'' ptr + aliases <- unvectorize alias 0 + proto <- _casm_ ``%r = ((struct protoent*)%0)->p_proto;'' ptr + return (ProtocolEntry name aliases proto) + +------------------------------------------------------------------------------- + +unpackHostEntry :: Addr -> IO HostEntry +unpackHostEntry ptr = do + str <- _casm_ ``%r = ((struct hostent*)%0)->h_name;'' ptr + name <- strcpy str + alias <- _casm_ ``%r = ((struct hostent*)%0)->h_aliases;'' ptr + aliases <- unvectorize alias 0 + addrList <- unvectorizeHostAddrs ptr 0 + return (HostEntry name aliases AF_INET addrList) + +------------------------------------------------------------------------------- + +unpackNetworkEntry :: Addr -> IO NetworkEntry +unpackNetworkEntry ptr = do + str <- _casm_ ``%r = ((struct netent*)%0)->n_name;'' ptr + name <- strcpy str + alias <- _casm_ ``%r = ((struct netent*)%0)->n_aliases;'' ptr + aliases <- unvectorize alias 0 + fam <- _casm_ ``%r = ((struct netent*)%0)->n_addrtype;'' ptr + na <- _casm_ ``%r = ((struct netent*)%0)->n_net;'' ptr + return (NetworkEntry name aliases (unpackFamily fam) na) + +------------------------------------------------------------------------------- + +unvectorizeHostAddrs :: Addr -> Int -> IO [Word] +unvectorizeHostAddrs ptr n + | str == ``NULL'' = return [] + | otherwise = do + x <- _casm_ ``{ u_long tmp; + if ((((struct hostent*)%0)->h_addr_list[(int)%1]) == NULL) + tmp=(W_)0; + else + tmp = (W_)ntohl(((struct in_addr *)(((struct hostent*)%0)->h_addr_list[(int)%1]))->s_addr); + %r=(W_)tmp;} '' + ptr n + xs <- unvectorizeHostAddrs ptr (n+1) + return (x : xs) + where str = indexAddrOffAddr ptr n + +\end{code} diff --git a/ghc/lib/misc/Bag.lhs b/ghc/lib/misc/Bag.lhs new file mode 100644 index 0000000..15678cf --- /dev/null +++ b/ghc/lib/misc/Bag.lhs @@ -0,0 +1,171 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1996 +% +\section[Bags]{@Bag@: an unordered collection with duplicates} + +\begin{code} +#ifdef COMPILING_GHC +#include "HsVersions.h" +#endif + +module Bag ( + Bag, -- abstract type + + emptyBag, unitBag, unionBags, unionManyBags, + mapBag, +#ifndef COMPILING_GHC + elemBag, +#endif + filterBag, partitionBag, concatBag, foldBag, + isEmptyBag, consBag, snocBag, + listToBag, bagToList + ) where + +#ifdef COMPILING_GHC +IMP_Ubiq(){-uitous-} +IMPORT_1_3(List(partition)) + +import Outputable ( interpp'SP ) +import Pretty +#else +import List(partition) +#endif + +data Bag a + = EmptyBag + | UnitBag a + | TwoBags (Bag a) (Bag a) -- The ADT guarantees that at least + -- one branch is non-empty + | ListBag [a] -- The list is non-empty + | ListOfBags [Bag a] -- The list is non-empty + +emptyBag = EmptyBag +unitBag = UnitBag + +#ifndef COMPILING_GHC +elemBag :: Eq a => a -> Bag a -> Bool + +elemBag x EmptyBag = False +elemBag x (UnitBag y) = x==y +elemBag x (TwoBags b1 b2) = x `elemBag` b1 || x `elemBag` b2 +elemBag x (ListBag ys) = any (x ==) ys +elemBag x (ListOfBags bs) = any (x `elemBag`) bs +#endif + +unionManyBags [] = EmptyBag +unionManyBags xs = ListOfBags xs + +-- This one is a bit stricter! The bag will get completely evaluated. + +unionBags EmptyBag b = b +unionBags b EmptyBag = b +unionBags b1 b2 = TwoBags b1 b2 + +consBag :: a -> Bag a -> Bag a +snocBag :: Bag a -> a -> Bag a + +consBag elt bag = (unitBag elt) `unionBags` bag +snocBag bag elt = bag `unionBags` (unitBag elt) + +isEmptyBag EmptyBag = True +isEmptyBag (UnitBag x) = False +isEmptyBag (TwoBags b1 b2) = isEmptyBag b1 && isEmptyBag b2 -- Paranoid, but safe +isEmptyBag (ListBag xs) = null xs -- Paranoid, but safe +isEmptyBag (ListOfBags bs) = all isEmptyBag bs + +filterBag :: (a -> Bool) -> Bag a -> Bag a +filterBag pred EmptyBag = EmptyBag +filterBag pred b@(UnitBag val) = if pred val then b else EmptyBag +filterBag pred (TwoBags b1 b2) = sat1 `unionBags` sat2 + where + sat1 = filterBag pred b1 + sat2 = filterBag pred b2 +filterBag pred (ListBag vs) = listToBag (filter pred vs) +filterBag pred (ListOfBags bs) = ListOfBags sats + where + sats = [filterBag pred b | b <- bs] + +concatBag :: Bag (Bag a) -> Bag a + +concatBag EmptyBag = EmptyBag +concatBag (UnitBag b) = b +concatBag (TwoBags b1 b2) = concatBag b1 `TwoBags` concatBag b2 +concatBag (ListBag bs) = ListOfBags bs +concatBag (ListOfBags bbs) = ListOfBags (map concatBag bbs) + +partitionBag :: (a -> Bool) -> Bag a -> (Bag a {- Satisfy predictate -}, + Bag a {- Don't -}) +partitionBag pred EmptyBag = (EmptyBag, EmptyBag) +partitionBag pred b@(UnitBag val) = if pred val then (b, EmptyBag) else (EmptyBag, b) +partitionBag pred (TwoBags b1 b2) = (sat1 `unionBags` sat2, fail1 `unionBags` fail2) + where + (sat1,fail1) = partitionBag pred b1 + (sat2,fail2) = partitionBag pred b2 +partitionBag pred (ListBag vs) = (listToBag sats, listToBag fails) + where + (sats,fails) = partition pred vs +partitionBag pred (ListOfBags bs) = (ListOfBags sats, ListOfBags fails) + where + (sats, fails) = unzip [partitionBag pred b | b <- bs] + + +foldBag :: (r -> r -> r) -- Replace TwoBags with this; should be associative + -> (a -> r) -- Replace UnitBag with this + -> r -- Replace EmptyBag with this + -> Bag a + -> r + +{- Standard definition +foldBag t u e EmptyBag = e +foldBag t u e (UnitBag x) = u x +foldBag t u e (TwoBags b1 b2) = (foldBag t u e b1) `t` (foldBag t u e b2) +foldBag t u e (ListBag xs) = foldr (t.u) e xs +foldBag t u e (ListOfBags bs) = foldr (\b r -> foldBag e u t b `t` r) e bs +-} + +-- More tail-recursive definition, exploiting associativity of "t" +foldBag t u e EmptyBag = e +foldBag t u e (UnitBag x) = u x `t` e +foldBag t u e (TwoBags b1 b2) = foldBag t u (foldBag t u e b2) b1 +foldBag t u e (ListBag xs) = foldr (t.u) e xs +foldBag t u e (ListOfBags bs) = foldr (\b r -> foldBag t u r b) e bs + + +mapBag :: (a -> b) -> Bag a -> Bag b +mapBag f EmptyBag = EmptyBag +mapBag f (UnitBag x) = UnitBag (f x) +mapBag f (TwoBags b1 b2) = TwoBags (mapBag f b1) (mapBag f b2) +mapBag f (ListBag xs) = ListBag (map f xs) +mapBag f (ListOfBags bs) = ListOfBags (map (mapBag f) bs) + + +listToBag :: [a] -> Bag a +listToBag [] = EmptyBag +listToBag vs = ListBag vs + +bagToList :: Bag a -> [a] +bagToList EmptyBag = [] +bagToList (ListBag vs) = vs +bagToList b = bagToList_append b [] + + -- (bagToList_append b xs) flattens b and puts xs on the end. + -- (not exported) +bagToList_append EmptyBag xs = xs +bagToList_append (UnitBag x) xs = x:xs +bagToList_append (TwoBags b1 b2) xs = bagToList_append b1 (bagToList_append b2 xs) +bagToList_append (ListBag xx) xs = xx++xs +bagToList_append (ListOfBags bs) xs = foldr bagToList_append xs bs +\end{code} + +\begin{code} +#ifdef COMPILING_GHC + +instance (Outputable a) => Outputable (Bag a) where + ppr sty EmptyBag = ppStr "emptyBag" + ppr sty (UnitBag a) = ppr sty a + ppr sty (TwoBags b1 b2) = ppCat [ppr sty b1, pp'SP, ppr sty b2] + ppr sty (ListBag as) = interpp'SP sty as + ppr sty (ListOfBags bs) = ppCat [ppLbrack, interpp'SP sty bs, ppRbrack] + +#endif {- COMPILING_GHC -} +\end{code} diff --git a/ghc/lib/misc/BitSet.lhs b/ghc/lib/misc/BitSet.lhs new file mode 100644 index 0000000..fe49d4b --- /dev/null +++ b/ghc/lib/misc/BitSet.lhs @@ -0,0 +1,196 @@ +% +% (c) The GRASP Project, Glasgow University, 1994-1995 +% +\section[BitSet]{An implementation of very small sets} + +Bit sets are a fast implementation of sets of integers ranging from 0 +to one less than the number of bits in a machine word (typically 31). +If any element exceeds the maximum value for a particular machine +architecture, the results of these operations are undefined. You have +been warned. If you put any safety checks in this code, I will have +to kill you. + +Note: the Yale Haskell implementation won't provide a full 32 bits. +However, if you can handle the performance loss, you could change to +Integer and get virtually unlimited sets. + +\begin{code} + +module BitSet ( + BitSet, -- abstract type + mkBS, listBS, emptyBS, unitBS, + unionBS, minusBS +#if ! defined(COMPILING_GHC) + , elementBS, intersectBS, isEmptyBS +#endif + ) where + +#ifdef __GLASGOW_HASKELL__ +import + PrelBase + +-- nothing to import +#elif defined(__YALE_HASKELL__) +{-hide import from mkdependHS-} +import + LogOpPrims +#else +{-hide import from mkdependHS-} +import + Word +#endif + +#ifdef __GLASGOW_HASKELL__ + +data BitSet = MkBS Word# + +emptyBS :: BitSet +emptyBS = MkBS (int2Word# 0#) + +mkBS :: [Int] -> BitSet +mkBS xs = foldr (unionBS . unitBS) emptyBS xs + +unitBS :: Int -> BitSet +unitBS x = case x of + I# i# -> MkBS ((int2Word# 1#) `shiftL#` i#) + +unionBS :: BitSet -> BitSet -> BitSet +unionBS (MkBS x#) (MkBS y#) = MkBS (x# `or#` y#) + +minusBS :: BitSet -> BitSet -> BitSet +minusBS (MkBS x#) (MkBS y#) = MkBS (x# `and#` (not# y#)) + +#if ! defined(COMPILING_GHC) +-- not used in GHC +isEmptyBS :: BitSet -> Bool +isEmptyBS (MkBS s#) + = case word2Int# s# of + 0# -> True + _ -> False + +intersectBS :: BitSet -> BitSet -> BitSet +intersectBS (MkBS x#) (MkBS y#) = MkBS (x# `and#` y#) + +elementBS :: Int -> BitSet -> Bool +elementBS x (MkBS s#) = case x of + I# i# -> case word2Int# (((int2Word# 1#) `shiftL#` i#) `and#` s#) of + 0# -> False + _ -> True +#endif + +listBS :: BitSet -> [Int] +listBS s = listify s 0 + where listify (MkBS s#) n = + case word2Int# s# of + 0# -> [] + _ -> let s' = (MkBS (s# `shiftr` 1#)) + more = listify s' (n + 1) + in case word2Int# (s# `and#` (int2Word# 1#)) of + 0# -> more + _ -> n : more + shiftr x y = shiftRL# x y + +#elif defined(__YALE_HASKELL__) + +data BitSet = MkBS Int + +emptyBS :: BitSet +emptyBS = MkBS 0 + +mkBS :: [Int] -> BitSet +mkBS xs = foldr (unionBS . unitBS) emptyBS xs + +unitBS :: Int -> BitSet +unitBS x = MkBS (1 `ashInt` x) + +unionBS :: BitSet -> BitSet -> BitSet +unionBS (MkBS x) (MkBS y) = MkBS (x `logiorInt` y) + +#if ! defined(COMPILING_GHC) +-- not used in GHC +isEmptyBS :: BitSet -> Bool +isEmptyBS (MkBS s) + = case s of + 0 -> True + _ -> False + +intersectBS :: BitSet -> BitSet -> BitSet +intersectBS (MkBS x) (MkBS y) = MkBS (x `logandInt` y) + +elementBS :: Int -> BitSet -> Bool +elementBS x (MkBS s) + = case logbitpInt x s of + 0 -> False + _ -> True +#endif + +minusBS :: BitSet -> BitSet -> BitSet +minusBS (MkBS x) (MkBS y) = MkBS (x `logandc2Int` y) + +-- rewritten to avoid right shifts (which would give nonsense on negative +-- values. +listBS :: BitSet -> [Int] +listBS (MkBS s) = listify s 0 1 + where listify s n m = + case s of + 0 -> [] + _ -> let n' = n+1; m' = m+m in + case logbitpInt s m of + 0 -> listify s n' m' + _ -> n : listify (s `logandc2Int` m) n' m' + +#else /* HBC, perhaps? */ + +data BitSet = MkBS Word + +emptyBS :: BitSet +emptyBS = MkBS 0 + +mkBS :: [Int] -> BitSet +mkBS xs = foldr (unionBS . unitBS) emptyBS xs + +unitBS :: Int -> BitSet +unitBS x = MkBS (1 `bitLsh` x) + +unionBS :: BitSet -> BitSet -> BitSet +unionBS (MkBS x) (MkBS y) = MkBS (x `bitOr` y) + +#if ! defined(COMPILING_GHC) +-- not used in GHC +isEmptyBS :: BitSet -> Bool +isEmptyBS (MkBS s) + = case s of + 0 -> True + _ -> False + +intersectBS :: BitSet -> BitSet -> BitSet +intersectBS (MkBS x) (MkBS y) = MkBS (x `bitAnd` y) + +elementBS :: Int -> BitSet -> Bool +elementBS x (MkBS s) + = case (1 `bitLsh` x) `bitAnd` s of + 0 -> False + _ -> True +#endif + +minusBS :: BitSet -> BitSet -> BitSet +minusBS (MkBS x) (MkBS y) = MkBS (x `bitAnd` (bitCompl y)) + +listBS :: BitSet -> [Int] +listBS (MkBS s) = listify s 0 + where listify s n = + case s of + 0 -> [] + _ -> let s' = s `bitRsh` 1 + more = listify s' (n + 1) + in case (s `bitAnd` 1) of + 0 -> more + _ -> n : more + +#endif + +\end{code} + + + + diff --git a/ghc/lib/misc/CharSeq.lhs b/ghc/lib/misc/CharSeq.lhs new file mode 100644 index 0000000..43dfb7f --- /dev/null +++ b/ghc/lib/misc/CharSeq.lhs @@ -0,0 +1,202 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1995 +% +\section[CharSeq]{Characters sequences: the @CSeq@ type} + +\begin{code} +#if defined(COMPILING_GHC) +# include "HsVersions.h" +#else +# define FAST_STRING String +# define FAST_INT Int +# define ILIT(x) (x) +# define IBOX(x) (x) +# define _GE_ >= +# define _ADD_ + +# define _SUB_ - +# define FAST_BOOL Bool +# define _TRUE_ True +# define _FALSE_ False +#endif + +module CharSeq ( + CSeq, + cNil, cAppend, cIndent, cNL, cStr, cPStr, cCh, cInt, +#if ! defined(COMPILING_GHC) + cLength, + cShows, +#endif + cShow + +#if ! defined(COMPILING_GHC) + ) where +#else + , cPutStr + ) where + +CHK_Ubiq() -- debugging consistency check +IMPORT_1_3(IO) + +#endif +\end{code} + +%************************************************ +%* * + \subsection{The interface} +%* * +%************************************************ + +\begin{code} +cShow :: CSeq -> [Char] + +#if ! defined(COMPILING_GHC) +-- not used in GHC +cShows :: CSeq -> ShowS +cLength :: CSeq -> Int +#endif + +cNil :: CSeq +cAppend :: CSeq -> CSeq -> CSeq +cIndent :: Int -> CSeq -> CSeq +cNL :: CSeq +cStr :: [Char] -> CSeq +cPStr :: FAST_STRING -> CSeq +cCh :: Char -> CSeq +cInt :: Int -> CSeq + +#if defined(COMPILING_GHC) +cPutStr :: Handle -> CSeq -> IO () +#endif +\end{code} + +%************************************************ +%* * + \subsection{The representation} +%* * +%************************************************ + +\begin{code} +data CSeq + = CNil + | CAppend CSeq CSeq + | CIndent Int CSeq + | CNewline -- Move to start of next line, unless we're + -- already at the start of a line. + | CStr [Char] + | CCh Char + | CInt Int -- equiv to "CStr (show the_int)" +#if defined(COMPILING_GHC) + | CPStr FAST_STRING +#endif +\end{code} + +The construction functions do pattern matching, to ensure that +redundant CNils are eliminated. This is bound to have some effect on +evaluation order, but quite what I don't know. + +\begin{code} +cNil = CNil +\end{code} + +The following special cases were eating our lunch! They make the whole +thing too strict. A classic strictness bug! +\begin{code} +-- cAppend CNil cs2 = cs2 +-- cAppend cs1 CNil = cs1 + +cAppend cs1 cs2 = CAppend cs1 cs2 + +cIndent n cs = CIndent n cs + +cNL = CNewline +cStr = CStr +cCh = CCh +cInt = CInt + +#if defined(COMPILING_GHC) +cPStr = CPStr +#else +cPStr = CStr +#endif + +cShow seq = flatten ILIT(0) _TRUE_ seq [] + +#if ! defined(COMPILING_GHC) +cShows seq rest = cShow seq ++ rest +cLength seq = length (cShow seq) -- *not* the best way to do this! +#endif +\end{code} + +This code is {\em hammered}. We are not above doing sleazy +non-standard things. (WDP 94/10) + +\begin{code} +data WorkItem = WI FAST_INT CSeq -- indentation, and sequence + +flatten :: FAST_INT -- Indentation + -> FAST_BOOL -- True => just had a newline + -> CSeq -- Current seq to flatten + -> [WorkItem] -- Work list with indentation + -> String + +flatten n nlp CNil seqs = flattenS nlp seqs + +flatten n nlp (CAppend seq1 seq2) seqs = flatten n nlp seq1 ((WI n seq2) : seqs) +flatten n nlp (CIndent IBOX(n2) seq) seqs = flatten (n2 _ADD_ n) nlp seq seqs + +flatten n _FALSE_ CNewline seqs = '\n' : flattenS _TRUE_ seqs +flatten n _TRUE_ CNewline seqs = flattenS _TRUE_ seqs -- Already at start of line + +flatten n _FALSE_ (CStr s) seqs = s ++ flattenS _FALSE_ seqs +flatten n _FALSE_ (CCh c) seqs = c : flattenS _FALSE_ seqs +flatten n _FALSE_ (CInt i) seqs = show i ++ flattenS _FALSE_ seqs +#if defined(COMPILING_GHC) +flatten n _FALSE_ (CPStr s) seqs = _UNPK_ s ++ flattenS _FALSE_ seqs +#endif + +flatten n _TRUE_ (CStr s) seqs = mkIndent n (s ++ flattenS _FALSE_ seqs) +flatten n _TRUE_ (CCh c) seqs = mkIndent n (c : flattenS _FALSE_ seqs) +flatten n _TRUE_ (CInt i) seqs = mkIndent n (show i ++ flattenS _FALSE_ seqs) +#if defined(COMPILING_GHC) +flatten n _TRUE_ (CPStr s) seqs = mkIndent n ( _UNPK_ s ++ flattenS _FALSE_ seqs) +#endif +\end{code} + +\begin{code} +flattenS :: FAST_BOOL -> [WorkItem] -> String +flattenS nlp [] = "" +flattenS nlp ((WI col seq):seqs) = flatten col nlp seq seqs +\end{code} + +\begin{code} +mkIndent :: FAST_INT -> String -> String +mkIndent ILIT(0) s = s +mkIndent n s + = if (n _GE_ ILIT(8)) + then '\t' : mkIndent (n _SUB_ ILIT(8)) s + else ' ' : mkIndent (n _SUB_ ILIT(1)) s + -- Hmm.. a little Unix-y. +\end{code} + +Now the I/O version. +This code is massively {\em hammered}. +It {\em ignores} indentation. + +(NB: 1.3 compiler: efficiency hacks removed for now!) + +\begin{code} +#if defined(COMPILING_GHC) + +cPutStr handle sq = flat sq + where + flat CNil = return () + flat (CIndent n2 seq) = flat seq + flat (CAppend s1 s2) = flat s1 >> flat s2 + flat CNewline = hPutChar handle '\n' + flat (CCh c) = hPutChar handle c + flat (CInt i) = hPutStr handle (show i) + flat (CStr s) = hPutStr handle s + flat (CPStr s) = hPutStr handle (_UNPK_ s) + +#endif {- COMPILING_GHC -} +\end{code} diff --git a/ghc/lib/misc/FiniteMap.lhs b/ghc/lib/misc/FiniteMap.lhs new file mode 100644 index 0000000..8436c8c --- /dev/null +++ b/ghc/lib/misc/FiniteMap.lhs @@ -0,0 +1,831 @@ +% +% (c) The AQUA Project, Glasgow University, 1994-1996 +% +\section[FiniteMap]{An implementation of finite maps} + +``Finite maps'' are the heart of the compiler's +lookup-tables/environments and its implementation of sets. Important +stuff! + +This code is derived from that in the paper: +\begin{display} + S Adams + "Efficient sets: a balancing act" + Journal of functional programming 3(4) Oct 1993, pp553-562 +\end{display} + +The code is SPECIALIZEd to various highly-desirable types (e.g., Id) +near the end (only \tr{#ifdef COMPILING_GHC}). + +\begin{code} +#ifdef COMPILING_GHC +#include "HsVersions.h" +#define IF_NOT_GHC(a) {--} +#else +#define ASSERT(e) {--} +#define IF_NOT_GHC(a) a +#define COMMA , +#define _tagCmp compare +#define _LT LT +#define _GT GT +#define _EQ EQ +#endif + +#if defined(COMPILING_GHC) && defined(DEBUG_FINITEMAPS)/* NB NB NB */ +#define OUTPUTABLE_key , Outputable key +#else +#define OUTPUTABLE_key {--} +#endif + +module FiniteMap ( + FiniteMap, -- abstract type + + emptyFM, unitFM, listToFM, + + addToFM, + addToFM_C, + addListToFM, + addListToFM_C, + IF_NOT_GHC(delFromFM COMMA) + delListFromFM, + + plusFM, + plusFM_C, + minusFM, + foldFM, + + IF_NOT_GHC(intersectFM COMMA) + IF_NOT_GHC(intersectFM_C COMMA) + IF_NOT_GHC(mapFM COMMA filterFM COMMA) + + sizeFM, isEmptyFM, elemFM, lookupFM, lookupWithDefaultFM, + + fmToList, keysFM, eltsFM + +#ifdef COMPILING_GHC + , bagToFM + , SYN_IE(FiniteSet), emptySet, mkSet, isEmptySet + , elementOf, setToList, union, minusSet +#endif + ) where + +import PrelBase +import Maybes +#ifdef COMPILING_GHC +IMP_Ubiq(){-uitous-} +# ifdef DEBUG +import Pretty +# endif +import Bag ( foldBag ) + +# if ! OMIT_NATIVE_CODEGEN +# define IF_NCG(a) a +# else +# define IF_NCG(a) {--} +# endif +#endif + +-- SIGH: but we use unboxed "sizes"... +#if __GLASGOW_HASKELL__ +#define IF_GHC(a,b) a +#else /* not GHC */ +#define IF_GHC(a,b) b +#endif /* not GHC */ +\end{code} + + +%************************************************************************ +%* * +\subsection{The signature of the module} +%* * +%************************************************************************ + +\begin{code} +-- BUILDING +emptyFM :: FiniteMap key elt +unitFM :: key -> elt -> FiniteMap key elt +listToFM :: (Ord key OUTPUTABLE_key) => [(key,elt)] -> FiniteMap key elt + -- In the case of duplicates, the last is taken +#ifdef COMPILING_GHC +bagToFM :: (Ord key OUTPUTABLE_key) => Bag (key,elt) -> FiniteMap key elt + -- In the case of duplicates, who knows which is taken +#endif + +-- ADDING AND DELETING + -- Throws away any previous binding + -- In the list case, the items are added starting with the + -- first one in the list +addToFM :: (Ord key OUTPUTABLE_key) => FiniteMap key elt -> key -> elt -> FiniteMap key elt +addListToFM :: (Ord key OUTPUTABLE_key) => FiniteMap key elt -> [(key,elt)] -> FiniteMap key elt + + -- Combines with previous binding + -- In the combining function, the first argument is the "old" element, + -- while the second is the "new" one. +addToFM_C :: (Ord key OUTPUTABLE_key) => (elt -> elt -> elt) + -> FiniteMap key elt -> key -> elt + -> FiniteMap key elt +addListToFM_C :: (Ord key OUTPUTABLE_key) => (elt -> elt -> elt) + -> FiniteMap key elt -> [(key,elt)] + -> FiniteMap key elt + + -- Deletion doesn't complain if you try to delete something + -- which isn't there +delFromFM :: (Ord key OUTPUTABLE_key) => FiniteMap key elt -> key -> FiniteMap key elt +delListFromFM :: (Ord key OUTPUTABLE_key) => FiniteMap key elt -> [key] -> FiniteMap key elt + +-- COMBINING + -- Bindings in right argument shadow those in the left +plusFM :: (Ord key OUTPUTABLE_key) => FiniteMap key elt -> FiniteMap key elt + -> FiniteMap key elt + + -- Combines bindings for the same thing with the given function +plusFM_C :: (Ord key OUTPUTABLE_key) => (elt -> elt -> elt) + -> FiniteMap key elt -> FiniteMap key elt -> FiniteMap key elt + +minusFM :: (Ord key OUTPUTABLE_key) => FiniteMap key elt -> FiniteMap key elt -> FiniteMap key elt + -- (minusFM a1 a2) deletes from a1 any bindings which are bound in a2 + +intersectFM :: (Ord key OUTPUTABLE_key) => FiniteMap key elt -> FiniteMap key elt -> FiniteMap key elt +intersectFM_C :: (Ord key OUTPUTABLE_key) => (elt -> elt -> elt) + -> FiniteMap key elt -> FiniteMap key elt -> FiniteMap key elt + +-- MAPPING, FOLDING, FILTERING +foldFM :: (key -> elt -> a -> a) -> a -> FiniteMap key elt -> a +mapFM :: (key -> elt1 -> elt2) -> FiniteMap key elt1 -> FiniteMap key elt2 +filterFM :: (Ord key OUTPUTABLE_key) => (key -> elt -> Bool) + -> FiniteMap key elt -> FiniteMap key elt + +-- INTERROGATING +sizeFM :: FiniteMap key elt -> Int +isEmptyFM :: FiniteMap key elt -> Bool + +elemFM :: (Ord key OUTPUTABLE_key) => key -> FiniteMap key elt -> Bool +lookupFM :: (Ord key OUTPUTABLE_key) => FiniteMap key elt -> key -> Maybe elt +lookupWithDefaultFM + :: (Ord key OUTPUTABLE_key) => FiniteMap key elt -> elt -> key -> elt + -- lookupWithDefaultFM supplies a "default" elt + -- to return for an unmapped key + +-- LISTIFYING +fmToList :: FiniteMap key elt -> [(key,elt)] +keysFM :: FiniteMap key elt -> [key] +eltsFM :: FiniteMap key elt -> [elt] +\end{code} + +%************************************************************************ +%* * +\subsection{The @FiniteMap@ data type, and building of same} +%* * +%************************************************************************ + +Invariants about @FiniteMap@: +\begin{enumerate} +\item +all keys in a FiniteMap are distinct +\item +all keys in left subtree are $<$ key in Branch and +all keys in right subtree are $>$ key in Branch +\item +size field of a Branch gives number of Branch nodes in the tree +\item +size of left subtree is differs from size of right subtree by a +factor of at most \tr{sIZE_RATIO} +\end{enumerate} + +\begin{code} +data FiniteMap key elt + = EmptyFM + | Branch key elt -- Key and elt stored here + IF_GHC(Int#,Int{-STRICT-}) -- Size >= 1 + (FiniteMap key elt) -- Children + (FiniteMap key elt) +\end{code} + +\begin{code} +emptyFM = EmptyFM +{- +emptyFM + = Branch bottom bottom IF_GHC(0#,0) bottom bottom + where + bottom = panic "emptyFM" +-} + +-- #define EmptyFM (Branch _ _ IF_GHC(0#,0) _ _) + +unitFM key elt = Branch key elt IF_GHC(1#,1) emptyFM emptyFM + +listToFM = addListToFM emptyFM + +#ifdef COMPILING_GHC +bagToFM = foldBag plusFM (\ (k,v) -> unitFM k v) emptyFM +#endif +\end{code} + +%************************************************************************ +%* * +\subsection{Adding to and deleting from @FiniteMaps@} +%* * +%************************************************************************ + +\begin{code} +addToFM fm key elt = addToFM_C (\ old new -> new) fm key elt + +addToFM_C combiner EmptyFM key elt = unitFM key elt +addToFM_C combiner (Branch key elt size fm_l fm_r) new_key new_elt +#ifdef __GLASGOW_HASKELL__ + = case _tagCmp new_key key of + _LT -> mkBalBranch key elt (addToFM_C combiner fm_l new_key new_elt) fm_r + _GT -> mkBalBranch key elt fm_l (addToFM_C combiner fm_r new_key new_elt) + _EQ -> Branch new_key (combiner elt new_elt) size fm_l fm_r +#else + | new_key < key = mkBalBranch key elt (addToFM_C combiner fm_l new_key new_elt) fm_r + | new_key > key = mkBalBranch key elt fm_l (addToFM_C combiner fm_r new_key new_elt) + | otherwise = Branch new_key (combiner elt new_elt) size fm_l fm_r +#endif + +addListToFM fm key_elt_pairs = addListToFM_C (\ old new -> new) fm key_elt_pairs + +addListToFM_C combiner fm key_elt_pairs + = foldl add fm key_elt_pairs -- foldl adds from the left + where + add fmap (key,elt) = addToFM_C combiner fmap key elt +\end{code} + +\begin{code} +delFromFM EmptyFM del_key = emptyFM +delFromFM (Branch key elt size fm_l fm_r) del_key +#if __GLASGOW_HASKELL__ + = case _tagCmp del_key key of + _GT -> mkBalBranch key elt fm_l (delFromFM fm_r del_key) + _LT -> mkBalBranch key elt (delFromFM fm_l del_key) fm_r + _EQ -> glueBal fm_l fm_r +#else + | del_key > key + = mkBalBranch key elt fm_l (delFromFM fm_r del_key) + + | del_key < key + = mkBalBranch key elt (delFromFM fm_l del_key) fm_r + + | key == del_key + = glueBal fm_l fm_r +#endif + +delListFromFM fm keys = foldl delFromFM fm keys +\end{code} + +%************************************************************************ +%* * +\subsection{Combining @FiniteMaps@} +%* * +%************************************************************************ + +\begin{code} +plusFM_C combiner EmptyFM fm2 = fm2 +plusFM_C combiner fm1 EmptyFM = fm1 +plusFM_C combiner fm1 (Branch split_key elt2 _ left right) + = mkVBalBranch split_key new_elt + (plusFM_C combiner lts left) + (plusFM_C combiner gts right) + where + lts = splitLT fm1 split_key + gts = splitGT fm1 split_key + new_elt = case lookupFM fm1 split_key of + Nothing -> elt2 + Just elt1 -> combiner elt1 elt2 + +-- It's worth doing plusFM specially, because we don't need +-- to do the lookup in fm1. + +plusFM EmptyFM fm2 = fm2 +plusFM fm1 EmptyFM = fm1 +plusFM fm1 (Branch split_key elt1 _ left right) + = mkVBalBranch split_key elt1 (plusFM lts left) (plusFM gts right) + where + lts = splitLT fm1 split_key + gts = splitGT fm1 split_key + +minusFM EmptyFM fm2 = emptyFM +minusFM fm1 EmptyFM = fm1 +minusFM fm1 (Branch split_key elt _ left right) + = glueVBal (minusFM lts left) (minusFM gts right) + -- The two can be way different, so we need glueVBal + where + lts = splitLT fm1 split_key -- NB gt and lt, so the equal ones + gts = splitGT fm1 split_key -- are not in either. + +intersectFM fm1 fm2 = intersectFM_C (\ left right -> right) fm1 fm2 + +intersectFM_C combiner fm1 EmptyFM = emptyFM +intersectFM_C combiner EmptyFM fm2 = emptyFM +intersectFM_C combiner fm1 (Branch split_key elt2 _ left right) + + | maybeToBool maybe_elt1 -- split_elt *is* in intersection + = mkVBalBranch split_key (combiner elt1 elt2) (intersectFM_C combiner lts left) + (intersectFM_C combiner gts right) + + | otherwise -- split_elt is *not* in intersection + = glueVBal (intersectFM_C combiner lts left) (intersectFM_C combiner gts right) + + where + lts = splitLT fm1 split_key -- NB gt and lt, so the equal ones + gts = splitGT fm1 split_key -- are not in either. + + maybe_elt1 = lookupFM fm1 split_key + Just elt1 = maybe_elt1 +\end{code} + +%************************************************************************ +%* * +\subsection{Mapping, folding, and filtering with @FiniteMaps@} +%* * +%************************************************************************ + +\begin{code} +foldFM k z EmptyFM = z +foldFM k z (Branch key elt _ fm_l fm_r) + = foldFM k (k key elt (foldFM k z fm_r)) fm_l + +mapFM f EmptyFM = emptyFM +mapFM f (Branch key elt size fm_l fm_r) + = Branch key (f key elt) size (mapFM f fm_l) (mapFM f fm_r) + +filterFM p EmptyFM = emptyFM +filterFM p (Branch key elt _ fm_l fm_r) + | p key elt -- Keep the item + = mkVBalBranch key elt (filterFM p fm_l) (filterFM p fm_r) + + | otherwise -- Drop the item + = glueVBal (filterFM p fm_l) (filterFM p fm_r) +\end{code} + +%************************************************************************ +%* * +\subsection{Interrogating @FiniteMaps@} +%* * +%************************************************************************ + +\begin{code} +--{-# INLINE sizeFM #-} +sizeFM EmptyFM = 0 +sizeFM (Branch _ _ size _ _) = IF_GHC(I# size, size) + +isEmptyFM fm = sizeFM fm == 0 + +lookupFM EmptyFM key = Nothing +lookupFM (Branch key elt _ fm_l fm_r) key_to_find +#if __GLASGOW_HASKELL__ + = case _tagCmp key_to_find key of + _LT -> lookupFM fm_l key_to_find + _GT -> lookupFM fm_r key_to_find + _EQ -> Just elt +#else + | key_to_find < key = lookupFM fm_l key_to_find + | key_to_find > key = lookupFM fm_r key_to_find + | otherwise = Just elt +#endif + +key `elemFM` fm + = case (lookupFM fm key) of { Nothing -> False; Just elt -> True } + +lookupWithDefaultFM fm deflt key + = case (lookupFM fm key) of { Nothing -> deflt; Just elt -> elt } +\end{code} + +%************************************************************************ +%* * +\subsection{Listifying @FiniteMaps@} +%* * +%************************************************************************ + +\begin{code} +fmToList fm = foldFM (\ key elt rest -> (key,elt) : rest) [] fm +keysFM fm = foldFM (\ key elt rest -> key : rest) [] fm +eltsFM fm = foldFM (\ key elt rest -> elt : rest) [] fm +\end{code} + + +%************************************************************************ +%* * +\subsection{The implementation of balancing} +%* * +%************************************************************************ + +%************************************************************************ +%* * +\subsubsection{Basic construction of a @FiniteMap@} +%* * +%************************************************************************ + +@mkBranch@ simply gets the size component right. This is the ONLY +(non-trivial) place the Branch object is built, so the ASSERTion +recursively checks consistency. (The trivial use of Branch is in +@unitFM@.) + +\begin{code} +sIZE_RATIO :: Int +sIZE_RATIO = 5 + +mkBranch :: (Ord key OUTPUTABLE_key) -- Used for the assertion checking only + => Int + -> key -> elt + -> FiniteMap key elt -> FiniteMap key elt + -> FiniteMap key elt + +mkBranch which key elt fm_l fm_r + = --ASSERT( left_ok && right_ok && balance_ok ) +#if defined(COMPILING_GHC) && defined(DEBUG_FINITEMAPS) + if not ( left_ok && right_ok && balance_ok ) then + pprPanic ("mkBranch:"++show which) (ppAboves [ppr PprDebug [left_ok, right_ok, balance_ok], + ppr PprDebug key, + ppr PprDebug fm_l, + ppr PprDebug fm_r]) + else +#endif + let + result = Branch key elt (unbox (1 + left_size + right_size)) fm_l fm_r + in +-- if sizeFM result <= 8 then + result +-- else +-- pprTrace ("mkBranch:"++(show which)) (ppr PprDebug result) ( +-- result +-- ) + where + left_ok = case fm_l of + EmptyFM -> True + Branch left_key _ _ _ _ -> let + biggest_left_key = fst (findMax fm_l) + in + biggest_left_key < key + right_ok = case fm_r of + EmptyFM -> True + Branch right_key _ _ _ _ -> let + smallest_right_key = fst (findMin fm_r) + in + key < smallest_right_key + balance_ok = True -- sigh +{- LATER: + balance_ok + = -- Both subtrees have one or no elements... + (left_size + right_size <= 1) +-- NO || left_size == 0 -- ??? +-- NO || right_size == 0 -- ??? + -- ... or the number of elements in a subtree does not exceed + -- sIZE_RATIO times the number of elements in the other subtree + || (left_size * sIZE_RATIO >= right_size && + right_size * sIZE_RATIO >= left_size) +-} + + left_size = sizeFM fm_l + right_size = sizeFM fm_r + +#if __GLASGOW_HASKELL__ + unbox :: Int -> Int# + unbox (I# size) = size +#else + unbox :: Int -> Int + unbox x = x +#endif +\end{code} + +%************************************************************************ +%* * +\subsubsection{{\em Balanced} construction of a @FiniteMap@} +%* * +%************************************************************************ + +@mkBalBranch@ rebalances, assuming that the subtrees aren't too far +out of whack. + +\begin{code} +mkBalBranch :: (Ord key OUTPUTABLE_key) + => key -> elt + -> FiniteMap key elt -> FiniteMap key elt + -> FiniteMap key elt + +mkBalBranch key elt fm_L fm_R + + | size_l + size_r < 2 + = mkBranch 1{-which-} key elt fm_L fm_R + + | size_r > sIZE_RATIO * size_l -- Right tree too big + = case fm_R of + Branch _ _ _ fm_rl fm_rr + | sizeFM fm_rl < 2 * sizeFM fm_rr -> single_L fm_L fm_R + | otherwise -> double_L fm_L fm_R + -- Other case impossible + + | size_l > sIZE_RATIO * size_r -- Left tree too big + = case fm_L of + Branch _ _ _ fm_ll fm_lr + | sizeFM fm_lr < 2 * sizeFM fm_ll -> single_R fm_L fm_R + | otherwise -> double_R fm_L fm_R + -- Other case impossible + + | otherwise -- No imbalance + = mkBranch 2{-which-} key elt fm_L fm_R + + where + size_l = sizeFM fm_L + size_r = sizeFM fm_R + + single_L fm_l (Branch key_r elt_r _ fm_rl fm_rr) + = mkBranch 3{-which-} key_r elt_r (mkBranch 4{-which-} key elt fm_l fm_rl) fm_rr + + double_L fm_l (Branch key_r elt_r _ (Branch key_rl elt_rl _ fm_rll fm_rlr) fm_rr) + = mkBranch 5{-which-} key_rl elt_rl (mkBranch 6{-which-} key elt fm_l fm_rll) + (mkBranch 7{-which-} key_r elt_r fm_rlr fm_rr) + + single_R (Branch key_l elt_l _ fm_ll fm_lr) fm_r + = mkBranch 8{-which-} key_l elt_l fm_ll (mkBranch 9{-which-} key elt fm_lr fm_r) + + double_R (Branch key_l elt_l _ fm_ll (Branch key_lr elt_lr _ fm_lrl fm_lrr)) fm_r + = mkBranch 10{-which-} key_lr elt_lr (mkBranch 11{-which-} key_l elt_l fm_ll fm_lrl) + (mkBranch 12{-which-} key elt fm_lrr fm_r) +\end{code} + + +\begin{code} +mkVBalBranch :: (Ord key OUTPUTABLE_key) + => key -> elt + -> FiniteMap key elt -> FiniteMap key elt + -> FiniteMap key elt + +-- Assert: in any call to (mkVBalBranch_C comb key elt l r), +-- (a) all keys in l are < all keys in r +-- (b) all keys in l are < key +-- (c) all keys in r are > key + +mkVBalBranch key elt EmptyFM fm_r = addToFM fm_r key elt +mkVBalBranch key elt fm_l EmptyFM = addToFM fm_l key elt + +mkVBalBranch key elt fm_l@(Branch key_l elt_l _ fm_ll fm_lr) + fm_r@(Branch key_r elt_r _ fm_rl fm_rr) + | sIZE_RATIO * size_l < size_r + = mkBalBranch key_r elt_r (mkVBalBranch key elt fm_l fm_rl) fm_rr + + | sIZE_RATIO * size_r < size_l + = mkBalBranch key_l elt_l fm_ll (mkVBalBranch key elt fm_lr fm_r) + + | otherwise + = mkBranch 13{-which-} key elt fm_l fm_r + + where + size_l = sizeFM fm_l + size_r = sizeFM fm_r +\end{code} + +%************************************************************************ +%* * +\subsubsection{Gluing two trees together} +%* * +%************************************************************************ + +@glueBal@ assumes its two arguments aren't too far out of whack, just +like @mkBalBranch@. But: all keys in first arg are $<$ all keys in +second. + +\begin{code} +glueBal :: (Ord key OUTPUTABLE_key) + => FiniteMap key elt -> FiniteMap key elt + -> FiniteMap key elt + +glueBal EmptyFM fm2 = fm2 +glueBal fm1 EmptyFM = fm1 +glueBal fm1 fm2 + -- The case analysis here (absent in Adams' program) is really to deal + -- with the case where fm2 is a singleton. Then deleting the minimum means + -- we pass an empty tree to mkBalBranch, which breaks its invariant. + | sizeFM fm2 > sizeFM fm1 + = mkBalBranch mid_key2 mid_elt2 fm1 (deleteMin fm2) + + | otherwise + = mkBalBranch mid_key1 mid_elt1 (deleteMax fm1) fm2 + where + (mid_key1, mid_elt1) = findMax fm1 + (mid_key2, mid_elt2) = findMin fm2 +\end{code} + +@glueVBal@ copes with arguments which can be of any size. +But: all keys in first arg are $<$ all keys in second. + +\begin{code} +glueVBal :: (Ord key OUTPUTABLE_key) + => FiniteMap key elt -> FiniteMap key elt + -> FiniteMap key elt + +glueVBal EmptyFM fm2 = fm2 +glueVBal fm1 EmptyFM = fm1 +glueVBal fm_l@(Branch key_l elt_l _ fm_ll fm_lr) + fm_r@(Branch key_r elt_r _ fm_rl fm_rr) + | sIZE_RATIO * size_l < size_r + = mkBalBranch key_r elt_r (glueVBal fm_l fm_rl) fm_rr + + | sIZE_RATIO * size_r < size_l + = mkBalBranch key_l elt_l fm_ll (glueVBal fm_lr fm_r) + + | otherwise -- We now need the same two cases as in glueBal above. + = glueBal fm_l fm_r + where + (mid_key_l,mid_elt_l) = findMax fm_l + (mid_key_r,mid_elt_r) = findMin fm_r + size_l = sizeFM fm_l + size_r = sizeFM fm_r +\end{code} + +%************************************************************************ +%* * +\subsection{Local utilities} +%* * +%************************************************************************ + +\begin{code} +splitLT, splitGT :: (Ord key OUTPUTABLE_key) => FiniteMap key elt -> key -> FiniteMap key elt + +-- splitLT fm split_key = fm restricted to keys < split_key +-- splitGT fm split_key = fm restricted to keys > split_key + +splitLT EmptyFM split_key = emptyFM +splitLT (Branch key elt _ fm_l fm_r) split_key +#if __GLASGOW_HASKELL__ + = case _tagCmp split_key key of + _LT -> splitLT fm_l split_key + _GT -> mkVBalBranch key elt fm_l (splitLT fm_r split_key) + _EQ -> fm_l +#else + | split_key < key = splitLT fm_l split_key + | split_key > key = mkVBalBranch key elt fm_l (splitLT fm_r split_key) + | otherwise = fm_l +#endif + +splitGT EmptyFM split_key = emptyFM +splitGT (Branch key elt _ fm_l fm_r) split_key +#if __GLASGOW_HASKELL__ + = case _tagCmp split_key key of + _GT -> splitGT fm_r split_key + _LT -> mkVBalBranch key elt (splitGT fm_l split_key) fm_r + _EQ -> fm_r +#else + | split_key > key = splitGT fm_r split_key + | split_key < key = mkVBalBranch key elt (splitGT fm_l split_key) fm_r + | otherwise = fm_r +#endif + +findMin :: FiniteMap key elt -> (key,elt) +findMin (Branch key elt _ EmptyFM _) = (key,elt) +findMin (Branch key elt _ fm_l _) = findMin fm_l + +deleteMin :: (Ord key OUTPUTABLE_key) => FiniteMap key elt -> FiniteMap key elt +deleteMin (Branch key elt _ EmptyFM fm_r) = fm_r +deleteMin (Branch key elt _ fm_l fm_r) = mkBalBranch key elt (deleteMin fm_l) fm_r + +findMax :: FiniteMap key elt -> (key,elt) +findMax (Branch key elt _ _ EmptyFM) = (key,elt) +findMax (Branch key elt _ _ fm_r) = findMax fm_r + +deleteMax :: (Ord key OUTPUTABLE_key) => FiniteMap key elt -> FiniteMap key elt +deleteMax (Branch key elt _ fm_l EmptyFM) = fm_l +deleteMax (Branch key elt _ fm_l fm_r) = mkBalBranch key elt fm_l (deleteMax fm_r) +\end{code} + +%************************************************************************ +%* * +\subsection{Output-ery} +%* * +%************************************************************************ + +\begin{code} +#if defined(COMPILING_GHC) && defined(DEBUG_FINITEMAPS) + +instance (Outputable key) => Outputable (FiniteMap key elt) where + ppr sty fm = pprX sty fm + +pprX sty EmptyFM = ppChar '!' +pprX sty (Branch key elt sz fm_l fm_r) + = ppBesides [ppLparen, pprX sty fm_l, ppSP, + ppr sty key, ppSP, ppInt (IF_GHC(I# sz, sz)), ppSP, + pprX sty fm_r, ppRparen] +#endif + +#ifndef COMPILING_GHC +instance (Eq key, Eq elt) => Eq (FiniteMap key elt) where + fm_1 == fm_2 = (sizeFM fm_1 == sizeFM fm_2) && -- quick test + (fmToList fm_1 == fmToList fm_2) + +{- NO: not clear what The Right Thing to do is: +instance (Ord key, Ord elt) => Ord (FiniteMap key elt) where + fm_1 <= fm_2 = (sizeFM fm_1 <= sizeFM fm_2) && -- quick test + (fmToList fm_1 <= fmToList fm_2) +-} +#endif +\end{code} + +%************************************************************************ +%* * +\subsection{FiniteSets---a thin veneer} +%* * +%************************************************************************ + +\begin{code} +#ifdef COMPILING_GHC + +type FiniteSet key = FiniteMap key () +emptySet :: FiniteSet key +mkSet :: (Ord key OUTPUTABLE_key) => [key] -> FiniteSet key +isEmptySet :: FiniteSet key -> Bool +elementOf :: (Ord key OUTPUTABLE_key) => key -> FiniteSet key -> Bool +minusSet :: (Ord key OUTPUTABLE_key) => FiniteSet key -> FiniteSet key -> FiniteSet key +setToList :: FiniteSet key -> [key] +union :: (Ord key OUTPUTABLE_key) => FiniteSet key -> FiniteSet key -> FiniteSet key + +emptySet = emptyFM +mkSet xs = listToFM [ (x, ()) | x <- xs] +isEmptySet = isEmptyFM +elementOf = elemFM +minusSet = minusFM +setToList = keysFM +union = plusFM + +#endif +\end{code} + +%************************************************************************ +%* * +\subsection{Efficiency pragmas for GHC} +%* * +%************************************************************************ + +When the FiniteMap module is used in GHC, we specialise it for +\tr{Uniques}, for dastardly efficiency reasons. + +\begin{code} +#if defined(COMPILING_GHC) && __GLASGOW_HASKELL__ && !defined(REALLY_HASKELL_1_3) + +{-# SPECIALIZE addListToFM + :: FiniteMap (FAST_STRING, FAST_STRING) elt -> [((FAST_STRING, FAST_STRING),elt)] -> FiniteMap (FAST_STRING, FAST_STRING) elt + , FiniteMap RdrName elt -> [(RdrName,elt)] -> FiniteMap RdrName elt + IF_NCG(COMMA FiniteMap Reg elt -> [(Reg COMMA elt)] -> FiniteMap Reg elt) + #-} +{-# SPECIALIZE addListToFM_C + :: (elt -> elt -> elt) -> FiniteMap TyCon elt -> [(TyCon,elt)] -> FiniteMap TyCon elt + , (elt -> elt -> elt) -> FiniteMap FAST_STRING elt -> [(FAST_STRING,elt)] -> FiniteMap FAST_STRING elt + IF_NCG(COMMA (elt -> elt -> elt) -> FiniteMap Reg elt -> [(Reg COMMA elt)] -> FiniteMap Reg elt) + #-} +{-# SPECIALIZE addToFM + :: FiniteMap CLabel elt -> CLabel -> elt -> FiniteMap CLabel elt + , FiniteMap FAST_STRING elt -> FAST_STRING -> elt -> FiniteMap FAST_STRING elt + , FiniteMap (FAST_STRING, FAST_STRING) elt -> (FAST_STRING, FAST_STRING) -> elt -> FiniteMap (FAST_STRING, FAST_STRING) elt + , FiniteMap RdrName elt -> RdrName -> elt -> FiniteMap RdrName elt + , FiniteMap OrigName elt -> OrigName -> elt -> FiniteMap OrigName elt + IF_NCG(COMMA FiniteMap Reg elt -> Reg -> elt -> FiniteMap Reg elt) + #-} +{-# SPECIALIZE addToFM_C + :: (elt -> elt -> elt) -> FiniteMap (RdrName, RdrName) elt -> (RdrName, RdrName) -> elt -> FiniteMap (RdrName, RdrName) elt + , (elt -> elt -> elt) -> FiniteMap (OrigName, OrigName) elt -> (OrigName, OrigName) -> elt -> FiniteMap (OrigName, OrigName) elt + , (elt -> elt -> elt) -> FiniteMap FAST_STRING elt -> FAST_STRING -> elt -> FiniteMap FAST_STRING elt + IF_NCG(COMMA (elt -> elt -> elt) -> FiniteMap Reg elt -> Reg -> elt -> FiniteMap Reg elt) + #-} +{-# SPECIALIZE bagToFM + :: Bag (FAST_STRING,elt) -> FiniteMap FAST_STRING elt + #-} +{-# SPECIALIZE delListFromFM + :: FiniteMap RdrName elt -> [RdrName] -> FiniteMap RdrName elt + , FiniteMap OrigName elt -> [OrigName] -> FiniteMap OrigName elt + , FiniteMap FAST_STRING elt -> [FAST_STRING] -> FiniteMap FAST_STRING elt + IF_NCG(COMMA FiniteMap Reg elt -> [Reg] -> FiniteMap Reg elt) + #-} +{-# SPECIALIZE listToFM + :: [([Char],elt)] -> FiniteMap [Char] elt + , [(FAST_STRING,elt)] -> FiniteMap FAST_STRING elt + , [((FAST_STRING,FAST_STRING),elt)] -> FiniteMap (FAST_STRING, FAST_STRING) elt + , [(OrigName,elt)] -> FiniteMap OrigName elt + IF_NCG(COMMA [(Reg COMMA elt)] -> FiniteMap Reg elt) + #-} +{-# SPECIALIZE lookupFM + :: FiniteMap CLabel elt -> CLabel -> Maybe elt + , FiniteMap [Char] elt -> [Char] -> Maybe elt + , FiniteMap FAST_STRING elt -> FAST_STRING -> Maybe elt + , FiniteMap (FAST_STRING,FAST_STRING) elt -> (FAST_STRING,FAST_STRING) -> Maybe elt + , FiniteMap OrigName elt -> OrigName -> Maybe elt + , FiniteMap (OrigName,OrigName) elt -> (OrigName,OrigName) -> Maybe elt + , FiniteMap RdrName elt -> RdrName -> Maybe elt + , FiniteMap (RdrName,RdrName) elt -> (RdrName,RdrName) -> Maybe elt + IF_NCG(COMMA FiniteMap Reg elt -> Reg -> Maybe elt) + #-} +{-# SPECIALIZE lookupWithDefaultFM + :: FiniteMap FAST_STRING elt -> elt -> FAST_STRING -> elt + IF_NCG(COMMA FiniteMap Reg elt -> elt -> Reg -> elt) + #-} +{-# SPECIALIZE plusFM + :: FiniteMap RdrName elt -> FiniteMap RdrName elt -> FiniteMap RdrName elt + , FiniteMap OrigName elt -> FiniteMap OrigName elt -> FiniteMap OrigName elt + , FiniteMap FAST_STRING elt -> FiniteMap FAST_STRING elt -> FiniteMap FAST_STRING elt + IF_NCG(COMMA FiniteMap Reg elt -> FiniteMap Reg elt -> FiniteMap Reg elt) + #-} +{-# SPECIALIZE plusFM_C + :: (elt -> elt -> elt) -> FiniteMap FAST_STRING elt -> FiniteMap FAST_STRING elt -> FiniteMap FAST_STRING elt + IF_NCG(COMMA (elt -> elt -> elt) -> FiniteMap Reg elt -> FiniteMap Reg elt -> FiniteMap Reg elt) + #-} + +#endif {- compiling for GHC -} +\end{code} diff --git a/ghc/lib/misc/ListSetOps.lhs b/ghc/lib/misc/ListSetOps.lhs new file mode 100644 index 0000000..3917247 --- /dev/null +++ b/ghc/lib/misc/ListSetOps.lhs @@ -0,0 +1,81 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1995 +% +\section[ListSetOps]{Set-like operations on lists} + +\begin{code} +#ifdef COMPILING_GHC +#include "HsVersions.h" +#endif + +module ListSetOps ( + unionLists, + intersectLists, + minusList +#ifndef COMPILING_GHC + , disjointLists, intersectingLists +#endif + ) where + +#if defined(COMPILING_GHC) +IMP_Ubiq(){-uitous-} + +import Util ( isIn, isn'tIn ) +#endif +\end{code} + +\begin{code} +unionLists :: (Eq a) => [a] -> [a] -> [a] +unionLists [] [] = [] +unionLists [] b = b +unionLists a [] = a +unionLists (a:as) b + | a `is_elem` b = unionLists as b + | otherwise = a : unionLists as b + where +#if defined(COMPILING_GHC) + is_elem = isIn "unionLists" +#else + is_elem = elem +#endif + +intersectLists :: (Eq a) => [a] -> [a] -> [a] +intersectLists [] [] = [] +intersectLists [] b = [] +intersectLists a [] = [] +intersectLists (a:as) b + | a `is_elem` b = a : intersectLists as b + | otherwise = intersectLists as b + where +#if defined(COMPILING_GHC) + is_elem = isIn "intersectLists" +#else + is_elem = elem +#endif +\end{code} + +Everything in the first list that is not in the second list: +\begin{code} +minusList :: (Eq a) => [a] -> [a] -> [a] +minusList xs ys = [ x | x <- xs, x `not_elem` ys] + where +#if defined(COMPILING_GHC) + not_elem = isn'tIn "minusList" +#else + not_elem = notElem +#endif +\end{code} + +\begin{code} +#if ! defined(COMPILING_GHC) + +disjointLists, intersectingLists :: Eq a => [a] -> [a] -> Bool + +disjointLists [] bs = True +disjointLists (a:as) bs + | a `elem` bs = False + | otherwise = disjointLists as bs + +intersectingLists xs ys = not (disjointLists xs ys) +#endif +\end{code} diff --git a/ghc/lib/misc/Makefile b/ghc/lib/misc/Makefile new file mode 100644 index 0000000..b2bfe19 --- /dev/null +++ b/ghc/lib/misc/Makefile @@ -0,0 +1,73 @@ +# +# Makefile for hslibs subdir +# +TOP = ../.. +include $(TOP)/mk/boilerplate.mk + +WAYS=$(GhcLibWays) + +ifeq "$(way)" "" +SUBDIRS = cbits +else +SUBDIRS= +endif + +#----------------------------------------------------------------------------- +# Setting the standard variables +# + +LIBRARY = libHS$(_way).a +HS_SRCS = $(wildcard *.lhs) +HS_OBJS = $(HS_SRCS:.lhs=.$(way_)o) +LIBOBJS = $(HS_OBJS) +HS_IFACES= $(HS_SRCS:.lhs=.$(way_)hi) GHC.$(way_)hi + + +#----------------------------------------------------------------------------- +# Setting the GHC compile options + +SRC_HC_OPTS += -recomp -cpp -fglasgow-exts -fvia-C -Rghc-timing $(GhcLibHcOpts) + +# +# Profiling options +WAY_p_HC_OPTS += -GPrelude +WAY_mr_HC_OPTS += -GPrelude + +# +# Object and interface files have suffixes tagged with their ways +# +ifneq "$(way)" "" +SRC_HC_OPTS += -hisuf $(way_)hi +endif + +# +# Specific flags +# +BSD_HC_OPTS += -syslib posix -optc-DNON_POSIX_SOURCE +Socket_HC_OPTS += -I../std/cbits -optc-DNON_POSIX_SOURCE +SocketPrim_HC_OPTS += -I../std/cbits -H10m -syslib posix -optc-DNON_POSIX_SOURCE + +#----------------------------------------------------------------------------- +# Dependency generation + +SRC_MKDEPENDHS_OPTS += -syslib posix -I$(GHC_INCLUDE_DIR) + +#----------------------------------------------------------------------------- +# Installation; need to install .hi files as well as libraries +# +# The interface files are put inside the $(libdir), since they +# might (potentially) be platform specific.. +# +# override is used here because for binary distributions, datadir is +# set on the command line. sigh. +# +override datadir:=$(libdir)/imports/misc + +# +# Files to install from here +# +INSTALL_LIBS += $(LIBRARY) +INSTALL_DATAS += $(HS_IFACES) + +include $(TOP)/mk/target.mk + diff --git a/ghc/lib/misc/MatchPS.lhs b/ghc/lib/misc/MatchPS.lhs new file mode 100644 index 0000000..761f0a0 --- /dev/null +++ b/ghc/lib/misc/MatchPS.lhs @@ -0,0 +1,484 @@ +\section[match]{PackedString functions for matching} + +This module provides regular expression matching and substitution +at the PackedString level. It is built on top of the GNU Regex +library modified to handle perl regular expression syntax. +For a complete description of the perl syntax, do `man perlre` +or have a gander in (Programming|Learning) Perl. Here's +a short summary: + +^ matches the beginning of line +$ matches end of line +\b matches word boundary +\B matches non-word boundary +\w matches a word(alpha-numeric) character +\W matches a non-word character +\d matches a digit +\D matches a non-digit +\s matches whitespace +\S matches non-whitespace +\A matches beginning of buffer +\Z matches end-of-buffer +. matches any (bar newline in single-line mode) ++ matches 1 or more times +* matches 0 or more times +? matches 0 or 1 +{n,m} matches >=n and <=m atoms +{n,} matches at least n times +{n} matches n times +[..] matches any character member of char class. +(..) if pattern inside parens match, then the ith group is bound + to the matched string +\digit matches whatever the ith group matched. + +Backslashed letters +\n newline +\r carriage return +\t tab +\f formfeed +\v vertical tab +\a alarm bell +\e escape + + +\begin{code} +module MatchPS + + ( + matchPS, + searchPS, + substPS, + replacePS, + + match2PS, + search2PS, + + getMatchesNo, + getMatchedGroup, + getWholeMatch, + getLastMatch, + getAfterMatch, + + findPS, + rfindPS, + chopPS, + + matchPrefixPS, + + REmatch(..) + ) where + +import GlaExts +import PackedString + +import Array ((!), bounds) +import Char ( isDigit, ord ) +import PrelBase ( Char(..) ) + +import Regex + +\end{code} + +\subsection[ps-matching]{PackedString matching} + +Posix matching, returning an array of the the intervals that +the individual groups matched within the string. + +\begin{code} + +matchPS :: PackedString -- reg. exp + -> PackedString -- string to match + -> [Char] -- flags + -> Maybe REmatch +matchPS reg str flags + = let + insensitive = 'i' `elem` flags + mode = 's' `elem` flags + in + unsafePerformIO (do + pat <- re_compile_pattern reg mode insensitive + re_match pat str 0 True) + + +match2PS :: PackedString -- reg. exp + -> PackedString -- string1 to match + -> PackedString -- string2 to match + -> [Char] -- flags + -> Maybe REmatch +match2PS reg str1 str2 flags + = let + insensitive = 'i' `elem` flags + mode = 's' `elem` flags + len1 = lengthPS str1 + len2 = lengthPS str2 + in + unsafePerformIO (do + pat <- re_compile_pattern reg mode insensitive + re_match2 pat str1 str2 0 (len1+len2) True) + +\end{code} + +PackedString front-end to searching with GNU Regex + +\begin{code} + +searchPS :: PackedString -- reg. exp + -> PackedString -- string to match + -> [Char] -- flags + -> Maybe REmatch +searchPS reg str flags + = let + insensitive = 'i' `elem` flags + mode = 's' `elem` flags + in + unsafePerformIO (do + pat <- re_compile_pattern reg mode insensitive + re_search pat str + 0 + (lengthPS str) + True) + + + +search2PS :: PackedString -- reg. exp + -> PackedString -- string to match + -> PackedString -- string to match + -> [Char] -- flags + -> Maybe REmatch +search2PS reg str1 str2 flags + = let + insensitive = 'i' `elem` flags + mode = 's' `elem` flags + len1 = lengthPS str1 + len2 = lengthPS str2 + len = len1+len2 + in + unsafePerformIO (do + pat <- re_compile_pattern reg mode insensitive + re_search2 pat + str1 + str2 + 0 + len + len + True) + + + +\end{code} + +@substrPS s st end@ cuts out the chunk in \tr{s} between \tr{st} and \tr{end}, inclusive. +The \tr{Regex} registers represent substrings by storing the start and the end point plus +one( st==end => empty string) , so we use @chunkPS@ instead. + + +\begin{code} + +chunkPS :: PackedString + -> (Int,Int) + -> PackedString +chunkPS str (st,end) + = if st==end then + nilPS + else + substrPS str st (max 0 (end-1)) + +\end{code} + +Perl-like match and substitute + +\begin{code} + +substPS :: PackedString -- reg. exp + -> PackedString -- replacement + -> [Char] -- flags + -> PackedString -- string + -> PackedString +substPS rexp + repl + flags + str + = search str + where + global = 'g' `elem` flags + case_insensitive = 'i' `elem` flags + mode = 's' `elem` flags -- single-line mode + pat = unsafePerformIO ( + re_compile_pattern rexp mode case_insensitive) + + search str + = let + search_res + = unsafePerformIO (re_search pat str 0 (lengthPS str) True) + in + case search_res of + Nothing -> str + Just matcher@(REmatch arr before match after lst) -> + let + (st,en) = match + prefix = chunkPS str before + suffix + = if global && (st /= en) then + search (dropPS en str) + else + chunkPS str after + in + concatPS [prefix, + replace matcher repl str, + suffix] + + +replace :: REmatch + -> PackedString + -> PackedString + -> PackedString +replace (REmatch arr before@(_,b_end) match after lst) + replacement + str + = concatPS (reverse acc) -- ToDo: write a `reversed' version of concatPS + where + (_,b) = bounds arr + + acc = replace' [] replacement False + + single :: Char -> PackedString + single x = consPS x nilPS + + replace' :: [PackedString] + -> PackedString + -> Bool + -> [PackedString] + replace' acc repl escaped + = if (nullPS repl) then + acc + else + let + x = headPS repl + x# = case x of { C# c -> c } + xs = tailPS repl + in + case x# of + '\\'# -> + if escaped then + replace' acc xs True + else + replace' ((single x):acc) xs (not escaped) + '$'# -> + if (not escaped) then + let + x' = headPS xs + xs' = tailPS xs + ith_ival = arr!num + (num,xs_num) = getNumber ((ord x') - ord '0') xs' + in + if (isDigit x') && (num<=b) then + replace' ((chunkPS str ith_ival):acc) xs_num escaped + else if x' == '&' then + replace' ((chunkPS str match):acc) xs' escaped + else if x' == '+' then + replace' ((chunkPS str lst):acc) xs' escaped + else if x' == '`' then + replace' ((chunkPS str (0,b_end)):acc) xs' escaped + else if x' == '\'' then + replace' ((chunkPS str after):acc) xs' escaped + else -- ignore + replace' acc xs escaped + else + replace' ((single x):acc) xs False + + _ -> if escaped then + (case x# of + 'n'# -> -- newline + replace' ((single '\n'):acc) + 'f'# -> -- formfeed + replace' ((single '\f'):acc) + 'r'# -> -- carriage return + replace' ((single '\r'):acc) + 't'# -> -- (horiz) tab + replace' ((single '\t'):acc) + 'v'# -> -- vertical tab + replace' ((single '\v'):acc) + 'a'# -> -- alarm bell + replace' ((single '\a'):acc) + 'e'# -> -- escape + replace' ((single '\033'):acc) + _ -> + replace' ((single x):acc)) xs False + else + replace' ((single x):acc) xs False + + +getNumber :: Int -> PackedString -> (Int,PackedString) +getNumber acc ps + = if nullPS ps then + (acc,ps) + else + let + x = headPS ps + xs = tailPS ps + in + if (isDigit x) then + getNumber (acc*10+(ord x - ord '0')) xs + else + (acc,ps) + +\end{code} + +Just like substPS, but no prefix and suffix. + +\begin{code} + +replacePS :: PackedString -- reg. exp + -> PackedString -- replacement + -> [Char] -- flags + -> PackedString -- string + -> PackedString +replacePS rexp + repl + flags + str + = search str + where + global = 'g' `elem` flags + case_insensitive = 'i' `elem` flags + mode = 's' `elem` flags -- single-line mode + pat = unsafePerformIO ( + re_compile_pattern rexp mode case_insensitive) + + search str + = let + search_res + = unsafePerformIO (re_search pat str 0 (lengthPS str) True) + in + case search_res of + Nothing -> str + Just matcher@(REmatch arr before match after lst) -> + replace matcher repl str + +\end{code} + +Picking matched groups out of string + +\begin{code} + +getMatchesNo :: REmatch + -> Int +getMatchesNo (REmatch arr _ _ _ _) + = snd (bounds arr) + +getMatchedGroup :: REmatch + -> Int + -> PackedString + -> PackedString +getMatchedGroup (REmatch arr bef mtch after lst) nth str + = let + (1,grps) = bounds arr + in + if (nth >= 1) && (nth <= grps) then + chunkPS str (arr!nth) + else + error "getMatchedGroup: group out of range" + +getWholeMatch :: REmatch + -> PackedString + -> PackedString +getWholeMatch (REmatch _ _ mtch _ _) str + = chunkPS str mtch + +getLastMatch :: REmatch + -> PackedString + -> PackedString +getLastMatch (REmatch _ _ _ _ lst) str + = chunkPS str lst + +getAfterMatch :: REmatch + -> PackedString + -> PackedString +getAfterMatch (REmatch _ _ _ aft _) str + = chunkPS str aft + +\end{code} + + +More or less straight translation of a brute-force string matching +function written in C. (Sedgewick ch. 18) + +This is intended to provide much the same facilities as index/rindex in perl. + +\begin{code} + + +findPS :: PackedString + -> PackedString + -> Maybe Int +findPS str substr + = let + m = lengthPS substr + n = lengthPS str + + loop i j + | j>=m || i>=n = if j==m then (Just (i-m)) else Nothing + | otherwise + = inner_loop i j + + inner_loop i j + = if j PackedString + -> Maybe Int +rfindPS str substr + = let + m = lengthPS substr - 1 + n = lengthPS str - 1 + + loop i j + | j<0 || i<0 = if j<0 then (Just (i+1)) else Nothing + | otherwise + = inner_loop i j + + inner_loop i j + = if j>=0 && i>=0 && (indexPS str i /= indexPS substr j) then + inner_loop (i+(m-j)-1) m + else + loop (i-1) (j-1) + in + loop n m + + +\end{code} + +\begin{code} + +chopPS :: PackedString -> PackedString +chopPS str = if nullPS str then + nilPS + else + chunkPS str (0,lengthPS str-1) + +\end{code} + +Tries to match as much as possible of strA starting from the beginning of strB +(handy when matching fancy literals in parsers) + +\begin{code} +matchPrefixPS :: PackedString + -> PackedString + -> Int +matchPrefixPS pref str + = matchPrefixPS' pref str 0 + where + matchPrefixPS' pref str n + = if (nullPS pref) || (nullPS str) then + n + else if (headPS pref) == (headPS str) then + matchPrefixPS' (tailPS pref) (tailPS str) (n+1) + else + n + +\end{code} diff --git a/ghc/lib/misc/Maybes.lhs b/ghc/lib/misc/Maybes.lhs new file mode 100644 index 0000000..1f17679 --- /dev/null +++ b/ghc/lib/misc/Maybes.lhs @@ -0,0 +1,233 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1996 +% +\section[Maybes]{The `Maybe' types and associated utility functions} + +\begin{code} +#if defined(COMPILING_GHC) +#include "HsVersions.h" +#endif + +module Maybes ( +-- Maybe(..), -- no, it's in 1.3 + MaybeErr(..), + + allMaybes, + firstJust, + expectJust, + maybeToBool, + + assocMaybe, + mkLookupFun, mkLookupFunDef, + + failMaB, + failMaybe, + seqMaybe, + returnMaB, + returnMaybe, + thenMaB + +#if defined(COMPILING_GHC) + , catMaybes +#else + , findJust + , foldlMaybeErrs + , listMaybeErrs +#endif + ) where + +#if defined(COMPILING_GHC) + +CHK_Ubiq() -- debugging consistency check + +import Unique (Unique) -- only for specialising + +#else +import Maybe -- renamer will tell us if there are any conflicts +#endif +\end{code} + + +%************************************************************************ +%* * +\subsection[Maybe type]{The @Maybe@ type} +%* * +%************************************************************************ + +\begin{code} +maybeToBool :: Maybe a -> Bool +maybeToBool Nothing = False +maybeToBool (Just x) = True +\end{code} + +@catMaybes@ takes a list of @Maybe@s and returns a list of +the contents of all the @Just@s in it. @allMaybes@ collects +a list of @Justs@ into a single @Just@, returning @Nothing@ if there +are any @Nothings@. + +\begin{code} +#ifdef COMPILING_GHC +catMaybes :: [Maybe a] -> [a] +catMaybes [] = [] +catMaybes (Nothing : xs) = catMaybes xs +catMaybes (Just x : xs) = (x : catMaybes xs) +#endif + +allMaybes :: [Maybe a] -> Maybe [a] +allMaybes [] = Just [] +allMaybes (Nothing : ms) = Nothing +allMaybes (Just x : ms) = case (allMaybes ms) of + Nothing -> Nothing + Just xs -> Just (x:xs) +\end{code} + +@firstJust@ takes a list of @Maybes@ and returns the +first @Just@ if there is one, or @Nothing@ otherwise. + +\begin{code} +firstJust :: [Maybe a] -> Maybe a +firstJust [] = Nothing +firstJust (Just x : ms) = Just x +firstJust (Nothing : ms) = firstJust ms +\end{code} + +\begin{code} +findJust :: (a -> Maybe b) -> [a] -> Maybe b +findJust f [] = Nothing +findJust f (a:as) = case f a of + Nothing -> findJust f as + b -> b +\end{code} + +\begin{code} +expectJust :: String -> Maybe a -> a +{-# INLINE expectJust #-} +expectJust err (Just x) = x +expectJust err Nothing = error ("expectJust " ++ err) +\end{code} + +The Maybe monad +~~~~~~~~~~~~~~~ +\begin{code} +seqMaybe :: Maybe a -> Maybe a -> Maybe a +seqMaybe (Just x) _ = Just x +seqMaybe Nothing my = my + +returnMaybe :: a -> Maybe a +returnMaybe = Just + +failMaybe :: Maybe a +failMaybe = Nothing +\end{code} + +Lookup functions +~~~~~~~~~~~~~~~~ + +@assocMaybe@ looks up in an assocation list, returning +@Nothing@ if it fails. + +\begin{code} +assocMaybe :: (Eq a) => [(a,b)] -> a -> Maybe b + +assocMaybe alist key + = lookup alist + where + lookup [] = Nothing + lookup ((tv,ty):rest) = if key == tv then Just ty else lookup rest + +#if defined(COMPILING_GHC) +{-# SPECIALIZE assocMaybe + :: [(FAST_STRING, b)] -> FAST_STRING -> Maybe b + , [(Int, b)] -> Int -> Maybe b + , [(Unique, b)] -> Unique -> Maybe b + , [(RdrName, b)] -> RdrName -> Maybe b + #-} +#endif +\end{code} + +@mkLookupFun eq alist@ is a function which looks up +its argument in the association list @alist@, returning a Maybe type. +@mkLookupFunDef@ is similar except that it is given a value to return +on failure. + +\begin{code} +mkLookupFun :: (key -> key -> Bool) -- Equality predicate + -> [(key,val)] -- The assoc list + -> key -- The key + -> Maybe val -- The corresponding value + +mkLookupFun eq alist s + = case [a | (s',a) <- alist, s' `eq` s] of + [] -> Nothing + (a:_) -> Just a + +mkLookupFunDef :: (key -> key -> Bool) -- Equality predicate + -> [(key,val)] -- The assoc list + -> val -- Value to return on failure + -> key -- The key + -> val -- The corresponding value + +mkLookupFunDef eq alist deflt s + = case [a | (s',a) <- alist, s' `eq` s] of + [] -> deflt + (a:_) -> a +\end{code} + +%************************************************************************ +%* * +\subsection[MaybeErr type]{The @MaybeErr@ type} +%* * +%************************************************************************ + +\begin{code} +data MaybeErr val err = Succeeded val | Failed err +\end{code} + +\begin{code} +thenMaB :: MaybeErr val1 err -> (val1 -> MaybeErr val2 err) -> MaybeErr val2 err +thenMaB m k + = case m of + Succeeded v -> k v + Failed e -> Failed e + +returnMaB :: val -> MaybeErr val err +returnMaB v = Succeeded v + +failMaB :: err -> MaybeErr val err +failMaB e = Failed e +\end{code} + + +@listMaybeErrs@ takes a list of @MaybeErrs@ and, if they all succeed, returns +a @Succeeded@ of a list of their values. If any fail, it returns a +@Failed@ of the list of all the errors in the list. + +\begin{code} +listMaybeErrs :: [MaybeErr val err] -> MaybeErr [val] [err] +listMaybeErrs + = foldr combine (Succeeded []) + where + combine (Succeeded v) (Succeeded vs) = Succeeded (v:vs) + combine (Failed err) (Succeeded _) = Failed [err] + combine (Succeeded v) (Failed errs) = Failed errs + combine (Failed err) (Failed errs) = Failed (err:errs) +\end{code} + +@foldlMaybeErrs@ works along a list, carrying an accumulator; it +applies the given function to the accumulator and the next list item, +accumulating any errors that occur. + +\begin{code} +foldlMaybeErrs :: (acc -> input -> MaybeErr acc err) + -> acc + -> [input] + -> MaybeErr acc [err] + +foldlMaybeErrs k accum ins = do_it [] accum ins + where + do_it [] acc [] = Succeeded acc + do_it errs acc [] = Failed errs + do_it errs acc (v:vs) = case (k acc v) of + Succeeded acc' -> do_it errs acc' vs + Failed err -> do_it (err:errs) acc vs +\end{code} diff --git a/ghc/lib/misc/PackedString.lhs b/ghc/lib/misc/PackedString.lhs new file mode 100644 index 0000000..59a552f --- /dev/null +++ b/ghc/lib/misc/PackedString.lhs @@ -0,0 +1,1087 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1993-1996 +% +\section{Packed strings} + +This sits on top of the sequencing/arrays world, notably @ByteArray#@s. + +Glorious hacking (all the hard work) by Bryan O'Sullivan. + +\begin{code} +module PackedString ( + PackedString, -- abstract + + -- Creating the beasts + packString, -- :: [Char] -> PackedString + packStringST, -- :: [Char] -> ST s PackedString + packCBytesST, -- :: Int -> Addr -> ST s PackedString + + byteArrayToPS, -- :: ByteArray Int -> PackedString + unsafeByteArrayToPS, -- :: ByteArray a -> Int -> PackedString + + psToByteArray, -- :: PackedString -> ByteArray Int + psToByteArrayST, -- :: PackedString -> ST s (ByteArray Int) + + unpackPS, -- :: PackedString -> [Char] +{-LATER: + hPutPS, -- :: Handle -> PackedString -> IO () + putPS, -- :: FILE -> PackedString -> PrimIO () -- ToDo: more sensible type + getPS, -- :: FILE -> Int -> PrimIO PackedString +-} + nilPS, -- :: PackedString + consPS, -- :: Char -> PackedString -> PackedString + headPS, -- :: PackedString -> Char + tailPS, -- :: PackedString -> PackedString + nullPS, -- :: PackedString -> Bool + appendPS, -- :: PackedString -> PackedString -> PackedString + lengthPS, -- :: PackedString -> Int + {- 0-origin indexing into the string -} + indexPS, -- :: PackedString -> Int -> Char + mapPS, -- :: (Char -> Char) -> PackedString -> PackedString + filterPS, -- :: (Char -> Bool) -> PackedString -> PackedString + foldlPS, -- :: (a -> Char -> a) -> a -> PackedString -> a + foldrPS, -- :: (Char -> a -> a) -> a -> PackedString -> a + takePS, -- :: Int -> PackedString -> PackedString + dropPS, -- :: Int -> PackedString -> PackedString + splitAtPS, -- :: Int -> PackedString -> (PackedString, PackedString) + takeWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString + dropWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString + spanPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString) + breakPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString) + linesPS, -- :: PackedString -> [PackedString] + + wordsPS, -- :: PackedString -> [PackedString] + reversePS, -- :: PackedString -> PackedString + splitPS, -- :: Char -> PackedString -> [PackedString] + splitWithPS, -- :: (Char -> Bool) -> PackedString -> [PackedString] + joinPS, -- :: PackedString -> [PackedString] -> PackedString + concatPS, -- :: [PackedString] -> PackedString + elemPS, -- :: Char -> PackedString -> Bool + + {- + Pluck out a piece of a PS start and end + chars you want; both 0-origin-specified + -} + substrPS, -- :: PackedString -> Int -> Int -> PackedString + + comparePS, + + -- Converting to C strings + packCString#, + unpackCString#, unpackCString2#, unpackAppendCString#, unpackFoldrCString#, + unpackCString + ) where + +import GlaExts +import PrelBase ( showList__ ) -- ToDo: better +import Addr + +import PrelArr ( StateAndMutableByteArray#(..) , StateAndByteArray#(..) ) +import PrelST +import ST +import IOExts ( unsafePerformIO ) + +import Ix +import Char (isSpace) + +\end{code} + +%************************************************************************ +%* * +\subsection{@PackedString@ type declaration} +%* * +%************************************************************************ + +\begin{code} +data PackedString + = PS ByteArray# -- the bytes + Int# -- length (*not* including NUL at the end) + Bool -- True <=> contains a NUL + | CPS Addr# -- pointer to the (null-terminated) bytes in C land + Int# -- length, as per strlen + -- definitely doesn't contain a NUL + +instance Eq PackedString where + x == y = compare x y == EQ + x /= y = compare x y /= EQ + +instance Ord PackedString where + compare = comparePS + x <= y = compare x y /= GT + x < y = compare x y == LT + x >= y = compare x y /= LT + x > y = compare x y == GT + max x y = case (compare x y) of { LT -> y ; EQ -> x ; GT -> x } + min x y = case (compare x y) of { LT -> x ; EQ -> x ; GT -> y } + +--instance Read PackedString: ToDo + +instance Show PackedString where + showsPrec p ps r = showsPrec p (unpackPS ps) r + showList = showList__ (showsPrec 0) +\end{code} + + +%************************************************************************ +%* * +\subsection{@PackedString@ instances} +%* * +%************************************************************************ + +We try hard to make this go fast: +\begin{code} +comparePS :: PackedString -> PackedString -> Ordering + +comparePS (PS bs1 len1 has_null1) (PS bs2 len2 has_null2) + | not has_null1 && not has_null2 + = unsafePerformIO ( + _ccall_ strcmp ba1 ba2 >>= \ (I# res) -> + return ( + if res <# 0# then LT + else if res ==# 0# then EQ + else GT + )) + where + ba1 = ByteArray (0, I# (len1 -# 1#)) bs1 + ba2 = ByteArray (0, I# (len2 -# 1#)) bs2 + +comparePS (PS bs1 len1 has_null1) (CPS bs2 len2) + | not has_null1 + = unsafePerformIO ( + _ccall_ strcmp ba1 ba2 >>= \ (I# res) -> + return ( + if res <# 0# then LT + else if res ==# 0# then EQ + else GT + )) + where + ba1 = ByteArray (0, I# (len1 -# 1#)) bs1 + ba2 = A# bs2 + +comparePS (CPS bs1 len1) (CPS bs2 len2) + = unsafePerformIO ( + _ccall_ strcmp ba1 ba2 >>= \ (I# res) -> + return ( + if res <# 0# then LT + else if res ==# 0# then EQ + else GT + )) + where + ba1 = A# bs1 + ba2 = A# bs2 + +comparePS a@(CPS _ _) b@(PS _ _ has_null2) + | not has_null2 + = -- try them the other way 'round + case (comparePS b a) of { LT -> GT; EQ -> EQ; GT -> LT } + +comparePS ps1 ps2 -- slow catch-all case (esp for "has_null" True) + = looking_at 0# + where + end1 = lengthPS# ps1 -# 1# + end2 = lengthPS# ps2 -# 1# + + looking_at char# + = if char# ># end1 then + if char# ># end2 then -- both strings ran out at once + EQ + else -- ps1 ran out before ps2 + LT + else if char# ># end2 then + GT -- ps2 ran out before ps1 + else + let + ch1 = indexPS# ps1 char# + ch2 = indexPS# ps2 char# + in + if ch1 `eqChar#` ch2 then + looking_at (char# +# 1#) + else if ch1 `ltChar#` ch2 then LT + else GT +\end{code} + + +%************************************************************************ +%* * +\subsection{Constructor functions} +%* * +%************************************************************************ + +Easy ones first. @packString@ requires getting some heap-bytes and +scribbling stuff into them. + +\begin{code} +nilPS :: PackedString +nilPS = CPS ""# 0# + +consPS :: Char -> PackedString -> PackedString +consPS c cs = packString (c : (unpackPS cs)) -- ToDo:better + +packString :: [Char] -> PackedString +packString str = runST (packStringST str) + +packStringST :: [Char] -> ST s PackedString +packStringST str = + let len = length str in + packNCharsST len str + +packNCharsST :: Int -> [Char] -> ST s PackedString +packNCharsST len@(I# length#) str = + {- + allocate an array that will hold the string + (not forgetting the NUL byte at the end) + -} + new_ps_array (length# +# 1#) >>= \ ch_array -> + -- fill in packed string from "str" + fill_in ch_array 0# str >> + -- freeze the puppy: + freeze_ps_array ch_array >>= \ (ByteArray _ frozen#) -> + let has_null = byteArrayHasNUL# frozen# length# in + return (PS frozen# length# has_null) + where + fill_in :: MutableByteArray s Int -> Int# -> [Char] -> ST s () + fill_in arr_in# idx [] = + write_ps_array arr_in# idx (chr# 0#) >> + return () + + fill_in arr_in# idx (C# c : cs) = + write_ps_array arr_in# idx c >> + fill_in arr_in# (idx +# 1#) cs + +byteArrayToPS :: ByteArray Int -> PackedString +byteArrayToPS (ByteArray ixs@(_, ix_end) frozen#) = + let + n# = + case ( + if null (range ixs) + then 0 + else ((index ixs ix_end) + 1) + ) of { I# x -> x } + in + PS frozen# n# (byteArrayHasNUL# frozen# n#) + +unsafeByteArrayToPS :: ByteArray a -> Int -> PackedString +unsafeByteArrayToPS (ByteArray _ frozen#) (I# n#) + = PS frozen# n# (byteArrayHasNUL# frozen# n#) + +psToByteArray :: PackedString -> ByteArray Int +psToByteArray (PS bytes n has_null) + = ByteArray (0, I# (n -# 1#)) bytes + +psToByteArray (CPS addr len#) + = let + len = I# len# + byte_array_form = packCBytes len (A# addr) + in + case byte_array_form of { PS bytes _ _ -> + ByteArray (0, len - 1) bytes } +\end{code} + +%************************************************************************ +%* * +\subsection{Destructor functions (taking @PackedStrings@ apart)} +%* * +%************************************************************************ + +\begin{code} +-- OK, but this code gets *hammered*: +-- unpackPS ps +-- = [ indexPS ps n | n <- [ 0::Int .. lengthPS ps - 1 ] ] + +unpackPS :: PackedString -> [Char] +unpackPS (PS bytes len has_null) + = unpack 0# + where + unpack nh + | nh >=# len = [] + | otherwise = C# ch : unpack (nh +# 1#) + where + ch = indexCharArray# bytes nh + +unpackPS (CPS addr len) + = unpack 0# + where + unpack nh + | ch `eqChar#` '\0'# = [] + | otherwise = C# ch : unpack (nh +# 1#) + where + ch = indexCharOffAddr# addr nh +\end{code} + +Output a packed string via a handle: + +\begin{code} +{- LATER: +hPutPS :: Handle -> PackedString -> IO () +hPutPS handle ps = + let + len = + case ps of + PS _ len _ -> len + CPS _ len -> len + in + if len ==# 0# then + return () + else + _readHandle handle >>= \ htype -> + case htype of + _ErrorHandle ioError -> + _writeHandle handle htype >> + failWith ioError + _ClosedHandle -> + _writeHandle handle htype >> + failWith (IllegalOperation "handle is closed") + _SemiClosedHandle _ _ -> + _writeHandle handle htype >> + failWith (IllegalOperation "handle is closed") + _ReadHandle _ _ _ -> + _writeHandle handle htype >> + failWith (IllegalOperation "handle is not open for writing") + other -> + _getBufferMode other >>= \ other -> + (case _bufferMode other of + Just LineBuffering -> + writeLines (_filePtr other) + Just (BlockBuffering (Just size)) -> + writeBlocks (_filePtr other) size + Just (BlockBuffering Nothing) -> + writeBlocks (_filePtr other) ``BUFSIZ'' + _ -> -- Nothing is treated pessimistically as NoBuffering + writeChars (_filePtr other) 0# + ) >>= \ success -> + _writeHandle handle (_markHandle other) >> + if success then + return () + else + _constructError "hPutStr" >>= \ ioError -> + failWith ioError + + where + pslen = lengthPS# ps + + writeLines :: Addr -> IO Bool + writeLines = writeChunks ``BUFSIZ'' True + + writeBlocks :: Addr -> Int -> IO Bool + writeBlocks fp size = writeChunks size False fp + + {- + The breaking up of output into lines along \n boundaries + works fine as long as there are newlines to split by. + Avoid the splitting up into lines altogether (doesn't work + for overly long lines like the stuff that showsPrec instances + normally return). Instead, we split them up into fixed size + chunks before blasting them off to the Real World. + + Hacked to avoid multiple passes over the strings - unsightly, but + a whole lot quicker. -- SOF 3/96 + -} + + writeChunks :: Int -> Bool -> Addr -> IO Bool + writeChunks (I# bufLen) chopOnNewLine fp = + newCharArray (0,I# bufLen) >>= \ arr@(MutableByteArray _ arr#) -> + let + shoveString :: Int# -> Int# -> IO Bool + shoveString n i + | i ==# pslen = -- end of string + if n ==# 0# then + return True + else + _ccall_ writeFile arr fp (I# n) >>= \rc -> + return (rc==0) + | otherwise = + (\ (S# s#) -> + case writeCharArray# arr# n (indexPS# ps i) s# of + s1# -> + {- Flushing lines - should we bother? -} + (if n ==# bufLen then + _ccall_ writeFile arr fp (I# (n +# 1#)) >>= \rc -> + if rc == 0 then + shoveString 0# (i +# 1#) + else + return False + else + shoveString (n +# 1#) (i +# 1#)) (S# s1#)) + in + shoveString 0# 0# + + writeChars :: Addr -> Int# -> IO Bool + writeChars fp i + | i ==# pslen = return True + | otherwise = + _ccall_ filePutc fp (ord (C# (indexPS# ps i))) >>= \ rc -> + if rc == 0 then + writeChars fp (i +# 1#) + else + return False + +--------------------------------------------- + +putPS :: _FILE -> PackedString -> IO () +putPS file ps@(PS bytes len has_null) + | len ==# 0# + = return () + | otherwise + = let + byte_array = ByteArray (0, I# (len -# 1#)) bytes + in + _ccall_ fwrite byte_array (1::Int){-size-} (I# len) file + >>= \ (I# written) -> + if written ==# len then + return () + else + error "putPS: fwrite failed!\n" + +putPS file (CPS addr len) + | len ==# 0# + = return () + | otherwise + = _ccall_ fputs (A# addr) file >>= \ (I# _){-force type-} -> + return () +\end{code} + +The dual to @_putPS@, note that the size of the chunk specified +is the upper bound of the size of the chunk returned. + +\begin{code} +getPS :: _FILE -> Int -> IO PackedString +getPS file len@(I# len#) + | len# <=# 0# = return nilPS -- I'm being kind here. + | otherwise = + -- Allocate an array for system call to store its bytes into. + new_ps_array len# >>= \ ch_arr -> + freeze_ps_array ch_arr >>= \ (ByteArray _ frozen#) -> + let + byte_array = ByteArray (0, I# len#) frozen# + in + _ccall_ fread byte_array (1::Int) len file >>= \ (I# read#) -> + if read# ==# 0# then -- EOF or other error + error "getPS: EOF reached or other error" + else + {- + The system call may not return the number of + bytes requested. Instead of failing with an error + if the number of bytes read is less than requested, + a packed string containing the bytes we did manage + to snarf is returned. + -} + let + has_null = byteArrayHasNUL# frozen# read# + in + return (PS frozen# read# has_null) +END LATER -} +\end{code} + +%************************************************************************ +%* * +\subsection{List-mimicking functions for @PackedStrings@} +%* * +%************************************************************************ + +First, the basic functions that do look into the representation; +@indexPS@ is the most important one. + +\begin{code} +lengthPS :: PackedString -> Int +lengthPS ps = I# (lengthPS# ps) + +{-# INLINE lengthPS# #-} + +lengthPS# (PS _ i _) = i +lengthPS# (CPS _ i) = i + +{-# INLINE strlen# #-} + +strlen# :: Addr# -> Int +strlen# a + = unsafePerformIO ( + _ccall_ strlen (A# a) >>= \ len@(I# _) -> + return len + ) + +byteArrayHasNUL# :: ByteArray# -> Int#{-length-} -> Bool +byteArrayHasNUL# bs len + = unsafePerformIO ( + _ccall_ byteArrayHasNUL__ ba (I# len) >>= \ (I# res) -> + return ( + if res ==# 0# then False else True + )) + where + ba = ByteArray (0, I# (len -# 1#)) bs + +----------------------- + +indexPS :: PackedString -> Int -> Char +indexPS ps (I# n) = C# (indexPS# ps n) + +{-# INLINE indexPS# #-} + +indexPS# (PS bs i _) n + = --ASSERT (n >=# 0# && n <# i) -- error checking: my eye! (WDP 94/10) + indexCharArray# bs n + +indexPS# (CPS a _) n + = indexCharOffAddr# a n +\end{code} + +Now, the rest of the functions can be defined without digging +around in the representation. + +\begin{code} +headPS :: PackedString -> Char +headPS ps + | nullPS ps = error "headPS: head []" + | otherwise = C# (indexPS# ps 0#) + +tailPS :: PackedString -> PackedString +tailPS ps + | len <=# 0# = error "tailPS: tail []" + | len ==# 1# = nilPS + | otherwise = substrPS# ps 1# (len -# 1#) + where + len = lengthPS# ps + +nullPS :: PackedString -> Bool +nullPS (PS _ i _) = i ==# 0# +nullPS (CPS _ i) = i ==# 0# + +{- (ToDo: some non-lousy implementations...) + + Old : _appendPS xs ys = packString (unpackPS xs ++ unpackPS ys) + +-} +appendPS :: PackedString -> PackedString -> PackedString +appendPS xs ys + | nullPS xs = ys + | nullPS ys = xs + | otherwise = concatPS [xs,ys] + +{- OLD: mapPS f xs = packString (map f (unpackPS xs)) -} + +mapPS :: (Char -> Char) -> PackedString -> PackedString {-or String?-} +mapPS f xs = + if nullPS xs then + xs + else + runST ( + new_ps_array (length +# 1#) >>= \ ps_arr -> + whizz ps_arr length 0# >> + freeze_ps_array ps_arr >>= \ (ByteArray _ frozen#) -> + let has_null = byteArrayHasNUL# frozen# length in + return (PS frozen# length has_null)) + where + length = lengthPS# xs + + whizz :: MutableByteArray s Int -> Int# -> Int# -> ST s () + whizz arr# n i + | n ==# 0# + = write_ps_array arr# i (chr# 0#) >> + return () + | otherwise + = let + ch = indexPS# xs i + in + write_ps_array arr# i (case f (C# ch) of { (C# x) -> x}) >> + whizz arr# (n -# 1#) (i +# 1#) + +filterPS :: (Char -> Bool) -> PackedString -> PackedString {-or String?-} +filterPS pred ps = + if nullPS ps then + ps + else + {- + Filtering proceeds as follows: + + * traverse the list, applying the pred. to each element, + remembering the positions where it was satisfied. + + Encode these positions using a run-length encoding of the gaps + between the matching positions. + + * Allocate a MutableByteArray in the heap big enough to hold + all the matched entries, and copy the elements that matched over. + + A better solution that merges the scan© passes into one, + would be to copy the filtered elements over into a growable + buffer. No such operation currently supported over + MutableByteArrays (could of course use malloc&realloc) + But, this solution may in the case of repeated realloc's + be worse than the current solution. + -} + runST ( + let + (rle,len_filtered) = filter_ps (len# -# 1#) 0# 0# [] + len_filtered# = case len_filtered of { I# x# -> x#} + in + if len# ==# len_filtered# then + {- not much filtering as everything passed through. -} + return ps + else if len_filtered# ==# 0# then + return nilPS + else + new_ps_array (len_filtered# +# 1#) >>= \ ps_arr -> + copy_arr ps_arr rle 0# 0# >> + freeze_ps_array ps_arr >>= \ (ByteArray _ frozen#) -> + let has_null = byteArrayHasNUL# frozen# len_filtered# in + return (PS frozen# len_filtered# has_null)) + where + len# = lengthPS# ps + + matchOffset :: Int# -> [Char] -> (Int,[Char]) + matchOffset off [] = (I# off,[]) + matchOffset off (C# c:cs) = + let + x = ord# c + off' = off +# x + in + if x==# 0# then -- escape code, add 255# + matchOffset off' cs + else + (I# off', cs) + + copy_arr :: MutableByteArray s Int -> [Char] -> Int# -> Int# -> ST s () + copy_arr arr# [_] _ _ = return () + copy_arr arr# ls n i = + let + (x,ls') = matchOffset 0# ls + n' = n +# (case x of { (I# x#) -> x#}) -# 1# + ch = indexPS# ps n' + in + write_ps_array arr# i ch >> + copy_arr arr# ls' (n' +# 1#) (i +# 1#) + + esc :: Int# -> Int# -> [Char] -> [Char] + esc v 0# ls = (C# (chr# v)):ls + esc v n ls = esc v (n -# 1#) (C# (chr# 0#):ls) + + filter_ps :: Int# -> Int# -> Int# -> [Char] -> ([Char],Int) + filter_ps n hits run acc + | n <# 0# = + let + escs = run `quotInt#` 255# + v = run `remInt#` 255# + in + (esc (v +# 1#) escs acc, I# hits) + | otherwise + = let + ch = indexPS# ps n + n' = n -# 1# + in + if pred (C# ch) then + let + escs = run `quotInt#` 255# + v = run `remInt#` 255# + acc' = esc (v +# 1#) escs acc + in + filter_ps n' (hits +# 1#) 0# acc' + else + filter_ps n' hits (run +# 1#) acc + + +foldlPS :: (a -> Char -> a) -> a -> PackedString -> a +foldlPS f b ps + = if nullPS ps then + b + else + whizzLR b 0# + where + len = lengthPS# ps + + --whizzLR :: a -> Int# -> a + whizzLR b idx + | idx ==# len = b + | otherwise = whizzLR (f b (C# (indexPS# ps idx))) (idx +# 1#) + + +foldrPS :: (Char -> a -> a) -> a -> PackedString -> a +foldrPS f b ps + = if nullPS ps then + b + else + whizzRL b len + where + len = lengthPS# ps + + --whizzRL :: a -> Int# -> a + whizzRL b idx + | idx <# 0# = b + | otherwise = whizzRL (f (C# (indexPS# ps idx)) b) (idx -# 1#) + +takePS :: Int -> PackedString -> PackedString +takePS (I# n) ps + | n ==# 0# = nilPS + | otherwise = substrPS# ps 0# (n -# 1#) + +dropPS :: Int -> PackedString -> PackedString +dropPS (I# n) ps + | n ==# len = ps + | otherwise = substrPS# ps n (lengthPS# ps -# 1#) + where + len = lengthPS# ps + +splitAtPS :: Int -> PackedString -> (PackedString, PackedString) +splitAtPS n ps = (takePS n ps, dropPS n ps) + +takeWhilePS :: (Char -> Bool) -> PackedString -> PackedString +takeWhilePS pred ps + = let + break_pt = char_pos_that_dissatisfies + (\ c -> pred (C# c)) + ps + (lengthPS# ps) + 0# + in + if break_pt ==# 0# then + nilPS + else + substrPS# ps 0# (break_pt -# 1#) + +dropWhilePS :: (Char -> Bool) -> PackedString -> PackedString +dropWhilePS pred ps + = let + len = lengthPS# ps + break_pt = char_pos_that_dissatisfies + (\ c -> pred (C# c)) + ps + len + 0# + in + if len ==# break_pt then + nilPS + else + substrPS# ps break_pt (len -# 1#) + +elemPS :: Char -> PackedString -> Bool +elemPS (C# ch) ps + = let + len = lengthPS# ps + break_pt = first_char_pos_that_satisfies + (`eqChar#` ch) + ps + len + 0# + in + break_pt <# len + +char_pos_that_dissatisfies :: (Char# -> Bool) -> PackedString -> Int# -> Int# -> Int# + +char_pos_that_dissatisfies p ps len pos + | pos >=# len = pos -- end + | p (indexPS# ps pos) = -- predicate satisfied; keep going + char_pos_that_dissatisfies p ps len (pos +# 1#) + | otherwise = pos -- predicate not satisfied + +first_char_pos_that_satisfies :: (Char# -> Bool) -> PackedString -> Int# -> Int# -> Int# +first_char_pos_that_satisfies p ps len pos + | pos >=# len = pos -- end + | p (indexPS# ps pos) = pos -- got it! + | otherwise = first_char_pos_that_satisfies p ps len (pos +# 1#) + +-- ToDo: could certainly go quicker +spanPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString) +spanPS p ps = (takeWhilePS p ps, dropWhilePS p ps) + +breakPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString) +breakPS p ps = spanPS (not . p) ps + +linesPS :: PackedString -> [PackedString] +linesPS ps = splitPS '\n' ps + +wordsPS :: PackedString -> [PackedString] +wordsPS ps = splitWithPS isSpace ps + +reversePS :: PackedString -> PackedString +reversePS ps = + if nullPS ps then -- don't create stuff unnecessarily. + ps + else + runST ( + new_ps_array (length +# 1#) >>= \ arr# -> -- incl NUL byte! + fill_in arr# (length -# 1#) 0# >> + freeze_ps_array arr# >>= \ (ByteArray _ frozen#) -> + let has_null = byteArrayHasNUL# frozen# length in + return (PS frozen# length has_null)) + where + length = lengthPS# ps + + fill_in :: MutableByteArray s Int -> Int# -> Int# -> ST s () + fill_in arr_in# n i = + let + ch = indexPS# ps n + in + write_ps_array arr_in# i ch >> + if n ==# 0# then + write_ps_array arr_in# (i +# 1#) (chr# 0#) >> + return () + else + fill_in arr_in# (n -# 1#) (i +# 1#) + +concatPS :: [PackedString] -> PackedString +concatPS [] = nilPS +concatPS pss + = let + tot_len# = case (foldr ((+) . lengthPS) 0 pss) of { I# x -> x } + tot_len = I# tot_len# + in + runST ( + new_ps_array (tot_len# +# 1#) >>= \ arr# -> -- incl NUL byte! + packum arr# pss 0# >> + freeze_ps_array arr# >>= \ (ByteArray _ frozen#) -> + + let has_null = byteArrayHasNUL# frozen# tot_len# in + + return (PS frozen# tot_len# has_null) + ) + where + packum :: MutableByteArray s Int -> [PackedString] -> Int# -> ST s () + + packum arr [] pos + = write_ps_array arr pos (chr# 0#) >> + return () + packum arr (ps : pss) pos + = fill arr pos ps 0# (lengthPS# ps) >>= \ (I# next_pos) -> + packum arr pss next_pos + + fill :: MutableByteArray s Int -> Int# -> PackedString -> Int# -> Int# -> ST s Int + + fill arr arr_i ps ps_i ps_len + | ps_i ==# ps_len + = return (I# (arr_i +# ps_len)) + | otherwise + = write_ps_array arr (arr_i +# ps_i) (indexPS# ps ps_i) >> + fill arr arr_i ps (ps_i +# 1#) ps_len + +------------------------------------------------------------ +joinPS :: PackedString -> [PackedString] -> PackedString +joinPS filler pss = concatPS (splice pss) + where + splice [] = [] + splice [x] = [x] + splice (x:y:xs) = x:filler:splice (y:xs) + +-- ToDo: the obvious generalisation +{- + Some properties that hold: + + * splitPS x ls = ls' + where False = any (map (x `elemPS`) ls') + False = any (map (nullPS) ls') + + * all x's have been chopped out. + * no empty PackedStrings in returned list. A conseq. + of this is: + splitPS x nilPS = [] + + + * joinPS (packString [x]) (_splitPS x ls) = ls + +-} + +splitPS :: Char -> PackedString -> [PackedString] +splitPS (C# ch) = splitWithPS (\ (C# c) -> c `eqChar#` ch) + +splitWithPS :: (Char -> Bool) -> PackedString -> [PackedString] +splitWithPS pred ps = + splitify 0# + where + len = lengthPS# ps + + splitify n + | n >=# len = [] + | otherwise = + let + break_pt = + first_char_pos_that_satisfies + (\ c -> pred (C# c)) + ps + len + n + in + if break_pt ==# n then -- immediate match, no substring to cut out. + splitify (break_pt +# 1#) + else + substrPS# ps n (break_pt -# 1#): -- leave out the matching character + splitify (break_pt +# 1#) +\end{code} + +%************************************************************************ +%* * +\subsection{Local utility functions} +%* * +%************************************************************************ + +The definition of @_substrPS@ is essentially: +@take (end - begin + 1) (drop begin str)@. + +\begin{code} +substrPS :: PackedString -> Int -> Int -> PackedString +substrPS ps (I# begin) (I# end) = substrPS# ps begin end + +substrPS# ps s e + | s <# 0# || e <# s + = error "substrPS: bounds out of range" + + | s >=# len || result_len# <=# 0# + = nilPS + + | otherwise + = runST ( + new_ps_array (result_len# +# 1#) >>= \ ch_arr -> -- incl NUL byte! + fill_in ch_arr 0# >> + freeze_ps_array ch_arr >>= \ (ByteArray _ frozen#) -> + + let has_null = byteArrayHasNUL# frozen# result_len# in + + return (PS frozen# result_len# has_null) + ) + where + len = lengthPS# ps + + result_len# = (if e <# len then (e +# 1#) else len) -# s + result_len = I# result_len# + + ----------------------- + fill_in :: MutableByteArray s Int -> Int# -> ST s () + + fill_in arr_in# idx + | idx ==# result_len# + = write_ps_array arr_in# idx (chr# 0#) >> + return () + | otherwise + = let + ch = indexPS# ps (s +# idx) + in + write_ps_array arr_in# idx ch >> + fill_in arr_in# (idx +# 1#) +\end{code} + +(Very :-) ``Specialised'' versions of some CharArray things... + +\begin{code} +new_ps_array :: Int# -> ST s (MutableByteArray s Int) +write_ps_array :: MutableByteArray s Int -> Int# -> Char# -> ST s () +freeze_ps_array :: MutableByteArray s Int -> ST s (ByteArray Int) + +new_ps_array size = ST $ \ s# -> + case newCharArray# size s# of { StateAndMutableByteArray# s2# barr# -> + STret s2# (MutableByteArray bot barr#)} + where + bot = error "new_ps_array" + +write_ps_array (MutableByteArray _ barr#) n ch = ST $ \ s# -> + case writeCharArray# barr# n ch s# of { s2# -> + STret s2# ()} + +-- same as unsafeFreezeByteArray +freeze_ps_array (MutableByteArray ixs arr#) = ST $ \ s# -> + case unsafeFreezeByteArray# arr# s# of { StateAndByteArray# s2# frozen# -> + STret s2# (ByteArray ixs frozen#) } +\end{code} + + +%********************************************************* +%* * +\subsection{Packing and unpacking C strings} +%* * +%********************************************************* + +\begin{code} +unpackCString :: Addr -> [Char] + +-- Calls to the next four are injected by the compiler itself, +-- to deal with literal strings +packCString# :: [Char] -> ByteArray# +unpackCString# :: Addr# -> [Char] +unpackCString2# :: Addr# -> Int# -> [Char] +unpackAppendCString# :: Addr# -> [Char] -> [Char] +unpackFoldrCString# :: Addr# -> (Char -> a -> a) -> a -> a + +packCString# str = case (packString str) of { PS bytes _ _ -> bytes } + +unpackCString a@(A# addr) = + if a == ``NULL'' then + [] + else + unpackCString# addr + +unpackCString# addr + = unpack 0# + where + unpack nh + | ch `eqChar#` '\0'# = [] + | otherwise = C# ch : unpack (nh +# 1#) + where + ch = indexCharOffAddr# addr nh + +unpackCString2# addr len + -- This one is called by the compiler to unpack literal strings with NULs in them; rare. + = unpackPS (packCBytes (I# len) (A# addr)) + +unpackAppendCString# addr rest + = unpack 0# + where + unpack nh + | ch `eqChar#` '\0'# = rest + | otherwise = C# ch : unpack (nh +# 1#) + where + ch = indexCharOffAddr# addr nh + +unpackFoldrCString# addr f z + = unpack 0# + where + unpack nh + | ch `eqChar#` '\0'# = z + | otherwise = C# ch `f` unpack (nh +# 1#) + where + ch = indexCharOffAddr# addr nh + + +cStringToPS :: Addr -> PackedString +cStringToPS (A# a#) = -- the easy one; we just believe the caller + CPS a# len + where + len = case (strlen# a#) of { I# x -> x } + +packBytesForC :: [Char] -> ByteArray Int +packBytesForC str = psToByteArray (packString str) + +psToByteArrayST :: [Char] -> ST s (ByteArray Int) +psToByteArrayST str = + packStringST str >>= \ (PS bytes n has_null) -> + --later? ASSERT(not has_null) + return (ByteArray (0, I# (n -# 1#)) bytes) + +packNBytesForCST :: Int -> [Char] -> ST s (ByteArray Int) +packNBytesForCST len str = + packNCharsST len str >>= \ (PS bytes n has_null) -> + return (ByteArray (0, I# (n -# 1#)) bytes) + +packCBytes :: Int -> Addr -> PackedString +packCBytes len addr = runST (packCBytesST len addr) + +packCBytesST :: Int -> Addr -> ST s PackedString +packCBytesST len@(I# length#) (A# addr) = + {- + allocate an array that will hold the string + (not forgetting the NUL byte at the end) + -} + new_ps_array (length# +# 1#) >>= \ ch_array -> + -- fill in packed string from "addr" + fill_in ch_array 0# >> + -- freeze the puppy: + freeze_ps_array ch_array >>= \ (ByteArray _ frozen#) -> + let has_null = byteArrayHasNUL# frozen# length# in + return (PS frozen# length# has_null) + where + fill_in :: MutableByteArray s Int -> Int# -> ST s () + + fill_in arr_in# idx + | idx ==# length# + = write_ps_array arr_in# idx (chr# 0#) >> + return () + | otherwise + = case (indexCharOffAddr# addr idx) of { ch -> + write_ps_array arr_in# idx ch >> + fill_in arr_in# (idx +# 1#) } + +\end{code} diff --git a/ghc/lib/misc/Pretty.lhs b/ghc/lib/misc/Pretty.lhs new file mode 100644 index 0000000..4e19f36 --- /dev/null +++ b/ghc/lib/misc/Pretty.lhs @@ -0,0 +1,421 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1996 +% +\section[Pretty]{Pretty-printing data type} + +\begin{code} +#if defined(COMPILING_GHC) +# include "HsVersions.h" +#else +# define FAST_STRING String +# define _LENGTH_ length +#endif + +module Pretty ( + +#if defined(COMPILING_GHC) + SYN_IE(Pretty), + prettyToUn, +#else + Pretty, +#endif + ppNil, ppStr, ppPStr, ppChar, ppInt, ppInteger, + ppFloat, ppDouble, +#if __GLASGOW_HASKELL__ + -- may be able to *replace* ppDouble + ppRational, +#endif + ppSP, pp'SP, ppLbrack, ppRbrack, ppLparen, ppRparen, + ppSemi, ppComma, ppEquals, + ppBracket, ppParens, ppQuote, + + ppCat, ppBeside, ppBesides, ppAbove, ppAboves, + ppNest, ppSep, ppHang, ppInterleave, ppIntersperse, + ppShow, speakNth, + +#if defined(COMPILING_GHC) + ppPutStr, +#endif + + -- abstract type, to complete the interface... + --PrettyRep(..), Delay + ) where + +#if defined(COMPILING_GHC) + +CHK_Ubiq() -- debugging consistency check +IMPORT_1_3(Ratio) +IMPORT_1_3(IO) + +import Unpretty ( SYN_IE(Unpretty) ) +#else +import Ratio +#endif + +import CharSeq +\end{code} + +Based on John Hughes's pretty-printing library. Loosely. Very +loosely. + +%************************************************ +%* * + \subsection{The interface} +%* * +%************************************************ + +\begin{code} +ppNil :: Pretty +ppSP, pp'SP, ppLbrack, ppRbrack, ppLparen, ppRparen, ppSemi, ppComma, ppEquals :: Pretty + +ppStr :: [Char] -> Pretty +ppPStr :: FAST_STRING -> Pretty +ppChar :: Char -> Pretty +ppInt :: Int -> Pretty +ppInteger :: Integer -> Pretty +ppDouble :: Double -> Pretty +ppFloat :: Float -> Pretty +ppRational :: Rational -> Pretty + +ppBracket :: Pretty -> Pretty -- put brackets around it +ppParens :: Pretty -> Pretty -- put parens around it + +ppBeside :: Pretty -> Pretty -> Pretty +ppBesides :: [Pretty] -> Pretty +ppBesideSP :: Pretty -> Pretty -> Pretty +ppCat :: [Pretty] -> Pretty -- i.e., ppBesidesSP + +ppAbove :: Pretty -> Pretty -> Pretty +ppAboves :: [Pretty] -> Pretty + +ppInterleave :: Pretty -> [Pretty] -> Pretty +ppIntersperse :: Pretty -> [Pretty] -> Pretty -- no spaces between, no ppSep +ppSep :: [Pretty] -> Pretty +ppHang :: Pretty -> Int -> Pretty -> Pretty +ppNest :: Int -> Pretty -> Pretty + +ppShow :: Int -> Pretty -> [Char] + +#if defined(COMPILING_GHC) +ppPutStr :: Handle -> Int -> Pretty -> IO () +#endif +\end{code} + +%************************************************ +%* * + \subsection{The representation} +%* * +%************************************************ + +\begin{code} +type Pretty = Int -- The width to print in + -> Bool -- True => vertical context + -> PrettyRep + +data PrettyRep + = MkPrettyRep CSeq -- The text + (Delay Int) -- No of chars in last line + Bool -- True if empty object + Bool -- Fits on a single line in specified width + +data Delay a = MkDelay a + +forceDel (MkDelay _) r = r + +forceBool True r = r +forceBool False r = r + +forceInfo ll emp sl r = forceDel ll (forceBool emp (forceBool sl r)) + +ppShow width p + = case (p width False) of + MkPrettyRep seq ll emp sl -> cShow seq + +#if defined(COMPILING_GHC) +ppPutStr f width p + = case (p width False) of + MkPrettyRep seq ll emp sl -> cPutStr f seq +#endif + +ppNil width is_vert = MkPrettyRep cNil (MkDelay 0) True (width >= 0) + -- Doesn't fit if width < 0, otherwise, ppNil + -- will make ppBesides always return True. + +ppStr s width is_vert = MkPrettyRep (cStr s) (MkDelay ls) False (width >= ls) + where ls = length s +ppPStr s width is_vert = MkPrettyRep (cPStr s) (MkDelay ls) False (width >= ls) + where ls = _LENGTH_ s +ppChar c width is_vert = MkPrettyRep (cCh c) (MkDelay 1) False (width >= 1) + +ppInt n width is_vert = MkPrettyRep (cStr s) (MkDelay ls) False (width >= ls) + where s = show n; ls = length s + +ppInteger n = ppStr (show n) +ppDouble n = ppStr (show n) +ppFloat n = ppStr (show n) + +ppRational n = ppStr (show (fromRationalX n)) -- _showRational 30 n) + +ppSP = ppChar ' ' +pp'SP = ppStr ", " +ppLbrack = ppChar '[' +ppRbrack = ppChar ']' +ppLparen = ppChar '(' +ppRparen = ppChar ')' +ppSemi = ppChar ';' +ppComma = ppChar ',' +ppEquals = ppChar '=' + +ppBracket p = ppBeside ppLbrack (ppBeside p ppRbrack) +ppParens p = ppBeside ppLparen (ppBeside p ppRparen) +ppQuote p = ppBeside (ppChar '`') (ppBeside p (ppChar '\'')) + +ppInterleave sep ps = ppSep (pi ps) + where + pi [] = [] + pi [x] = [x] + pi (x:xs) = (ppBeside x sep) : pi xs +\end{code} + +ToDo: this could be better: main pt is: no extra spaces in between. + +\begin{code} +ppIntersperse sep ps = ppBesides (pi ps) + where + pi [] = [] + pi [x] = [x] + pi (x:xs) = (ppBeside x sep) : pi xs +\end{code} + +Laziness is important in @ppBeside@. If the first thing is not a +single line it will return @False@ for the single-line boolean without +laying out the second. + +\begin{code} +ppBeside p1 p2 width is_vert + = case (p1 width False) of + MkPrettyRep seq1 (MkDelay ll1) emp1 sl1 -> + MkPrettyRep (seq1 `cAppend` (cIndent ll1 seq2)) + (MkDelay (ll1 + ll2)) + (emp1 && emp2) + ((width >= 0) && (sl1 && sl2)) + -- This sequence of (&&)'s ensures that ppBeside + -- returns a False for sl as soon as possible. + where -- NB: for case alt + seq2 = forceInfo x_ll2 emp2 sl2 x_seq2 + MkDelay ll2 = x_ll2 + MkPrettyRep x_seq2 x_ll2 emp2 sl2 = p2 (width-ll1) False + -- ToDo: if emp{1,2} then we really + -- should be passing on "is_vert" to p{2,1}. + +ppBesides [] = ppNil +ppBesides ps = foldr1 ppBeside ps +\end{code} + +@ppBesideSP@ puts two things beside each other separated by a space. + +\begin{code} +ppBesideSP p1 p2 width is_vert + = case (p1 width False) of + MkPrettyRep seq1 (MkDelay ll1) emp1 sl1 -> + MkPrettyRep (seq1 `cAppend` (sp `cAppend` (cIndent li seq2))) + (MkDelay (li + ll2)) + (emp1 && emp2) + ((width >= wi) && (sl1 && sl2)) + where -- NB: for case alt + seq2 = forceInfo x_ll2 emp2 sl2 x_seq2 + MkDelay ll2 = x_ll2 + MkPrettyRep x_seq2 x_ll2 emp2 sl2 = p2 (width-li) False + li, wi :: Int + li = if emp1 then 0 else ll1+1 + wi = if emp1 then 0 else 1 + sp = if emp1 || emp2 then cNil else (cCh ' ') +\end{code} + +@ppCat@ is the name I (WDP) happen to have been using for @ppBesidesSP@. + +\begin{code} +ppCat [] = ppNil +ppCat ps = foldr1 ppBesideSP ps +\end{code} + +\begin{code} +ppAbove p1 p2 width is_vert + = case (p1 width True) of + MkPrettyRep seq1 (MkDelay ll1) emp1 sl1 -> + MkPrettyRep (seq1 `cAppend` (nl `cAppend` seq2)) + (MkDelay ll2) + -- ToDo: make ll depend on empties? + (emp1 && emp2) + False + where -- NB: for case alt + nl = if emp1 || emp2 then cNil else cNL + seq2 = forceInfo x_ll2 emp2 sl2 x_seq2 + MkDelay ll2 = x_ll2 -- Don't "optimise" this away! + MkPrettyRep x_seq2 x_ll2 emp2 sl2 = p2 width True + -- ToDo: ditto about passing is_vert if empties + +ppAboves [] = ppNil +ppAboves ps = foldr1 ppAbove ps +\end{code} + +\begin{code} +ppNest n p width False = p width False +ppNest n p width True + = case (p (width-n) True) of + MkPrettyRep seq (MkDelay ll) emp sl -> + MkPrettyRep (cIndent n seq) (MkDelay (ll+n)) emp sl +\end{code} + +The length-check below \tr{(ll1+ll2+1) <= width} should really check for +max widths not the width of the last line. + +\begin{code} +ppHang p1 n p2 width is_vert -- This is a little bit stricter than it could + -- be made with a little more effort. + -- Eg the output always starts with seq1 + = case (p1 width False) of + MkPrettyRep seq1 (MkDelay ll1) emp1 sl1 -> + if emp1 then + p2 width is_vert + else + if (ll1 <= n) || sl2 then -- very ppBesideSP'ish + -- Hang it if p1 shorter than indent or if it doesn't fit + MkPrettyRep (seq1 `cAppend` ((cCh ' ') `cAppend` (cIndent (ll1+1) seq2))) + (MkDelay (ll1 + 1 + ll2)) + False + (sl1 && sl2) + else + -- Nest it (pretty ppAbove-ish) + MkPrettyRep (seq1 `cAppend` (cNL `cAppend` (cIndent n seq2'))) + (MkDelay ll2') -- ToDo: depend on empties + False + False + where -- NB: for case alt + seq2 = forceInfo x_ll2 emp2 sl2 x_seq2 + MkDelay ll2 = x_ll2 + MkPrettyRep x_seq2 x_ll2 emp2 sl2 = p2 (width-(ll1+1)) False + -- ToDo: more "is_vert if empty" stuff + + seq2' = forceInfo x_ll2' emp2' sl2' x_seq2' + MkDelay ll2' = x_ll2' -- Don't "optimise" this away! + MkPrettyRep x_seq2' x_ll2' emp2' sl2' = p2 (width-n) False -- ToDo: True? +\end{code} + +\begin{code} +ppSep [] width is_vert = ppNil width is_vert +ppSep [p] width is_vert = p width is_vert + +-- CURRENT, but BAD. Quadratic behaviour on the perfectly reasonable +-- ppSep [a, ppSep[b, ppSep [c, ... ]]] + +ppSep ps width is_vert + = case (ppCat ps width is_vert) of + MkPrettyRep seq x_ll emp sl -> + if sl then -- Fits on one line + MkPrettyRep seq x_ll emp sl + else + ppAboves ps width is_vert -- Takes several lines +\end{code} + + +@speakNth@ converts an integer to a verbal index; eg 1 maps to +``first'' etc. + +\begin{code} +speakNth :: Int -> Pretty + +speakNth 1 = ppStr "first" +speakNth 2 = ppStr "second" +speakNth 3 = ppStr "third" +speakNth 4 = ppStr "fourth" +speakNth 5 = ppStr "fifth" +speakNth 6 = ppStr "sixth" +speakNth n = ppBesides [ ppInt n, ppStr st_nd_rd_th ] + where + st_nd_rd_th | n_rem_10 == 1 = "st" + | n_rem_10 == 2 = "nd" + | n_rem_10 == 3 = "rd" + | otherwise = "th" + + n_rem_10 = n `rem` 10 +\end{code} + + +%************************************************************************ +%* * +\subsection[Outputable-print]{Pretty-printing stuff} +%* * +%************************************************************************ + +\begin{code} +#if defined(COMPILING_GHC) + -- to the end of file + +prettyToUn :: Pretty -> Unpretty + +prettyToUn p + = case (p 999999{-totally bogus width-} False{-also invented-}) of + MkPrettyRep seq ll emp sl -> seq + +#endif {-COMPILING_GHC-} +\end{code} + +----------------------------------- +\begin{code} +-- from Lennart +fromRationalX :: (RealFloat a) => Rational -> a + +fromRationalX r = + let + h = ceiling (huge `asTypeOf` x) + b = toInteger (floatRadix x) + x = fromRat 0 r + fromRat e0 r' = + let d = denominator r' + n = numerator r' + in if d > h then + let e = integerLogBase b (d `div` h) + 1 + in fromRat (e0-e) (n % (d `div` (b^e))) + else if abs n > h then + let e = integerLogBase b (abs n `div` h) + 1 + in fromRat (e0+e) ((n `div` (b^e)) % d) + else + scaleFloat e0 (fromRational r') + in x + +-- Compute the discrete log of i in base b. +-- Simplest way would be just divide i by b until it's smaller then b, but that would +-- be very slow! We are just slightly more clever. +integerLogBase :: Integer -> Integer -> Int +integerLogBase b i = + if i < b then + 0 + else + -- Try squaring the base first to cut down the number of divisions. + let l = 2 * integerLogBase (b*b) i + + doDiv :: Integer -> Int -> Int + doDiv j k = if j < b then k else doDiv (j `div` b) (k+1) + in + doDiv (i `div` (b^l)) l + + +------------ + +-- Compute smallest and largest floating point values. +{- +tiny :: (RealFloat a) => a +tiny = + let (l, _) = floatRange x + x = encodeFloat 1 (l-1) + in x +-} + +huge :: (RealFloat a) => a +huge = + let (_, u) = floatRange x + d = floatDigits x + x = encodeFloat (floatRadix x ^ d - 1) (u - d) + in x +\end{code} diff --git a/ghc/lib/misc/Readline.lhs b/ghc/lib/misc/Readline.lhs new file mode 100644 index 0000000..7c1d12e --- /dev/null +++ b/ghc/lib/misc/Readline.lhs @@ -0,0 +1,315 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1995-1996 +% +\section[Readline]{GNU Readline Library Bindings} + +This module attempts to provide a better line based editing facility +for Haskell programmers by providing access to the GNU Readline +library. Related to this are bindings for the GNU History library +which can be found in History. + + +\begin{code} +{-# OPTIONS -#include "cbits/ghcReadline.h" #-} + +module Readline ( + rlInitialize, + readline, addHistory, + + rlBindKey, rlAddDefun, + RlCallbackFunction, + + rlGetLineBuffer, rlSetLineBuffer, + rlGetPoint, rlSetPoint, + rlGetEnd, rlSetEnd, + rlGetMark, rlSetMark, + rlSetDone, + rlPendingInput, + + rlPrompt, rlTerminalName, rlSetReadlineName, rlGetReadlineName + + ) where + +import GlaExts + +import PackedString ( unpackCString ) +import Foreign + +import System + +--#include + +type KeyCode = Int + +type RlCallbackFunction = + (Int -> -- Numeric Argument + KeyCode -> -- KeyCode of pressed Key + IO Int) +\end{code} + +%*************************************************************************** +%* * +\subsection[Readline-Functions]{Main Readline Functions} +%* * +%*************************************************************************** +\begin{code} + +readline :: String -> -- Prompt String + IO String -- Returned line +readline prompt = +--ToDo: Get the "Live register in _casm_GC_ " bug fixed +-- this stops us passing the prompt string to readline directly :-( +-- _casm_GC_ ``%r = readline %0;'' prompt `thenPrimIO` \ litstr -> + + _casm_ ``rl_prompt_hack = (char*)realloc(rl_prompt_hack, %1); + strcpy (rl_prompt_hack,%0);'' + prompt (length prompt) `thenIO_Prim` \ () -> + _casm_GC_ ``%r = readline (rl_prompt_hack);'' `thenIO_Prim` \ litstr -> + if (litstr == ``NULL'') then + fail (userError "Readline has read EOF") + else ( + let str = unpackCString litstr in + _casm_ ``free %0;'' litstr `thenIO_Prim` \ () -> + return str + ) + + +addHistory :: String -> -- String to enter in history + IO () +addHistory str = primIOToIO (_ccall_ add_history str) + + +rlBindKey :: KeyCode -> -- Key to Bind to + RlCallbackFunction -> -- Function to exec on execution + IO () +rlBindKey key cback = + if (0 > key) || (key > 255) then + fail (userError "Invalid ASCII Key Code, must be in range 0.255") + else + addCbackEntry (key,cback) `thenIO_Prim` \ _ -> + _casm_ `` rl_bind_key((KeyCode)%0,&genericRlCback); '' + key `thenIO_Prim` \ () -> + return () + +\end{code} + +i.e. add the (KeyCode,RlCallbackFunction) key to the assoc. list and register +the generic callback for this KeyCode. + +The entry point that $genericRlCback$ calls would then read the +global variables $current\_i$ and $current\_kc$ and do a lookup: + +\begin{code} +rlAddDefun :: String -> -- Function Name + RlCallbackFunction -> -- Function to call + KeyCode -> -- Key to bind to, or -1 for no bind + IO () +rlAddDefun name cback key = + if (0 > key) || (key > 255) then + fail (userError "Invalid ASCII Key Code, must be in range 0..255") + else + addCbackEntry (key, cback) `thenIO_Prim` \ _ -> + _casm_ ``rl_add_defun (%0, &genericRlCback, (KeyCode)%1);'' + name key `thenIO_Prim` \ () -> + return () + +\end{code} + + +The C function $genericRlCallback$ puts the callback arguments into +global variables and enters the Haskell world through the +$haskellRlEntry$ function. Before exiting, the Haskell function will +deposit its result in the global varariable $rl\_return$. + +In the Haskell action that is invoked via $enterStablePtr$, a match +between the Keycode in $current\_kc$ and the Haskell callback needs to +be made. To essentially keep the same assoc. list of (KeyCode,cback +function) as Readline does, we make use of yet another global variable +$cbackList$: + +\begin{code} + +createCbackList :: [(KeyCode,RlCallbackFunction)] -> PrimIO () +createCbackList ls = +#ifndef __PARALLEL_HASKELL__ + makeStablePtr ls >>= \ stable_ls -> + _casm_ `` cbackList=(StgStablePtr)%0; '' stable_ls +#else + error "createCbackList: not available for Parallel Haskell" +#endif + +getCbackList :: PrimIO [(KeyCode,RlCallbackFunction)] +getCbackList = +#ifndef __PARALLEL_HASKELL__ + _casm_ `` %r=(StgStablePtr)cbackList; '' >>= \ stable_ls -> + deRefStablePtr stable_ls +#else + error "getCbackList: not available for Parallel Haskell" +#endif + +setCbackList :: [(KeyCode,RlCallbackFunction)] -> PrimIO () +setCbackList ls = +#ifndef __PARALLEL_HASKELL__ + _casm_ `` %r=(StgStablePtr)cbackList; '' >>= \ old_stable_ls -> + freeStablePtr old_stable_ls >> + createCbackList ls +#else + error "setCbackList: not available for Parallel Haskell" +#endif + +addCbackEntry :: (KeyCode,RlCallbackFunction) -> PrimIO () +addCbackEntry entry = + getCbackList >>= \ ls -> + setCbackList (entry:ls) +\end{code} + +The above functions allows us to query and augment the assoc. list in +Haskell. + +\begin{code} + +invokeRlCback :: PrimIO () +invokeRlCback = + _casm_ `` %r=(KeyCode)current_kc; '' >>= \ kc -> + _casm_ `` %r=(int)current_narg; '' >>= \ narg -> + getCbackList >>= \ ls -> + (case (dropWhile (\ (key,_) -> kc/=key) ls) of + [] -> -- no match + returnPrimIO (-1) + ((_,cback):_) -> + ioToPrimIO (cback narg kc) + ) >>= \ ret_val -> + _casm_ `` rl_return=(int)%0; '' ret_val >>= \ () -> + returnPrimIO () + +\end{code} + +Finally, we need to initialise this whole, ugly machinery: + +\begin{code} +initRlCbacks :: PrimIO () + +initRlCbacks = +#ifndef __PARALLEL_HASKELL__ + createCbackList [] >> + makeStablePtr (invokeRlCback) >>= \ stable_f -> + _casm_ `` haskellRlEntry=(StgStablePtr)%0; '' stable_f >>= \ () -> + return () +#else + error "initRlCbacks: not available for Parallel Haskell" +#endif +\end{code} + + +%*************************************************************************** +%* * +\subsection[Readline-Globals]{Global Readline Variables} +%* * +%*************************************************************************** + +These are the global variables required by the readline lib. Need to +find a way of making these read/write from the Haskell side. Should +they be in the IO Monad, should they be Mutable Variables? + +\begin{code} + +rlGetLineBuffer :: IO String +rlGetLineBuffer = + _casm_ ``%r = rl_line_buffer;'' `thenIO_Prim` \ litstr -> + return (unpackCString litstr) + +rlSetLineBuffer :: String -> IO () +rlSetLineBuffer str = primIOToIO (_casm_ ``rl_line_buffer = %0;'' str) + + +rlGetPoint :: IO Int +rlGetPoint = primIOToIO (_casm_ ``%r = rl_point;'') + +rlSetPoint :: Int -> IO () +rlSetPoint point = primIOToIO (_casm_ ``rl_point = %0;'' point) + +rlGetEnd :: IO Int +rlGetEnd = primIOToIO (_casm_ ``%r = rl_end;'') + +rlSetEnd :: Int -> IO () +rlSetEnd end = primIOToIO (_casm_ ``rl_end = %0;'' end) + +rlGetMark :: IO Int +rlGetMark = primIOToIO (_casm_ ``%r = rl_mark;'') + +rlSetMark :: Int -> IO () +rlSetMark mark = primIOToIO (_casm_ ``rl_mark = %0;'' mark) + +rlSetDone :: Bool -> IO () +rlSetDone True = primIOToIO (_casm_ ``rl_done = %0;'' 1) +rlSetDone False = primIOToIO (_casm_ ``rl_done = %0;'' 0) + +rlPendingInput :: KeyCode -> IO () +rlPendingInput key = primIOToIO (_casm_ ``rl_pending_input = %0;'' key) + +rlPrompt :: IO String +rlPrompt = + _casm_ ``%r = rl_readline_name;'' `thenIO_Prim` \ litstr -> + return (unpackCString litstr) + +rlTerminalName :: IO String +rlTerminalName = + _casm_ ``%r = rl_terminal_name;'' `thenIO_Prim` \ litstr -> + return (unpackCString litstr) + + +rlGetReadlineName :: IO String +rlGetReadlineName = + _casm_ ``%r = rl_readline_name;'' `thenIO_Prim` \ litstr -> + return (unpackCString litstr) + +rlSetReadlineName :: String -> IO () +rlSetReadlineName str = primIOToIO ( + _casm_ ``rl_readline_name = %0;'' str) +\end{code} + +\begin{verbatim} +-- +-- The following two were taken from PreludeStdIO stdin/stdout +-- +rlInStream :: Handle +rlInStream = unsafePerformPrimIO ( + newMVar >>= \ handle -> + _ccall_ getLock (``rl_instream''::Addr) 0 >>= \ rc -> + (case rc of + 0 -> putMVar handle ClosedHandle + 1 -> putMVar handle (ReadHandle ``rl_instream'' Nothing False) + _ -> constructError >>= \ ioError -> + putMVar handle (ErrorHandle ioError) + ) >> + returnPrimIO handle + ) + + +rlOutStream :: Handle +rlOutStream = unsafePerformPrimIO ( + newMVar >>= \ handle -> + _ccall_ getLock (``rl_outstream''::Addr) 1 >>= \ rc -> + (case rc of + 0 -> putMVar handle ClosedHandle + 1 -> putMVar handle (WriteHandle ``rl_outstream'' Nothing False) + _ -> constructError >>= \ ioError -> + putMVar handle (ErrorHandle ioError) + ) >> + returnPrimIO handle + ) + +\end{verbatim} + + +\begin{code} + +-- rlStartupHook :: RlCallBackFunction -> IO () + +rlInitialize :: IO () +rlInitialize = + getProgName >>= \ pname -> + rlSetReadlineName pname >> + _casm_ ``rl_prompt_hack = (char*)malloc(1);'' `thenIO_Prim` \ () -> + primIOToIO (initRlCbacks) +\end{code} diff --git a/ghc/lib/misc/Regex.lhs b/ghc/lib/misc/Regex.lhs new file mode 100644 index 0000000..2153b62 --- /dev/null +++ b/ghc/lib/misc/Regex.lhs @@ -0,0 +1,367 @@ +\section[regex]{Haskell binding to the GNU regex library} + +What follows is a straightforward binding to the functions +provided by the GNU regex library (the GNU group of functions with Perl +like syntax) + +\begin{code} +{-# OPTIONS -#include "cbits/ghcRegex.h" #-} + +module Regex ( + PatBuffer(..), + re_compile_pattern, + re_match, + re_search, + re_match2, + re_search2, + + REmatch(..) + ) where + +import GlaExts +import CCall +import PackedString +import Array ( array, bounds, (!) ) +import PrelArr ( MutableByteArray(..), Array(..) ) +import PrelGHC ( MutableByteArray# ) +import Char ( ord ) +import Foreign + +\end{code} + +First, the higher level matching structure that the functions herein +return: +\begin{code} +-- +-- GroupBounds hold the interval where a group +-- matched inside a string, e.g. +-- +-- matching "reg(exp)" "a regexp" returns the pair (5,7) for the +-- (exp) group. (PackedString indices start from 0) + +type GroupBounds = (Int, Int) + +data REmatch + = REmatch (Array Int GroupBounds) -- for $1, ... $n + GroupBounds -- for $` (everything before match) + GroupBounds -- for $& (entire matched string) + GroupBounds -- for $' (everything after) + GroupBounds -- for $+ (matched by last bracket) +\end{code} + +Prior to any matching (or searching), the regular expression +have to compiled into an internal form, the pattern buffer. +Represent the pattern buffer as a Haskell heap object: + +\begin{code} +data PatBuffer = PatBuffer# (MutableByteArray# RealWorld) +instance CCallable PatBuffer +instance CReturnable PatBuffer + +createPatBuffer :: Bool -> IO PatBuffer + +createPatBuffer insensitive + = _casm_ ``%r = (int)sizeof(struct re_pattern_buffer);'' >>= \ sz -> + stToIO (newCharArray (0,sz)) >>= \ (MutableByteArray _ pbuf#) -> + let + pbuf = PatBuffer# pbuf# + in + (if insensitive then + {- + See comment re: fastmap below + -} + ((_casm_ ``%r = (char *)malloc(256*sizeof(char));'')::IO Addr) >>= \ tmap -> + {- + Set up the translate table so that any lowercase + char. gets mapped to an uppercase one. Beacuse quoting + inside CAsmStrings is Problematic, we pass in the ordinal values + of 'a','z' and 'A' + -} + _casm_ ``{ int i; + + for(i=0; i<256; i++) + ((char *)%0)[i] = (char)i; + for(i=(int)%1;i <=(int)%2;i++) + ((char *)%0)[i] = i - ((int)%1 - (int)%3); + }'' tmap (ord 'a') (ord 'z') (ord 'A') >> + _casm_ ``((struct re_pattern_buffer *)%0)->translate = %1; '' pbuf tmap + else + _casm_ ``((struct re_pattern_buffer *)%0)->translate = 0; '' pbuf) >> + {- + Use a fastmap to speed things up, would like to have the fastmap + in the Haskell heap, but it will get GCed before we can say regexp, + as the reference to it is buried inside a ByteArray :-( + -} + ((_casm_ ``%r = (char *)malloc(256*sizeof(char));'')::IO Addr) >>= \ fmap -> + _casm_ `` ((struct re_pattern_buffer *)%0)->fastmap = %1; '' pbuf fmap >> + {- + We want the compiler of the pattern to alloc. memory + for the pattern. + -} + _casm_ `` ((struct re_pattern_buffer *)%0)->buffer = 0; '' pbuf >> + _casm_ `` ((struct re_pattern_buffer *)%0)->allocated = 0; '' pbuf >> + return pbuf +\end{code} + +@re_compile_pattern@ converts a regular expression into a pattern buffer, +GNU style. + +Q: should we lift the syntax bits configuration up to the Haskell +programmer level ? + +\begin{code} +re_compile_pattern :: PackedString -- pattern to compile + -> Bool -- True <=> assume single-line mode + -> Bool -- True <=> case-insensitive + -> IO PatBuffer + +re_compile_pattern str single_line_mode insensitive + = createPatBuffer insensitive >>= \ pbuf -> + (if single_line_mode then -- match a multi-line buffer + _casm_ ``re_syntax_options = RE_PERL_SINGLELINE_SYNTAX;'' + else + _casm_ ``re_syntax_options = RE_PERL_MULTILINE_SYNTAX;'') >> + + _casm_ `` (int)re_compile_pattern((char *)%0, + (int)%1, + (struct re_pattern_buffer *)%2);'' + (unpackPS str) (lengthPS str) pbuf >>= \ () -> + -- + -- No checking for how the compilation of the pattern went yet. + -- + return pbuf +\end{code} + +Got a match? + +Each call to re_match uses a new re_registers structures, so we need +to ask the regex library to allocate enough memory to store the +registers in each time. That's what the line '... REGS_UNALLOCATED' +is all about. + +\begin{code} +re_match :: PatBuffer -- compiled regexp + -> PackedString -- string to match + -> Int -- start position + -> Bool -- True <=> record results in registers + -> IO (Maybe REmatch) + +re_match pbuf str start reg + = ((if reg then -- record result of match in registers + _casm_ ``%r = (struct re_registers *)malloc(sizeof(struct re_registers *));'' + else + _casm_ ``%r = (struct re_registers *)NULL;'')::IO Addr) >>= \ regs -> + _casm_ ``((struct re_pattern_buffer *)%0)->regs_allocated = REGS_UNALLOCATED; + %r=(int)re_match((struct re_pattern_buffer *)%0, + (char *)%1, + (int)%2, + (int)%3, + (struct re_registers *)%4);'' pbuf + (unpackPS str) + (lengthPS str) + start + regs >>= \ match_res -> + if match_res == (-2) then + error "re_match: Internal error" + else if match_res < 0 then + _casm_ ``free((struct re_registers *)%0); '' regs >> + return Nothing + else + build_re_match start (lengthPS str) regs >>= \ arr -> + _casm_ ``free(((struct re_registers *)%0)->start); + free(((struct re_registers *)%0)->end); + free((struct re_registers *)%0); '' regs >> + return (Just arr) +\end{code} + +Matching on 2 strings is useful when you're dealing with multiple +buffers, which is something that could prove useful for PackedStrings, +as we don't want to stuff the contents of a file into one massive heap +chunk, but load (smaller chunks) on demand. + +\begin{code} +re_match2 :: PatBuffer + -> PackedString + -> PackedString + -> Int + -> Int + -> Bool + -> IO (Maybe REmatch) + +re_match2 pbuf str1 str2 start stop reg + = ((if reg then -- record result of match in registers + _casm_ ``%r = (struct re_registers *)malloc(sizeof(struct re_registers *));'' + else + _casm_ ``%r = (struct re_registers *)NULL;'')::IO Addr) >>= \ regs -> + _casm_ ``%r=(int)re_match_2((struct re_pattern_buffer *)%0, + (char *)%1, + (int)%2, + (char *)%3, + (int)%4, + (int)%5, + (struct re_registers *)%6, + (int)%7);'' pbuf + (unpackPS str1) + (lengthPS str1) + (unpackPS str2) + (lengthPS str2) + start + regs + stop >>= \ match_res -> + if match_res == (-2) then + error "re_match2: Internal error" + else if match_res < 0 then + _casm_ ``free((struct re_registers *)%0); '' regs >> + return Nothing + else + build_re_match start stop regs >>= \ arr -> + _casm_ ``free((struct re_registers *)%0); '' regs >> + return (Just arr) +\end{code} + +Find all the matches in a string: +\begin{code} +re_search :: PatBuffer -- the compiled regexp + -> PackedString -- the string to search + -> Int -- start index + -> Int -- stop index + -> Bool -- record result of match in registers + -> IO (Maybe REmatch) + +re_search pbuf str start range reg + = (if reg then -- record result of match in registers + _casm_ ``%r = (struct re_registers *)malloc(sizeof(struct re_registers *));'' + else + _casm_ ``%r = (struct re_registers *)NULL;'') >>= \ regs -> + _casm_ ``%r=(int)re_search((struct re_pattern_buffer *)%0, + (char *)%1, + (int)%2, + (int)%3, + (int)%4, + (struct re_registers *)%5);'' pbuf + (unpackPS str) + (lengthPS str) + start + range + regs >>= \ match_res -> + if match_res== (-1) then + _casm_ `` free((struct re_registers *)%0); '' regs >> + return Nothing + else + let + (st,en) = if range > start then + (start,range) + else + (range,start) + in + build_re_match st en regs >>= \ arr -> + _casm_ ``free((struct re_registers *)%0); '' regs >> + return (Just arr) +\end{code} + +Double buffer search: +\begin{code} +re_search2 :: PatBuffer + -> PackedString + -> PackedString + -> Int + -> Int + -> Int + -> Bool + -> IO (Maybe REmatch) + +re_search2 pbuf str1 str2 start range stop reg + + = (if reg then -- record result of match in registers + _casm_ ``%r = (struct re_registers *)malloc(sizeof(struct re_registers *));'' + else + _casm_ ``%r = (struct re_registers *)NULL;'') >>= \ regs -> + _casm_ ``%r=(int)re_search_2((struct re_pattern_buffer *)%0, + (char *)%1, + (int)%2, + (char *)%3, + (int)%4, + (int)%5, + (int)%6, + (struct re_registers *)%7, + (int)%8);'' pbuf + (unpackPS str1) + (lengthPS str1) + (unpackPS str2) + (lengthPS str2) + start + range + regs + stop >>= \ match_res -> + if match_res== (-1) then + _casm_ `` free((struct re_registers *)%0); '' regs >> + return Nothing + else + let + (st,en) = if range > start then + (start,range) + else + (range,start) + in + build_re_match st en regs >>= \ arr -> + _casm_ `` free((struct re_registers *)%0); '' regs >> + return (Just arr) +\end{code} + +\begin{code} +build_re_match :: Int + -> Int + -> Addr + -> IO REmatch + +build_re_match str_start str_end regs + = _casm_ ``%r=(int)(*(struct re_registers *)%0).num_regs;'' regs >>= \ len -> + match_reg_to_array regs len >>= \ (match_start,match_end,arr) -> + let + (1,x) = bounds arr + + bef = (str_start,match_start) -- $' + aft = (match_end,str_end) -- $` + lst = arr!x -- $+ + mtch = (match_start,match_end) -- $& + in + return (REmatch arr + bef + mtch + aft + lst) + where + match_reg_to_array regs len + = trundleIO regs (0,[]) len >>= \ (no,ls) -> + let + (st,end,ls') + = case ls of + [] -> (0,0,[]) + [(a,b)] -> (a,b,ls) + ((a,b):xs) -> (a,b,xs) + in + return + (st, + end, + array (1,max 1 (no-1)) + [ (i, x) | (i,x) <- zip [1..] ls']) + + trundleIO :: Addr + -> (Int,[(Int,Int)]) + -> Int + -> IO (Int,[(Int,Int)]) + + trundleIO regs (i,acc) len + | i==len = return (i,reverse acc) + | otherwise + = _casm_ ``%r = (int)(((struct re_registers *)%0)->start)[(int)%1];'' regs i >>= \ start -> + _casm_ ``%r = (int)(((struct re_registers *)%0)->end)[(int)%1];'' regs i >>= \ end -> + let + acc' = (start,end):acc + in + if (start == (-1)) && (end == (-1)) then + return (i,reverse acc) + else + trundleIO regs (i+1,acc') len +\end{code} + diff --git a/ghc/lib/misc/Set.lhs b/ghc/lib/misc/Set.lhs new file mode 100644 index 0000000..f21c0be --- /dev/null +++ b/ghc/lib/misc/Set.lhs @@ -0,0 +1,91 @@ +% +% (c) The AQUA Project, Glasgow University, 1994-1995 +% +\section[Set]{An implementation of sets} + +This new (94/04) implementation of sets sits squarely upon our +implementation of @FiniteMaps@. The interface is (roughly?) as +before. + +(95/08: This module is no longer part of the GHC compiler proper; it +is now just a GHC library module). + +\begin{code} +module Set ( + Set, -- abstract + -- instance of: Eq + + emptySet, -- :: Set a + mkSet, -- :: Ord a => [a] -> Set a + setToList, -- :: Set a -> [a] + unitSet, -- :: a -> Set a + singletonSet, -- :: a -> Set a + + union, -- :: Ord a => Set a -> Set a -> Set a + unionManySets, -- :: Ord a => [Set a] -> Set a + minusSet, -- :: Ord a => Set a -> Set a -> Set a + mapSet, -- :: Ord a => (b -> a) -> Set b -> Set a + intersect, -- :: Ord a => Set a -> Set a -> Set a + + elementOf, -- :: Ord a => a -> Set a -> Bool + isEmptySet, -- :: Set a -> Bool + + cardinality -- :: Set a -> Int + ) where + +import FiniteMap +import Maybe +\end{code} + +\begin{code} +-- This can't be a type synonym if you want to use constructor classes. +newtype Set a = MkSet (FiniteMap a ()) + +emptySet :: Set a +emptySet = MkSet emptyFM + +unitSet :: a -> Set a +unitSet x = MkSet (unitFM x ()) +singletonSet = unitSet -- old;deprecated. + +setToList :: Set a -> [a] +setToList (MkSet set) = keysFM set + +mkSet :: Ord a => [a] -> Set a +mkSet xs = MkSet (listToFM [ (x, ()) | x <- xs]) + +union :: Ord a => Set a -> Set a -> Set a +union (MkSet set1) (MkSet set2) = MkSet (plusFM set1 set2) + +unionManySets :: Ord a => [Set a] -> Set a +unionManySets ss = foldr union emptySet ss + +minusSet :: Ord a => Set a -> Set a -> Set a +minusSet (MkSet set1) (MkSet set2) = MkSet (minusFM set1 set2) + +intersect :: Ord a => Set a -> Set a -> Set a +intersect (MkSet set1) (MkSet set2) = MkSet (intersectFM set1 set2) + +elementOf :: Ord a => a -> Set a -> Bool +elementOf x (MkSet set) = isJust (lookupFM set x) + +isEmptySet :: Set a -> Bool +isEmptySet (MkSet set) = sizeFM set == 0 + +mapSet :: Ord a => (b -> a) -> Set b -> Set a +mapSet f (MkSet set) = MkSet (listToFM [ (f key, ()) | key <- keysFM set ]) + +cardinality :: Set a -> Int +cardinality (MkSet set) = sizeFM set + +-- fair enough... +instance (Eq a) => Eq (Set a) where + (MkSet set_1) == (MkSet set_2) = set_1 == set_2 + (MkSet set_1) /= (MkSet set_2) = set_1 /= set_2 + +-- but not so clear what the right thing to do is: +{- NO: +instance (Ord a) => Ord (Set a) where + (MkSet set_1) <= (MkSet set_2) = set_1 <= set_2 +-} +\end{code} diff --git a/ghc/lib/misc/Socket.lhs b/ghc/lib/misc/Socket.lhs new file mode 100644 index 0000000..34e99b6 --- /dev/null +++ b/ghc/lib/misc/Socket.lhs @@ -0,0 +1,192 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1995, 1996 +% +% Last Modified: Fri Jul 21 15:53:32 1995 +% Darren J Moffat +% +% Further hacked on by Sigbjorn Finne +% +\section[Socket]{Haskell 1.3 Socket bindings} + + +\begin{code} +{-# OPTIONS -#include "cbits/ghcSockets.h" #-} + +#include "config.h" + +module Socket ( + PortID(..), + Hostname, + + connectTo, -- :: Hostname -> PortID -> IO Handle + listenOn, -- :: PortID -> IO Socket + + accept, -- :: Socket -> IO (Handle, HostName) + + sendTo, -- :: Hostname -> PortID -> String -> IO () + recvFrom, -- :: Hostname -> PortID -> IO String + + socketPort -- :: Socket -> IO PortID + + ) where + +import BSD +import SocketPrim hiding ( accept, socketPort ) +import qualified SocketPrim ( accept, socketPort ) +import IO +\end{code} + +%*************************************************************************** +%* * +\subsection[Socket-Setup]{High Level ``Setup'' functions} +%* * +%*************************************************************************** + +Calling $connectTo$ creates a client side socket which is +connected to the given host and port. The Protocol and socket type is +derived from the given port identifier. If a port number is given +then the result is always an internet family $Stream$ socket. + +If the $PortID$ specifies a unix family socket and the $Hostname$ +differs from that returned by $getHostname$ then an error is +raised. Alternatively an empty string may be given to $connectTo$ +signalling that the current hostname applies. + +\begin{code} +data PortID = + Service String -- Service Name eg "ftp" + | PortNumber Int -- User defined Port Number +#ifndef cygwin32_TARGET_OS + | UnixSocket String -- Unix family socket in file system +#endif + +type Hostname = String +-- Maybe consider this alternative. +-- data Hostname = Name String | IP Int Int Int Int +\end{code} + +If more control over the socket type is required then $socketPrim$ +should be used instead. + +\begin{code} +connectTo :: Hostname -- Hostname + -> PortID -- Port Identifier + -> IO Handle -- Connected Socket + +connectTo hostname (Service serv) = + getProtocolNumber "tcp" >>= \ proto -> + socket AF_INET Stream proto >>= \ sock -> + getServicePortNumber serv >>= \ port -> + getHostByName hostname >>= \ (HostEntry _ _ _ haddrs) -> + connect sock (SockAddrInet port (head haddrs)) >> + socketToHandle sock ReadWriteMode >>= \ h -> + return h +connectTo hostname (PortNumber port) = + getProtocolNumber "tcp" >>= \ proto -> + socket AF_INET Stream proto >>= \ sock -> + getHostByName hostname >>= \ (HostEntry _ _ _ haddrs) -> + connect sock (SockAddrInet port (head haddrs)) >> + socketToHandle sock ReadWriteMode + +#ifndef cygwin32_TARGET_OS +connectTo _ (UnixSocket path) = + socket AF_UNIX Datagram 0 >>= \ sock -> + connect sock (SockAddrUnix path) >> + socketToHandle sock ReadWriteMode +#endif + +\end{code} + +The dual to the $connectTo$ call. This creates the server side +socket which has been bound to the specified port. + +\begin{code} +listenOn :: PortID -- Port Identifier + -> IO Socket -- Connected Socket + +listenOn (Service serv) = + getProtocolNumber "tcp" >>= \ proto -> + socket AF_INET Stream proto >>= \ sock -> + getServicePortNumber serv >>= \ port -> + bindSocket sock (SockAddrInet port iNADDR_ANY) >> + listen sock maxListenQueue >> + return sock +listenOn (PortNumber port) = + getProtocolNumber "tcp" >>= \ proto -> + socket AF_INET Stream proto >>= \ sock -> + bindSocket sock (SockAddrInet port iNADDR_ANY) >> + listen sock maxListenQueue >> + return sock +#ifndef cygwin32_TARGET_OS +listenOn (UnixSocket path) = + socket AF_UNIX Datagram 0 >>= \ sock -> + bindSocket sock (SockAddrUnix path) >> + return sock +#endif +\end{code} + +\begin{code} +accept :: Socket -- Listening Socket + -> IO (Handle, -- StdIO Handle for read/write + HostName) -- HostName of Peer socket + +accept sock = + SocketPrim.accept sock >>= \ (sock', (SockAddrInet _ haddr)) -> + getHostByAddr AF_INET haddr >>= \ (HostEntry peer _ _ _) -> + socketToHandle sock ReadWriteMode >>= \ handle -> + return (handle, peer) +\end{code} + +Send and recived data from/to the given host and port number. These +should normally only be used where the socket will not be required for +further calls. + +Thse are wrappers around socket, bind, and listen. + +\begin{code} +sendTo :: Hostname -- Hostname + -> PortID -- Port Number + -> String -- Message to send + -> IO () +sendTo h p msg = + connectTo h p >>= \ s -> + hPutStr s msg >> + hClose s + +recvFrom :: Hostname -- Hostname + -> PortID -- Port Number + -> IO String -- Received Data +recvFrom host port = + listenOn port >>= \ s -> + let + waiting = + SocketPrim.accept s >>= \ (s', (SockAddrInet _ haddr)) -> + getHostByAddr AF_INET haddr >>= \ (HostEntry peer _ _ _) -> + if peer /= host then + sClose s' >> + waiting + else + readSocketAll s' >>= \ msg -> + sClose s' >> + return msg + in + waiting >>= \ message -> + sClose s >> + return message +\end{code} + +Access function returning the port type/id of socket. + +\begin{code} +socketPort :: Socket -> IO PortID +socketPort s = + getSocketName s >>= \ sockaddr -> + return (case sockaddr of + SockAddrInet port _ -> + (PortNumber port) +#ifndef cygwin32_TARGET_OS + SockAddrUnix path -> + (UnixSocket path) +#endif + ) +\end{code} diff --git a/ghc/lib/misc/SocketPrim.lhs b/ghc/lib/misc/SocketPrim.lhs new file mode 100644 index 0000000..5991a91 --- /dev/null +++ b/ghc/lib/misc/SocketPrim.lhs @@ -0,0 +1,1093 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1995-1996 +% +\section[SocketPrim]{Low-level socket bindings} + +The @SocketPrim@ module is for when you want full control over the +sockets, something like what you have in C (which is very messy). + +\begin{code} +{-# OPTIONS -#include "stgio.h" -#include "cbits/ghcSockets.h" #-} + +#include "config.h" + +module SocketPrim ( + + Socket, + Family(..), + SocketType(..), + SockAddr(..), + HostAddress, + ShutdownCmd(..), + + socket, -- :: Family -> SocketType -> Int -> IO Socket + connect, -- :: Socket -> SockAddr -> IO () + bindSocket, -- :: Socket -> SockAddr -> IO () + listen, -- :: Socket -> Int -> IO () + accept, -- :: Socket -> IO (Socket, SockAddr) + getPeerName, -- :: Socket -> IO SockAddr + getSocketName, -- :: Socket -> IO SockAddr + + socketPort, -- :: Socket -> IO Int + + writeSocket, -- :: Socket -> String -> IO Int + readSocket, -- :: Socket -> Int -> IO (String, Int) + readSocketAll, -- :: Socket -> IO String + + socketToHandle, -- :: Socket -> IO Handle + +-- Alternative read/write interface not yet implemented. +-- sendto -- :: Socket -> String -> SockAddr -> IO Int +-- recvfrm -- :: Socket -> Int -> SockAddr -> IO (String, Int) +-- sendmsg -- :: Socket -> Message -> MsgFlags -> IO Int +-- recvmsg -- :: Socket -> MsgFlags -> IO Message + + shutdown, -- :: Socket -> ShutdownCmd -> IO () + sClose, -- :: Socket -> IO () + + inet_addr, -- :: String -> HostAddress + inet_ntoa, -- :: HostAddress -> String + + sIsConnected, -- :: Socket -> IO Bool + sIsBound, -- :: Socket -> IO Bool + sIsListening, -- :: Socket -> IO Bool + sIsReadable, -- :: Socket -> IO Bool + sIsWritable, -- :: Socket -> IO Bool + + +-- Special Constants + + aNY_PORT, + iNADDR_ANY, +-- sOL_SOCKET, + sOMAXCONN, + maxListenQueue, + + +-- The following are exported ONLY for use in the BSD module and +-- should not be used else where. + + packFamily, unpackFamily, + packSocketType, + packSockAddr, unpackSockAddr + +) where + +import GlaExts +import ST +import PrelIOBase -- IOError, Handle representation +import PrelHandle +import Foreign + +import Posix +import PosixUtil +import IO +import IOExts ( IORef, newIORef, readIORef, writeIORef ) +import PackedString ( unpackPS, byteArrayToPS, unpackCString, packCBytesST ) + +import Ix +\end{code} + + +%************************************************************************ +%* * +\subsection[Socket-SocketTypes]{Socket Types} +%* * +%************************************************************************ + + +There are a few possible ways to do this. The first is convert the +structs used in the C library into an equivalent Haskell type. An +other possible implementation is to keep all the internals in the C +code and use an Int\# and a status flag. The second method is used here +since a lot of the C structures are not required to be manipulated. +Originally the status was non mutable so we had to return a new socket +each time we changed the status. This version now uses mutable +variables to avoid the need to do this. The result is a cleaner +interface and better security since the application programmer now +can't circumvent the status information to perform invalid operations +on sockets. + + +\begin{code} +data SocketStatus + -- Returned Status Function called + = NotConnected -- socket + | Bound -- bindSocket + | Listening -- listen + | Connected -- connect/accept + | Error String -- Any + deriving (Eq, Show) + +data Socket + = MkSocket + Int -- File Descriptor Part + Family + SocketType + Int -- Protocol Number + (IORef SocketStatus) -- Status Flag +\end{code} + +The scheme used for addressing sockets is somewhat quirky. The +calls in the BSD socket API that need to know the socket address all +operate in terms of \tr{struct sockaddr}, a `virtual' type of socket address. +The Internet family of sockets are addressed as \tr{struct sockaddr\_in}, +so when calling functions that operate on \tr{struct sockaddr}, we have +to type cast the Internet socket address into a \tr{struct sockaddr}. By luck(!), +the two structures are of the same size. Same casting is required of other +families of sockets such as Xerox NS. Similarly for Unix domain sockets. + +To represent these socket addresses in Haskell-land, we do what BSD didn't do, +and use a union/algebraic type for the different families. Currently only +Unix domain sockets and the Internet family is supported. + +\begin{code} +type HostAddress = Word + +data SockAddr -- C Names +#ifndef cygwin32_TARGET_OS + = SockAddrUnix -- struct sockaddr_un + String -- sun_path + | +#else + = +#endif + SockAddrInet -- struct sockaddr_in + Int -- sin_port + HostAddress -- sin_addr + deriving Eq +\end{code} + + +%************************************************************************ +%* * +\subsection[Socket-Connections]{Connection Functions} +%* * +%************************************************************************ + +In the following connection and binding primitives. The names of the +equivalent C functions have been preserved where possible. It should +be noted that some of these names used in the C library, \tr{bind} in +particular, have a different meaning to many Haskell programmers and +have thus been renamed by appending the prefix Socket. + +Create an unconnected socket of the given family, type and protocol. +The most common invocation of $socket$ is the following: +\begin{verbatim} + ... + socket AF_INET Stream 6 >>= \ my_socket -> + ... +\end{verbatim} + +\begin{code} +socket :: Family -- Family Name (usually AF_INET) + -> SocketType -- Socket Type (usually Stream) + -> Int -- Protocol Number (getProtocolByName to find value) + -> IO Socket -- Unconnected Socket + +socket family stype protocol = do + status <- _ccall_ createSocket (packFamily family) + (packSocketType stype) + protocol + case status of + -1 -> constructErrorAndFail "socket" + n -> do + socket_status <- newIORef NotConnected + return (MkSocket n family stype protocol socket_status) +\end{code} + +Given a port number this {\em binds} the socket to that port. This +means that the programmer is only interested in data being sent to +that port number. The $Family$ passed to $bindSocket$ must +be the same as that passed to $socket$. If the special port +number $aNY\_PORT$ is passed then the system assigns the next +available use port. + +Port numbers for standard unix services can be found by calling +$getServiceEntry$. These are traditionally port numbers below +1000; although there are afew, namely NFS and IRC, which used higher +numbered ports. + +The port number allocated to a socket bound by using $aNY\_PORT$ can be +found by calling $port$ + +\begin{code} +bindSocket :: Socket -- Unconnected Socket + -> SockAddr -- Address to Bind to + -> IO () + +bindSocket (MkSocket s family stype protocol socketStatus) addr = do +#ifndef cygwin32_TARGET_OS + let isDomainSocket = if family == AF_UNIX then 1 else (0::Int) +#else + let isDomainSocket = 0 +#endif + currentStatus <- readIORef socketStatus + if currentStatus /= NotConnected + then + fail (userError ("bindSocket: can't peform bind on socket in status " ++ + show currentStatus)) + else do + addr' <- packSockAddr addr + let (_,sz) = boundsOfByteArray addr' + status <- _ccall_ bindSocket s addr' sz isDomainSocket + case status of + -1 -> constructErrorAndFail "bindSocket" + 0 -> writeIORef socketStatus (Bound) +\end{code} + + +Make a connection to an already opened socket on a given machine and port. +assumes that we have already called createSocket, othewise it will fail. + +This is the dual to $bindSocket$. The {\em server} process will +usually bind to a port number, the {\em client} will then connect to +the same port number. Port numbers of user applications are normally +agreed in advance, otherwise we must rely on some meta protocol for telling +the other side what port number we have been allocated. + +\begin{code} +connect :: Socket -- Unconnected Socket + -> SockAddr -- Socket address stuff + -> IO () + +connect (MkSocket s family stype protocol socketStatus) addr = do +#ifndef cygwin32_TARGET_OS + let isDomainSocket = if family == AF_UNIX then 1 else (0::Int) +#else + let isDomainSocket = 0 +#endif + currentStatus <- readIORef socketStatus + if currentStatus /= NotConnected + then + fail (userError ("connect: can't peform connect on socket in status " ++ + show currentStatus)) + else do + addr' <- packSockAddr addr + let (_,sz) = boundsOfByteArray addr' + status <- _ccall_ connectSocket s addr' sz isDomainSocket + case status of + -1 -> constructErrorAndFail "connect" + 0 -> writeIORef socketStatus Connected +\end{code} + +The programmer must call $listen$ to tell the system software +that they are now interested in receiving data on this port. This +must be called on the bound socket before any calls to read or write +data are made. + +The programmer also gives a number which indicates the length of the +incoming queue of unread messages for this socket. On most systems the +maximum queue length is around 5. To remove a message from the queue +for processing a call to $accept$ should be made. + +\begin{code} +listen :: Socket -- Connected & Bound Socket + -> Int -- Queue Length + -> IO () + +listen (MkSocket s family stype protocol socketStatus) backlog = do + currentStatus <- readIORef socketStatus + if currentStatus /= Bound + then + fail (userError ("listen: can't peform listen on socket in status " ++ + show currentStatus)) + else do + status <- _ccall_ listenSocket s backlog + case status of + -1 -> constructErrorAndFail "listen" + 0 -> writeIORef socketStatus Listening +\end{code} + +A call to $accept$ only returns when data is available on the given +socket, unless the socket has been set to non-blocking. It will +return a new socket which should be used to read the incoming data and +should then be closed. Using the socket returned by $accept$ allows +incoming requests to be queued on the original socket. + +\begin{code} +accept :: Socket -- Queue Socket + -> IO (Socket, -- Readable Socket + SockAddr) -- Peer details + +accept sock@(MkSocket s family stype protocol status) = do + currentStatus <- readIORef status + okay <- sIsAcceptable sock + if not okay + then + fail (userError ("accept: can't peform accept on socket in status " ++ + show currentStatus)) + else do + (ptr, sz) <- allocSockAddr family + int_star <- stToIO (newIntArray (0,1)) + stToIO (writeIntArray int_star 0 sz) + sock <- _ccall_ acceptSocket s ptr int_star + case sock of + -1 -> constructErrorAndFail "accept" + _ -> do + sz <- stToIO (readIntArray int_star 0) + addr <- unpackSockAddr ptr sz + status <- newIORef Connected + return ((MkSocket sock family stype protocol status), addr) +\end{code} + +%************************************************************************ +%* * +\subsection[Socket-DataPass]{Data Passing Primitives} +%* * +%************************************************************************ + +To allow Haskell to talk to C programs we need to be able to +communicate in terms of byte streams. @writeSocket@ and +@readSocket@ should only be used for this purpose and not for +communication between Haskell programs. Haskell programs should use +the 1.3 IO hPutStr and associated machinery for communicating with +each other. + + +\begin{code} +writeSocket :: Socket -- Connected Socket + -> String -- Data to send + -> IO Int -- Number of Bytes sent + +writeSocket (MkSocket s family stype protocol status) xs = do + currentStatus <- readIORef status + if not ((currentStatus /= Connected) || (currentStatus /= Listening)) + then + fail (userError ("writeSocket: can't peform write on socket in status " ++ + show currentStatus)) + else do + nbytes <- _ccall_ writeDescriptor s xs (length xs) + case nbytes of + -1 -> constructErrorAndFail "writeSocket" + _ -> return nbytes + +readSocket :: Socket -- Connected Socket + -> Int -- Number of Bytes to Read + -> IO (String, Int) -- (Data Read, Number of Bytes) + +readSocket (MkSocket s family stype protocol status) nbytes = do + currentStatus <- readIORef status + if not ((currentStatus /= Connected) || (currentStatus /= Listening)) + then + fail (userError ("readSocket: can't perform read on socket in status " ++ + show currentStatus)) + else do + ptr <- stToIO (newCharArray (0, nbytes)) + nbytes <- _ccall_ readDescriptor s ptr nbytes + case nbytes of + -1 -> constructErrorAndFail "readSocket" + n -> do + barr <- stToIO (unsafeFreezeByteArray ptr) + return (unpackPS (byteArrayToPS barr), n) + +readSocketAll :: Socket -> IO String +readSocketAll s = + let + loop xs = + catch + (readSocket s 4096 >>= \ (str, nbytes) -> + if nbytes /= 0 then + loop (str ++ xs) + else + return xs) + (\ _ -> return xs) + in + loop "" +\end{code} + +The port number the given socket is currently connected to can be +determined by calling $port$, is generally only useful when bind +was given $aNY\_PORT$. + +\begin{code} +socketPort :: Socket -- Connected & Bound Socket + -> IO Int -- Port Number of Socket +socketPort sock@(MkSocket s AF_INET stype protocol status) = + getSocketName sock >>= \(SockAddrInet port _) -> + return port +socketPort (MkSocket s family stype protocol status) = + fail (userError ("socketPort: not supported for Family " ++ show family)) +\end{code} + +Calling $getPeerName$ returns the address details of the machine, +other than the local one, which is connected to the socket. This is +used in programs such as FTP to determine where to send the returning +data. The corresponding call to get the details of the local machine +is $getSocketName$. + +\begin{code} +getPeerName :: Socket -> IO SockAddr + +getPeerName (MkSocket s family stype protocol status) = do + (ptr, sz) <- allocSockAddr family + int_star <- stToIO (newIntArray (0,1)) + stToIO (writeIntArray int_star 0 sz) + status <- _ccall_ getPeerName s ptr int_star + case status of + -1 -> constructErrorAndFail "getPeerName" + _ -> do + sz <- stToIO (readIntArray int_star 0) + unpackSockAddr ptr sz + +getSocketName :: Socket -> IO SockAddr + +getSocketName (MkSocket s family stype protocol status) = do + (ptr, sz) <- allocSockAddr family + int_star <- stToIO (newIntArray (0,1)) + stToIO (writeIntArray int_star 0 sz) + status <- _ccall_ getSockName s ptr int_star + case status of + -1 -> constructErrorAndFail "getSocketName" + _ -> do + sz <- stToIO (readIntArray int_star 0) + unpackSockAddr ptr sz + + +\end{code} + + +%************************************************************************ +%* * +\subsection[Socket-Properties]{Socket Properties} +%* * +%************************************************************************ + +\begin{code} +{- +data SocketOption = + Debug + | AcceptConnection + | ReuseAddr + | KeepAlive + | DontRoute + | Broadcast + | UseLoopBack + | Linger + | OOBInline + | SendBuffer + | RecvBuffer + | SendLowWater + | RecvLowWater + | SendTimeOut + | RecvTimeOut + | Error + | Type + +sOL_SOCKET = ``SOL_SOCKET'' + +setSocketOptions :: Socket -> + Int -> -- Level + SocketOption -> -- Option Name + String -> -- Option Value + IO () + +getSocketOptons :: Socket -> + Int -> -- Level + SocketOption -> -- Option Name + IO String -- Option Value +-} +\end{code} + +A calling sequence table for the main functions is shown in the table below. + +\begin{figure}[h] +\begin{center} +\begin{tabular}{|l|c|c|c|c|c|c|c|} +\hline +{\bf A Call to} & socket & connect & bindSocket & listen & accept & read & write \\ +\hline +{\bf Precedes} & & & & & & & \\ +\hline +socket & & & & & & & \\ +\hline +connect & + & & & & & & \\ +\hline +bindSocket & + & & & & & & \\ +\hline +listen & & & + & & & & \\ +\hline +accept & & & & + & & & \\ +\hline +read & & + & & + & + & + & + \\ +\hline +write & & + & & + & + & + & + \\ +\hline +\end{tabular} +\caption{Sequence Table for Major functions of Socket} +\label{tab:api-seq} +\end{center} +\end{figure} + +%************************************************************************ +%* * +\subsection[Socket-OSDefs]{OS Dependent Definitions} +%* * +%************************************************************************ + + +The following Family and Socket Type declarations were manually derived +from /usr/include/sys/socket.h on the appropriate machines. + +Maybe a configure script that could parse the socket.h file to produce +the following declaration is required to make it ``portable'' rather than +using the dreaded \#ifdefs. + +Presently only the following machine/os combinations are supported: + +\begin{itemize} +\item Intelx86/Linux +\item SPARC/SunOS +\item SPARC/Solaris +\item Alpha/OSF +\item HPPA/HPUX9 +\item MIPS/IRIX6.2 +\end{itemize} + +\begin{code} +unpackFamily :: Int -> Family +packFamily :: Family -> Int + +packSocketType :: SocketType -> Int + + +#if sunos4_TARGET_OS || solaris2_TARGET_OS + +data Family = + AF_UNSPEC -- unspecified + | AF_UNIX -- local to host (pipes, portals + | AF_INET -- internetwork: UDP, TCP, etc + | AF_IMPLINK -- arpanet imp addresses + | AF_PUP -- pup protocols: e.g. BSP + | AF_CHAOS -- mit CHAOS protocols + | AF_NS -- XEROX NS protocols + | AF_NBS -- nbs protocols + | AF_ECMA -- european computer manufacturers + | AF_DATAKIT -- datakit protocols + | AF_CCITT -- CCITT protocols, X.25 etc + | AF_SNA -- IBM SNA + | AF_DECnet -- DECnet + | AF_DLI -- Direct data link interface + | AF_LAT -- LAT + | AF_HYLINK -- NSC Hyperchannel + | AF_APPLETALK -- Apple Talk + | AF_NIT -- Network Interface Tap + | AF_802 -- IEEE 802.2, also ISO 8802 + | AF_OSI -- umbrella of all families used by OSI + | AF_X25 -- CCITT X.25 + | AF_OSINET -- AFI + | AF_GOSSIP -- US Government OSI + | AF_IPX -- Novell Internet Protocol + deriving (Eq, Ord, Ix, Show) + +packFamily = index (AF_UNSPEC, AF_IPX) +unpackFamily family = (range (AF_UNSPEC, AF_IPX))!!family + +#endif + +#if cygwin32_TARGET_OS + +data Family = + AF_UNSPEC -- unspecified + --NOT SUPPORTED: AF_UNIX -- local to host (pipes, portals) + | AF_INET -- internetwork: UDP, TCP, etc + | AF_IMPLINK -- arpanet imp addresses + | AF_PUP -- pup protocols: e.g. BSP + | AF_CHAOS -- mit CHAOS protocols + | AF_NS -- XEROX NS protocols + | AF_ISO -- ISO protocols + | AF_OSI -- OSI protocols + | AF_ECMA -- european computer manufacturers + | AF_DATAKIT -- datakit protocols + | AF_CCITT -- CCITT protocols, X.25 etc + | AF_SNA -- IBM SNA + | AF_DECnet -- DECnet + | AF_DLI -- Direct data link interface + | AF_LAT -- LAT + | AF_HYLINK -- NSC Hyperchannel + | AF_APPLETALK -- Apple Talk + | AF_NETBIOS -- NetBios-style addresses + deriving (Eq, Ord, Ix, Show) + +packFamily = index (AF_UNSPEC, AF_NETBIOS) +unpackFamily family = (range (AF_UNSPEC, AF_NETBIOS))!!family + + +#endif + +#if hpux_TARGET_OS + +data Family = + AF_UNSPEC -- unspecified + | AF_UNIX -- local to host (pipes, portals + | AF_INET -- internetwork: UDP, TCP, etc + | AF_IMPLINK -- arpanet imp addresses + | AF_PUP -- pup protocols: e.g. BSP + | AF_CHAOS -- mit CHAOS protocols + | AF_NS -- XEROX NS protocols + | AF_NBS -- nbs protocols + | AF_ECMA -- european computer manufacturers + | AF_DATAKIT -- datakit protocols + | AF_CCITT -- CCITT protocols, X.25 etc + | AF_SNA -- IBM SNA + | AF_DECnet -- DECnet + | AF_DLI -- Direct data link interface + | AF_LAT -- LAT + | AF_HYLINK -- NSC Hyperchannel + | AF_APPLETALK -- Apple Talk + | AF_NIT -- Network Interface Tap + deriving (Eq, Ord, Ix, Show) + +packFamily = index (AF_UNSPEC, AF_NIT) +unpackFamily family = (range (AF_UNSPEC, AF_NIT))!!family + +#endif + +#if osf1_TARGET_OS + +data Family = + AF_UNSPEC -- unspecified + | AF_UNIX -- local to host (pipes, portals) + | AF_INET -- internetwork: UDP, TCP, etc. + | AF_IMPLINK -- arpanet imp addresses + | AF_PUP -- pup protocols: e.g. BSP + | AF_CHAOS -- mit CHAOS protocols + | AF_NS -- XEROX NS protocols + | AF_ISO -- ISO protocols + | AF_ECMA -- european computer manufacturers + | AF_DATAKIT -- datakit protocols + | AF_CCITT -- CCITT protocols, X.25 etc + | AF_SNA -- IBM SNA + | AF_DECnet -- DECnet + | AF_DLI -- DEC Direct data link interface + | AF_LAT -- LAT + | AF_HYLINK -- NSC Hyperchannel + | AF_APPLETALK -- Apple Talk + | AF_ROUTE -- Internal Routing Protocol + | AF_LINK -- Link layer interface + | Pseudo_AF_XTP -- eXpress Transfer Protocol (no AF) + | AF_NETMAN -- DNA Network Management + | AF_X25 -- X25 protocol + | AF_CTF -- Common Trace Facility + | AF_WAN -- Wide Area Network protocols + deriving (Eq, Ord, Ix, Show) + +packFamily = index (AF_UNSPEC, AF_WAN) +unpackFamily family = (range (AF_UNSPEC, AF_WAN))!!family +#endif + +#if linux_TARGET_OS + +data Family = + AF_UNSPEC + | AF_UNIX + | AF_INET + | AF_AX25 + | AF_IPX + deriving (Eq, Ord, Ix, Show) + +packFamily = index (AF_UNSPEC, AF_IPX) +unpackFamily family = (range (AF_UNSPEC, AF_IPX))!!family + +#endif + +#if irix_TARGET_OS + +data Family = + AF_UNSPEC -- unspecified + | AF_UNIX -- backward compatibility + | AF_INET -- internetwork: UDP, TCP, etc. + | AF_IMPLINK -- arpanet imp addresses + | AF_PUP -- pup protocols: e.g. BSP + | AF_CHAOS -- mit CHAOS protocols + | AF_NS -- XEROX NS protocols + | AF_ISO -- ISO protocols + | AF_ECMA -- european computer manufacturers + | AF_DATAKIT -- datakit protocols + | AF_CCITT -- CCITT protocols, X.25 etc + | AF_SNA -- IBM SNA + | AF_DECnet -- DECnet + | AF_DLI -- DEC Direct data link interface + | AF_LAT -- LAT + | AF_HYLINK -- NSC Hyperchannel + | AF_APPLETALK -- Apple Talk + | AF_ROUTE -- Internal Routing Protocol + | AF_RAW -- Link layer interface + +-- these two overlap AF_ROUTE and AF_RAW +-- | AF_NIT -- Network Interface Tap +-- | AF_802 -- IEEE 802.2, also ISO 8802 + + | AF_OSI -- umbrella for all families used by OSI + | AF_X25 -- CCITT X.25 + | AF_OSINET -- AFI + | AF_GOSIP -- US Government OSI + + | AF_SDL -- SGI Data Link for DLPI + | AF_INET6 -- Internet Protocol version 6 + | AF_LINK -- Link layer interface + deriving (Eq, Ord, Ix, Show) + +packFamily = index (AF_UNSPEC, AF_LINK) +unpackFamily family = (range (AF_UNSPEC, AF_LINK))!!family + +#endif + +#if aix_TARGET_OS + +data Family = + AF_UNSPEC -- unspecified + | AF_UNIX -- local to host (pipes, portals) + | AF_INET -- internetwork: UDP, TCP, etc. + | AF_IMPLINK -- arpanet imp addresses + | AF_PUP -- pup protocols: e.g. BSP + | AF_CHAOS -- mit CHAOS protocols + | AF_NS -- XEROX NS protocols + | AF_ISO -- ISO protocols +-- | AF_OSI is the same as AF_ISO on AIX + | AF_ECMA -- european computer manufacturers + | AF_DATAKIT -- datakit protocols + | AF_CCITT -- CCITT protocols, X.25 etc + | AF_SNA -- IBM SNA + | AF_DECnet -- DECnet + | AF_DLI -- DEC Direct data link interface + | AF_LAT -- LAT + | AF_HYLINK -- NSC Hyperchannel + | AF_APPLETALK -- Apple Talk + | AF_ROUTE -- Internal Routing Protocol + | AF_LINK -- Link layer interface + | Pseudo_AF_XTP -- eXpress Transfer Protocol (no AF) + | AF_INTF -- Debugging use only + | AF_RIF -- raw interface + | AF_NETWARE + | AF_NDD + | AF_MAX + deriving (Eq, Ord, Ix, Show) + +packFamily = index (AF_UNSPEC, AF_MAX) +unpackFamily family = (range (AF_UNSPEC, AF_MAX))!!family + +#endif + +#if freebsd_TARGET_OS + +data Family = + AF_UNSPEC -- unspecified + | AF_UNIX -- local to host (pipes, portals) + | AF_INET -- internetwork: UDP, TCP, etc. + | AF_IMPLINK -- arpanet imp addresses + | AF_PUP -- pup protocols: e.g. BSP + | AF_CHAOS -- mit CHAOS protocols + | AF_NS -- XEROX NS protocols + | AF_ISO -- ISO protocols +-- | AF_OSI is the same as AF_ISO + | AF_ECMA -- european computer manufacturers + | AF_DATAKIT -- datakit protocols + | AF_CCITT -- CCITT protocols, X.25 etc + | AF_SNA -- IBM SNA + | AF_DECnet -- DECnet + | AF_DLI -- DEC Direct data link interface + | AF_LAT -- LAT + | AF_HYLINK -- NSC Hyperchannel + | AF_APPLETALK -- Apple Talk + | AF_ROUTE -- Internal Routing Protocol + | AF_LINK -- Link layer interface + | Pseudo_AF_XTP -- eXpress Transfer Protocol (no AF) + | AF_COIP -- connection-oriented IP, aka ST II + | AF_CNT -- Computer Network Technology + | Psuedo_AF_RTIP -- Help Identify RTIP packets + | AF_IPX -- Novell Internet Protocol + | AF_SIP -- Simple Internet Protocol + | Pseudo_AF_PIP -- Help Identify PIP packets + | AF_ISDN -- Integrated Services Digital Network +-- | AF_E164 is the same as AF_ISDN + | Pseudo_AF_KEY -- Internal key-management function + | AF_INET6 -- IPv6 + | AF_MAX + deriving (Eq, Ord, Ix, Show) + +packFamily = index (AF_UNSPEC, AF_MAX) +unpackFamily family = (range (AF_UNSPEC, AF_MAX))!!family + +#endif + +-- Alpha running OSF or a SPARC with SunOS, rather than Solaris. + +#if osf1_TARGET_OS || sunos4_TARGET_OS || hpux_TARGET_OS || aix_TARGET_OS \ + || freebsd_TARGET_OS +data SocketType = + Stream + | Datagram + | Raw + | RDM + | SeqPacket + deriving (Eq, Ord, Ix, Show) + +packSocketType stype = 1 + (index (Stream, SeqPacket) stype) +#endif + +-- This is for a box running cygwin32 toolchain. + +#if defined(cygwin32_TARGET_OS) +data SocketType = + Stream + | Datagram + | Raw + | RDM -- reliably delivered msg + | SeqPacket + | Packet + deriving (Eq, Ord, Ix, Show) + +packSocketType stype = + case stype of + Stream -> ``SOCK_STREAM'' + Datagram -> ``SOCK_DGRAM'' + Raw -> ``SOCK_RAW'' + RDM -> ``SOCK_RDM'' + SeqPacket -> ``SOCK_SEQPACKET'' + Packet -> ``SOCK_PACKET'' + +#endif + +-- This is a Sun running Solaris rather than SunOS or SGI running IRIX + +#if defined(solaris2_TARGET_OS) || defined(irix_TARGET_OS) +data SocketType = + Datagram + | Stream + | NC_TPI_COTS_ORD + | Raw + | RDM + | SeqPacket + deriving (Eq, Ord, Ix, Show) + +packSocketType stype = 1 + (index (Datagram, SeqPacket) stype) +#endif + + +#if linux_TARGET_OS +data SocketType = + Stream + | Datagram + | Raw + | RDM + | SeqPacket + | Packet + deriving (Eq, Ord, Ix, Show) + +packSocketType stype = 1 + (index (Stream, Packet) stype) +#endif +\end{code} + +%************************************************************************ +%* * +\subsection[Socket-Util]{Utility Functions} +%* * +%************************************************************************ + +\begin{code} +aNY_PORT = 0::Int +iNADDR_ANY = ``INADDR_ANY''::Word +sOMAXCONN = ``SOMAXCONN''::Int +maxListenQueue = sOMAXCONN + +------------------------------------------------------------------------------- +data ShutdownCmd + = ShutdownReceive + | ShutdownSend + | ShutdownBoth + +sdownCmdToInt :: ShutdownCmd -> Int +sdownCmdToInt ShutdownReceive = 0 +sdownCmdToInt ShutdownSend = 1 +sdownCmdToInt ShutdownBoth = 2 + +shutdown :: Socket -> ShutdownCmd -> IO () +shutdown (MkSocket s _ _ _ _) stype = do + let t = sdownCmdToInt stype + status <- _ccall_ shutdownSocket s t + case status of + -1 -> constructErrorAndFail "shutdown" + _ -> return () + +------------------------------------------------------------------------------- + +sClose :: Socket -> IO () +sClose (MkSocket s family stype protocol status) = _ccall_ close s + +------------------------------------------------------------------------------- + +inet_addr :: String -> HostAddress +inet_addr ipstr = unsafePerformIO (_ccall_ inet_addr ipstr) + +------------------------------------------------------------------------------- + +inet_ntoa :: HostAddress -> String +inet_ntoa haddr = unsafePerformIO ( + _casm_ ``struct in_addr addr; + addr.s_addr = htonl(%0); + %r = inet_ntoa (addr);'' haddr >>= \ str -> + return (unpackCString str)) + +------------------------------------------------------------------------------- + +sIsConnected :: Socket -> IO Bool +sIsConnected (MkSocket s family stype protocol status) = do + value <- readIORef status + return (value == Connected) + +------------------------------------------------------------------------------- + +sIsBound :: Socket -> IO Bool +sIsBound (MkSocket s family stype protocol status) = do + value <- readIORef status + return (value == Bound) + +------------------------------------------------------------------------------- + +sIsListening :: Socket -> IO Bool +sIsListening (MkSocket s family stype protocol status) = do + value <- readIORef status + return (value == Listening) + +------------------------------------------------------------------------------- + +sIsReadable :: Socket -> IO Bool +sIsReadable (MkSocket s family stype protocol status) = do + value <- readIORef status + return (value == Listening || value == Connected) + +------------------------------------------------------------------------------- + +sIsWritable :: Socket -> IO Bool +sIsWritable = sIsReadable + +------------------------------------------------------------------------------- + +sIsAcceptable :: Socket -> IO Bool +#ifndef cygwin32_TARGET_OS +sIsAcceptable (MkSocket s AF_UNIX Stream protocol status) = do + value <- readIORef status + return (value == Connected || value == Bound || value == Listening) +sIsAcceptable (MkSocket s AF_UNIX _ protocol status) = + return False +#endif +sIsAcceptable (MkSocket s _ stype protocol status) = do + value <- readIORef status + return (value == Connected || value == Listening) + +------------------------------------------------------------------------------- + +{- +sSetBlocking :: Socket -> Bool -> IO () +sIsBlocking :: Socket -> IO Bool +-} + +------------------------------------------------------------------------------- + +allocSockAddr :: Family -> IO (MutableByteArray RealWorld Int, Int) + +#ifndef cygwin32_TARGET_OS +allocSockAddr AF_UNIX = do + ptr <- stToIO (newCharArray (0,``sizeof(struct sockaddr_un)'')) + let (_,sz) = boundsOfByteArray ptr + return (ptr, sz) +#endif + +allocSockAddr AF_INET = do + ptr <- stToIO (newCharArray (0,``sizeof(struct sockaddr_in)'')) + let (_,sz) = boundsOfByteArray ptr + return (ptr, sz) + +------------------------------------------------------------------------------- + +unpackSockAddr :: MutableByteArray RealWorld Int -> Int -> IO SockAddr +unpackSockAddr arr len = do + fam <- _casm_ ``%r = ((struct sockaddr*)%0)->sa_family;'' arr + case unpackFamily fam of +#ifndef cygwin32_TARGET_OS + AF_UNIX -> unpackSockAddrUnix arr (len - ``sizeof(short)'') +#endif + AF_INET -> unpackSockAddrInet arr + +------------------------------------------------------------------------------- + +{- + sun_path is *not* NULL terminated, hence we *do* the need to know the + length of it. +-} +#ifndef cygwin32_TARGET_OS +unpackSockAddrUnix :: (MutableByteArray RealWorld Int) -> Int -> IO SockAddr +unpackSockAddrUnix ptr len = do + char_star <- _casm_ ``%r = ((struct sockaddr_un*)%0)->sun_path;'' ptr + path <- stToIO (packCBytesST len char_star) + return (SockAddrUnix (unpackPS path)) +#endif +------------------------------------------------------------------------------- + +unpackSockAddrInet :: (MutableByteArray RealWorld Int) -> IO SockAddr +unpackSockAddrInet ptr = do + port <- _casm_ ``%r = ntohs(((struct sockaddr_in*)%0)->sin_port);'' ptr + addr <- _casm_ ``%r = ntohl(((struct sockaddr_in*)%0)->sin_addr.s_addr);'' + ptr + return (SockAddrInet port addr) + +------------------------------------------------------------------------------- + + +packSockAddr :: SockAddr -> IO (MutableByteArray RealWorld Int) +#ifndef cygwin32_TARGET_OS +packSockAddr (SockAddrUnix path) = do + (ptr,_) <- allocSockAddr AF_UNIX + _casm_ ``(((struct sockaddr_un *)%0)->sun_family) = AF_UNIX;'' + ptr + _casm_ ``strcpy ((((struct sockaddr_un *)%0)->sun_path),%1);'' + ptr path + return ptr +#endif +packSockAddr (SockAddrInet port address) = do + (ptr,_) <- allocSockAddr AF_INET + _casm_ ``(((struct sockaddr_in *)%0)->sin_family) = AF_INET;'' + ptr + _casm_ ``(((struct sockaddr_in *)%0)->sin_port) = htons((int)%1);'' + ptr port + _casm_ ``(((struct sockaddr_in *)%0)->sin_addr.s_addr) = htonl(%1);'' + ptr address + return ptr + +------------------------------------------------------------------------------- +\end{code} + +@socketHandle@ turns a @Socket@ into a 1.3 @Handle@. By default, the new +handle will not be buffered, use @hSetBuffering@ if you want to change +it subsequently. + +\begin{code} +#ifndef __PARALLEL_HASKELL__ +socketToHandle :: Socket -> IOMode -> IO Handle + +socketToHandle (MkSocket s family stype protocol status) m = do + ptr <- _casm_ ``%r = fdopen (%0, (char *)%1);'' s m' + fp <- makeForeignObj ptr (``&freeFile'' :: Addr) + hndl <- newHandle (htype fp Nothing False) + hSetBuffering hndl NoBuffering + return hndl + where + m' = + case m of + ReadMode -> "r" + WriteMode -> "w" + AppendMode -> "a" + ReadWriteMode -> "r+" + htype = + case m of + ReadMode -> ReadHandle + WriteMode -> WriteHandle + AppendMode -> AppendHandle + ReadWriteMode -> ReadWriteHandle +#else +socketToHandle (MkSocket s family stype protocol status) m = + error "socketToHandle not implemented in a parallel setup" +#endif +\end{code} + diff --git a/ghc/lib/misc/Util.lhs b/ghc/lib/misc/Util.lhs new file mode 100644 index 0000000..d09c6d7 --- /dev/null +++ b/ghc/lib/misc/Util.lhs @@ -0,0 +1,816 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1996 +% +\section[Util]{Highly random utility functions} + +\begin{code} +#if defined(COMPILING_GHC) +# include "HsVersions.h" +# define IF_NOT_GHC(a) {--} +#else +# define panic error +# define TAG_ Ordering +# define LT_ LT +# define EQ_ EQ +# define GT_ GT +# define _LT LT +# define _EQ EQ +# define _GT GT +# define GT__ _ +# define tagCmp_ compare +# define _tagCmp compare +# define FAST_STRING String +# define ASSERT(x) {-nothing-} +# define IF_NOT_GHC(a) a +# define COMMA , +#endif + +#ifndef __GLASGOW_HASKELL__ +# undef TAG_ +# undef LT_ +# undef EQ_ +# undef GT_ +# undef tagCmp_ +#endif + +module Util ( + -- Haskell-version support +#ifndef __GLASGOW_HASKELL__ + tagCmp_, + TAG_(..), +#endif + -- general list processing + IF_NOT_GHC(forall COMMA exists COMMA) + zipEqual, zipWithEqual, zipWith3Equal, zipWith4Equal, + zipLazy, + mapAndUnzip, mapAndUnzip3, + nOfThem, lengthExceeds, isSingleton, + startsWith, endsWith, +#if defined(COMPILING_GHC) + isIn, isn'tIn, +#endif + + -- association lists + assoc, + + -- duplicate handling + hasNoDups, equivClasses, runs, removeDups, + + -- sorting + IF_NOT_GHC(quicksort COMMA stableSortLt COMMA mergesort COMMA) + sortLt, + IF_NOT_GHC(mergeSort COMMA) naturalMergeSortLe, -- from Carsten + IF_NOT_GHC(naturalMergeSort COMMA mergeSortLe COMMA) + + -- transitive closures + transitiveClosure, + + -- accumulating + mapAccumL, mapAccumR, mapAccumB, + + -- comparisons +#if defined(COMPILING_GHC) + thenCmp, cmpList, + cmpPString, +#else + cmpString, +#endif + + -- pairs + IF_NOT_GHC(cfst COMMA applyToPair COMMA applyToFst COMMA) + IF_NOT_GHC(applyToSnd COMMA foldPair COMMA) + unzipWith + + -- error handling +#if defined(COMPILING_GHC) + , panic, panic#, pprPanic, pprPanic#, pprError, pprTrace + , assertPanic +#endif {- COMPILING_GHC -} + + ) where + +#if defined(COMPILING_GHC) + +CHK_Ubiq() -- debugging consistency check +IMPORT_1_3(List(zipWith4)) + +import Pretty +#else +import List(zipWith4) +#endif + +infixr 9 `thenCmp` +\end{code} + +%************************************************************************ +%* * +\subsection[Utils-version-support]{Functions to help pre-1.2 versions of (non-Glasgow) Haskell} +%* * +%************************************************************************ + +This is our own idea: +\begin{code} +#ifndef __GLASGOW_HASKELL__ +data TAG_ = LT_ | EQ_ | GT_ + +tagCmp_ :: Ord a => a -> a -> TAG_ +tagCmp_ a b = if a == b then EQ_ else if a < b then LT_ else GT_ +#endif +\end{code} + +%************************************************************************ +%* * +\subsection[Utils-lists]{General list processing} +%* * +%************************************************************************ + +Quantifiers are not standard in Haskell. The following fill in the gap. + +\begin{code} +forall :: (a -> Bool) -> [a] -> Bool +forall pred [] = True +forall pred (x:xs) = pred x && forall pred xs + +exists :: (a -> Bool) -> [a] -> Bool +exists pred [] = False +exists pred (x:xs) = pred x || exists pred xs +\end{code} + +A paranoid @zip@ (and some @zipWith@ friends) that checks the lists +are of equal length. Alastair Reid thinks this should only happen if +DEBUGging on; hey, why not? +[In the GHC syslib, we want the paranoid behaviour by default --SOF] + +\begin{code} +zipEqual :: String -> [a] -> [b] -> [(a,b)] +zipWithEqual :: String -> (a->b->c) -> [a]->[b]->[c] +zipWith3Equal :: String -> (a->b->c->d) -> [a]->[b]->[c]->[d] +zipWith4Equal :: String -> (a->b->c->d->e) -> [a]->[b]->[c]->[d]->[e] + +#if (!defined(DEBUG)) && defined(COMPILING_GHC) +zipEqual _ = zip +zipWithEqual _ = zipWith +zipWith3Equal _ = zipWith3 +zipWith4Equal _ = zipWith4 +#else +zipEqual msg [] [] = [] +zipEqual msg (a:as) (b:bs) = (a,b) : zipEqual msg as bs +zipEqual msg as bs = panic ("zipEqual: unequal lists:"++msg) + +zipWithEqual msg z (a:as) (b:bs)= z a b : zipWithEqual msg z as bs +zipWithEqual msg _ [] [] = [] +zipWithEqual msg _ _ _ = panic ("zipWithEqual: unequal lists:"++msg) + +zipWith3Equal msg z (a:as) (b:bs) (c:cs) + = z a b c : zipWith3Equal msg z as bs cs +zipWith3Equal msg _ [] [] [] = [] +zipWith3Equal msg _ _ _ _ = panic ("zipWith3Equal: unequal lists:"++msg) + +zipWith4Equal msg z (a:as) (b:bs) (c:cs) (d:ds) + = z a b c d : zipWith4Equal msg z as bs cs ds +zipWith4Equal msg _ [] [] [] [] = [] +zipWith4Equal msg _ _ _ _ _ = panic ("zipWith4Equal: unequal lists:"++msg) +#endif +\end{code} + +\begin{code} +-- zipLazy is lazy in the second list (observe the ~) + +zipLazy :: [a] -> [b] -> [(a,b)] +zipLazy [] ys = [] +zipLazy (x:xs) ~(y:ys) = (x,y) : zipLazy xs ys +\end{code} + +\begin{code} +mapAndUnzip :: (a -> (b, c)) -> [a] -> ([b], [c]) + +mapAndUnzip f [] = ([],[]) +mapAndUnzip f (x:xs) + = let + (r1, r2) = f x + (rs1, rs2) = mapAndUnzip f xs + in + (r1:rs1, r2:rs2) + +mapAndUnzip3 :: (a -> (b, c, d)) -> [a] -> ([b], [c], [d]) + +mapAndUnzip3 f [] = ([],[],[]) +mapAndUnzip3 f (x:xs) + = let + (r1, r2, r3) = f x + (rs1, rs2, rs3) = mapAndUnzip3 f xs + in + (r1:rs1, r2:rs2, r3:rs3) +\end{code} + +\begin{code} +nOfThem :: Int -> a -> [a] +nOfThem = replicate -- deprecated. + +lengthExceeds :: [a] -> Int -> Bool + +[] `lengthExceeds` n = 0 > n +(x:xs) `lengthExceeds` n = (1 > n) || (xs `lengthExceeds` (n - 1)) + +isSingleton :: [a] -> Bool + +isSingleton [x] = True +isSingleton _ = False + +startsWith, endsWith :: String -> String -> Maybe String + +startsWith [] str = Just str +startsWith (c:cs) (s:ss) + = if c /= s then Nothing else startsWith cs ss +startsWith _ [] = Nothing + +endsWith cs ss + = case (startsWith (reverse cs) (reverse ss)) of + Nothing -> Nothing + Just rs -> Just (reverse rs) +\end{code} + +Debugging/specialising versions of \tr{elem} and \tr{notElem} +\begin{code} +#if defined(COMPILING_GHC) +isIn, isn'tIn :: (Eq a) => String -> a -> [a] -> Bool + +# ifndef DEBUG +isIn msg x ys = elem__ x ys +isn'tIn msg x ys = notElem__ x ys + +--these are here to be SPECIALIZEd (automagically) +elem__ _ [] = False +elem__ x (y:ys) = x==y || elem__ x ys + +notElem__ x [] = True +notElem__ x (y:ys) = x /= y && notElem__ x ys + +# else {- DEBUG -} +isIn msg x ys + = elem ILIT(0) x ys + where + elem i _ [] = False + elem i x (y:ys) + | i _GE_ ILIT(100) = panic ("Over-long elem in: " ++ msg) + | otherwise = x == y || elem (i _ADD_ ILIT(1)) x ys + +isn'tIn msg x ys + = notElem ILIT(0) x ys + where + notElem i x [] = True + notElem i x (y:ys) + | i _GE_ ILIT(100) = panic ("Over-long notElem in: " ++ msg) + | otherwise = x /= y && notElem (i _ADD_ ILIT(1)) x ys + +# endif {- DEBUG -} + +#endif {- COMPILING_GHC -} +\end{code} + +%************************************************************************ +%* * +\subsection[Utils-assoc]{Association lists} +%* * +%************************************************************************ + +See also @assocMaybe@ and @mkLookupFun@ in module @Maybes@. + +\begin{code} +assoc :: (Eq a) => String -> [(a, b)] -> a -> b + +assoc crash_msg lst key + = if (null res) + then panic ("Failed in assoc: " ++ crash_msg) + else head res + where res = [ val | (key', val) <- lst, key == key'] +\end{code} + +%************************************************************************ +%* * +\subsection[Utils-dups]{Duplicate-handling} +%* * +%************************************************************************ + +\begin{code} +hasNoDups :: (Eq a) => [a] -> Bool + +hasNoDups xs = f [] xs + where + f seen_so_far [] = True + f seen_so_far (x:xs) = if x `is_elem` seen_so_far then + False + else + f (x:seen_so_far) xs + +#if defined(COMPILING_GHC) + is_elem = isIn "hasNoDups" +#else + is_elem = elem +#endif +\end{code} + +\begin{code} +equivClasses :: (a -> a -> Ordering) -- Comparison + -> [a] + -> [[a]] + +equivClasses cmp stuff@[] = [] +equivClasses cmp stuff@[item] = [stuff] +equivClasses cmp items + = runs eq (sortLt lt items) + where + eq a b = case cmp a b of { EQ -> True; _ -> False } + lt a b = case cmp a b of { LT -> True; _ -> False } +\end{code} + +The first cases in @equivClasses@ above are just to cut to the point +more quickly... + +@runs@ groups a list into a list of lists, each sublist being a run of +identical elements of the input list. It is passed a predicate @p@ which +tells when two elements are equal. + +\begin{code} +runs :: (a -> a -> Bool) -- Equality + -> [a] + -> [[a]] + +runs p [] = [] +runs p (x:xs) = case (span (p x) xs) of + (first, rest) -> (x:first) : (runs p rest) +\end{code} + +\begin{code} +removeDups :: (a -> a -> Ordering) -- Comparison function + -> [a] + -> ([a], -- List with no duplicates + [[a]]) -- List of duplicate groups. One representative from + -- each group appears in the first result + +removeDups cmp [] = ([], []) +removeDups cmp [x] = ([x],[]) +removeDups cmp xs + = case (mapAccumR collect_dups [] (equivClasses cmp xs)) of { (dups, xs') -> + (xs', dups) } + where + collect_dups dups_so_far [x] = (dups_so_far, x) + collect_dups dups_so_far dups@(x:xs) = (dups:dups_so_far, x) +\end{code} + +%************************************************************************ +%* * +\subsection[Utils-sorting]{Sorting} +%* * +%************************************************************************ + +%************************************************************************ +%* * +\subsubsection[Utils-quicksorting]{Quicksorts} +%* * +%************************************************************************ + +\begin{code} +-- tail-recursive, etc., "quicker sort" [as per Meira thesis] +quicksort :: (a -> a -> Bool) -- Less-than predicate + -> [a] -- Input list + -> [a] -- Result list in increasing order + +quicksort lt [] = [] +quicksort lt [x] = [x] +quicksort lt (x:xs) = split x [] [] xs + where + split x lo hi [] = quicksort lt lo ++ (x : quicksort lt hi) + split x lo hi (y:ys) | y `lt` x = split x (y:lo) hi ys + | True = split x lo (y:hi) ys +\end{code} + +Quicksort variant from Lennart's Haskell-library contribution. This +is a {\em stable} sort. + +\begin{code} +stableSortLt = sortLt -- synonym; when we want to highlight stable-ness + +sortLt :: (a -> a -> Bool) -- Less-than predicate + -> [a] -- Input list + -> [a] -- Result list + +sortLt lt l = qsort lt l [] + +-- qsort is stable and does not concatenate. +qsort :: (a -> a -> Bool) -- Less-than predicate + -> [a] -- xs, Input list + -> [a] -- r, Concatenate this list to the sorted input list + -> [a] -- Result = sort xs ++ r + +qsort lt [] r = r +qsort lt [x] r = x:r +qsort lt (x:xs) r = qpart lt x xs [] [] r + +-- qpart partitions and sorts the sublists +-- rlt contains things less than x, +-- rge contains the ones greater than or equal to x. +-- Both have equal elements reversed with respect to the original list. + +qpart lt x [] rlt rge r = + -- rlt and rge are in reverse order and must be sorted with an + -- anti-stable sorting + rqsort lt rlt (x : rqsort lt rge r) + +qpart lt x (y:ys) rlt rge r = + if lt y x then + -- y < x + qpart lt x ys (y:rlt) rge r + else + -- y >= x + qpart lt x ys rlt (y:rge) r + +-- rqsort is as qsort but anti-stable, i.e. reverses equal elements +rqsort lt [] r = r +rqsort lt [x] r = x:r +rqsort lt (x:xs) r = rqpart lt x xs [] [] r + +rqpart lt x [] rle rgt r = + qsort lt rle (x : qsort lt rgt r) + +rqpart lt x (y:ys) rle rgt r = + if lt x y then + -- y > x + rqpart lt x ys rle (y:rgt) r + else + -- y <= x + rqpart lt x ys (y:rle) rgt r +\end{code} + +%************************************************************************ +%* * +\subsubsection[Utils-dull-mergesort]{A rather dull mergesort} +%* * +%************************************************************************ + +\begin{code} +mergesort :: (a -> a -> Ordering) -> [a] -> [a] + +mergesort cmp xs = merge_lists (split_into_runs [] xs) + where + a `le` b = case cmp a b of { LT_ -> True; EQ_ -> True; GT__ -> False } + a `ge` b = case cmp a b of { LT_ -> False; EQ_ -> True; GT__ -> True } + + split_into_runs [] [] = [] + split_into_runs run [] = [run] + split_into_runs [] (x:xs) = split_into_runs [x] xs + split_into_runs [r] (x:xs) | x `ge` r = split_into_runs [r,x] xs + split_into_runs rl@(r:rs) (x:xs) | x `le` r = split_into_runs (x:rl) xs + | True = rl : (split_into_runs [x] xs) + + merge_lists [] = [] + merge_lists (x:xs) = merge x (merge_lists xs) + + merge [] ys = ys + merge xs [] = xs + merge xl@(x:xs) yl@(y:ys) + = case cmp x y of + EQ_ -> x : y : (merge xs ys) + LT_ -> x : (merge xs yl) + GT__ -> y : (merge xl ys) +\end{code} + +%************************************************************************ +%* * +\subsubsection[Utils-Carsten-mergesort]{A mergesort from Carsten} +%* * +%************************************************************************ + +\begin{display} +Date: Mon, 3 May 93 20:45:23 +0200 +From: Carsten Kehler Holst +To: partain@dcs.gla.ac.uk +Subject: natural merge sort beats quick sort [ and it is prettier ] + +Here is a piece of Haskell code that I'm rather fond of. See it as an +attempt to get rid of the ridiculous quick-sort routine. group is +quite useful by itself I think it was John's idea originally though I +believe the lazy version is due to me [surprisingly complicated]. +gamma [used to be called] is called gamma because I got inspired by +the Gamma calculus. It is not very close to the calculus but does +behave less sequentially than both foldr and foldl. One could imagine +a version of gamma that took a unit element as well thereby avoiding +the problem with empty lists. + +I've tried this code against + + 1) insertion sort - as provided by haskell + 2) the normal implementation of quick sort + 3) a deforested version of quick sort due to Jan Sparud + 4) a super-optimized-quick-sort of Lennart's + +If the list is partially sorted both merge sort and in particular +natural merge sort wins. If the list is random [ average length of +rising subsequences = approx 2 ] mergesort still wins and natural +merge sort is marginally beaten by Lennart's soqs. The space +consumption of merge sort is a bit worse than Lennart's quick sort +approx a factor of 2. And a lot worse if Sparud's bug-fix [see his +fpca article ] isn't used because of group. + +have fun +Carsten +\end{display} + +\begin{code} +group :: (a -> a -> Bool) -> [a] -> [[a]] + +{- +Date: Mon, 12 Feb 1996 15:09:41 +0000 +From: Andy Gill + +Here is a `better' definition of group. +-} +group p [] = [] +group p (x:xs) = group' xs x x (x :) + where + group' [] _ _ s = [s []] + group' (x:xs) x_min x_max s + | not (x `p` x_max) = group' xs x_min x (s . (x :)) + | x `p` x_min = group' xs x x_max ((x :) . s) + | otherwise = s [] : group' xs x x (x :) + +-- This one works forwards *and* backwards, as well as also being +-- faster that the one in Util.lhs. + +{- ORIG: +group p [] = [[]] +group p (x:xs) = + let ((h1:t1):tt1) = group p xs + (t,tt) = if null xs then ([],[]) else + if x `p` h1 then (h1:t1,tt1) else + ([], (h1:t1):tt1) + in ((x:t):tt) +-} + +generalMerge :: (a -> a -> Bool) -> [a] -> [a] -> [a] +generalMerge p xs [] = xs +generalMerge p [] ys = ys +generalMerge p (x:xs) (y:ys) | x `p` y = x : generalMerge p xs (y:ys) + | otherwise = y : generalMerge p (x:xs) ys + +-- gamma is now called balancedFold + +balancedFold :: (a -> a -> a) -> [a] -> a +balancedFold f [] = error "can't reduce an empty list using balancedFold" +balancedFold f [x] = x +balancedFold f l = balancedFold f (balancedFold' f l) + +balancedFold' :: (a -> a -> a) -> [a] -> [a] +balancedFold' f (x:y:xs) = f x y : balancedFold' f xs +balancedFold' f xs = xs + +generalMergeSort p [] = [] +generalMergeSort p xs = (balancedFold (generalMerge p) . map (: [])) xs + +generalNaturalMergeSort p [] = [] +generalNaturalMergeSort p xs = (balancedFold (generalMerge p) . group p) xs + +mergeSort, naturalMergeSort :: Ord a => [a] -> [a] + +mergeSort = generalMergeSort (<=) +naturalMergeSort = generalNaturalMergeSort (<=) + +mergeSortLe le = generalMergeSort le +naturalMergeSortLe le = generalNaturalMergeSort le +\end{code} + +%************************************************************************ +%* * +\subsection[Utils-transitive-closure]{Transitive closure} +%* * +%************************************************************************ + +This algorithm for transitive closure is straightforward, albeit quadratic. + +\begin{code} +transitiveClosure :: (a -> [a]) -- Successor function + -> (a -> a -> Bool) -- Equality predicate + -> [a] + -> [a] -- The transitive closure + +transitiveClosure succ eq xs + = go [] xs + where + go done [] = done + go done (x:xs) | x `is_in` done = go done xs + | otherwise = go (x:done) (succ x ++ xs) + + x `is_in` [] = False + x `is_in` (y:ys) | eq x y = True + | otherwise = x `is_in` ys +\end{code} + +%************************************************************************ +%* * +\subsection[Utils-accum]{Accumulating} +%* * +%************************************************************************ + +@mapAccumL@ behaves like a combination +of @map@ and @foldl@; +it applies a function to each element of a list, passing an accumulating +parameter from left to right, and returning a final value of this +accumulator together with the new list. + +\begin{code} +mapAccumL :: (acc -> x -> (acc, y)) -- Function of elt of input list + -- and accumulator, returning new + -- accumulator and elt of result list + -> acc -- Initial accumulator + -> [x] -- Input list + -> (acc, [y]) -- Final accumulator and result list + +mapAccumL f b [] = (b, []) +mapAccumL f b (x:xs) = (b'', x':xs') where + (b', x') = f b x + (b'', xs') = mapAccumL f b' xs +\end{code} + +@mapAccumR@ does the same, but working from right to left instead. Its type is +the same as @mapAccumL@, though. + +\begin{code} +mapAccumR :: (acc -> x -> (acc, y)) -- Function of elt of input list + -- and accumulator, returning new + -- accumulator and elt of result list + -> acc -- Initial accumulator + -> [x] -- Input list + -> (acc, [y]) -- Final accumulator and result list + +mapAccumR f b [] = (b, []) +mapAccumR f b (x:xs) = (b'', x':xs') where + (b'', x') = f b' x + (b', xs') = mapAccumR f b xs +\end{code} + +Here is the bi-directional version, that works from both left and right. + +\begin{code} +mapAccumB :: (accl -> accr -> x -> (accl, accr,y)) + -- Function of elt of input list + -- and accumulator, returning new + -- accumulator and elt of result list + -> accl -- Initial accumulator from left + -> accr -- Initial accumulator from right + -> [x] -- Input list + -> (accl, accr, [y]) -- Final accumulators and result list + +mapAccumB f a b [] = (a,b,[]) +mapAccumB f a b (x:xs) = (a'',b'',y:ys) + where + (a',b'',y) = f a b' x + (a'',b',ys) = mapAccumB f a' b xs +\end{code} + +%************************************************************************ +%* * +\subsection[Utils-comparison]{Comparisons} +%* * +%************************************************************************ + +See also @tagCmp_@ near the versions-compatibility section. + +The Ord3 class will be subsumed into Ord in Haskell 1.3. + +\begin{code} +{- +class Ord3 a where + cmp :: a -> a -> TAG_ +-} + +thenCmp :: Ordering -> Ordering -> Ordering +{-# INLINE thenCmp #-} +thenCmp EQ any = any +thenCmp other any = other + +cmpList :: (a -> a -> Ordering) -> [a] -> [a] -> Ordering + -- `cmpList' uses a user-specified comparer + +cmpList cmp [] [] = EQ +cmpList cmp [] _ = LT +cmpList cmp _ [] = GT +cmpList cmp (a:as) (b:bs) + = case cmp a b of { EQ -> cmpList cmp as bs; xxx -> xxx } +\end{code} + +begin{code} +instance Ord3 a => Ord3 [a] where + cmp [] [] = EQ_ + cmp (x:xs) [] = GT_ + cmp [] (y:ys) = LT_ + cmp (x:xs) (y:ys) = (x `cmp` y) `thenCmp` (xs `cmp` ys) + +instance Ord3 a => Ord3 (Maybe a) where + cmp Nothing Nothing = EQ_ + cmp Nothing (Just y) = LT_ + cmp (Just x) Nothing = GT_ + cmp (Just x) (Just y) = x `cmp` y + +instance Ord3 Int where + cmp a b | a < b = LT_ + | a > b = GT_ + | otherwise = EQ_ +end{code} + +\begin{code} +cmpString :: String -> String -> TAG_ + +cmpString [] [] = EQ_ +cmpString (x:xs) (y:ys) = if x == y then cmpString xs ys + else if x < y then LT_ + else GT_ +cmpString [] ys = LT_ +cmpString xs [] = GT_ + +#ifdef COMPILING_GHC +cmpString _ _ = panic# "cmpString" +#else +cmpString _ _ = error "cmpString" +#endif +\end{code} + +\begin{code} +cmpPString :: FAST_STRING -> FAST_STRING -> TAG_ + +cmpPString x y = compare x y +\end{code} + +%************************************************************************ +%* * +\subsection[Utils-pairs]{Pairs} +%* * +%************************************************************************ + +The following are curried versions of @fst@ and @snd@. + +\begin{code} +cfst :: a -> b -> a -- stranal-sem only (Note) +cfst x y = x +\end{code} + +The following provide us higher order functions that, when applied +to a function, operate on pairs. + +\begin{code} +applyToPair :: ((a -> c),(b -> d)) -> (a,b) -> (c,d) +applyToPair (f,g) (x,y) = (f x, g y) + +applyToFst :: (a -> c) -> (a,b)-> (c,b) +applyToFst f (x,y) = (f x,y) + +applyToSnd :: (b -> d) -> (a,b) -> (a,d) +applyToSnd f (x,y) = (x,f y) + +foldPair :: (a->a->a,b->b->b) -> (a,b) -> [(a,b)] -> (a,b) +foldPair fg ab [] = ab +foldPair fg@(f,g) ab ((a,b):abs) = (f a u,g b v) + where (u,v) = foldPair fg ab abs +\end{code} + +\begin{code} +unzipWith :: (a -> b -> c) -> [(a, b)] -> [c] +unzipWith f pairs = map ( \ (a, b) -> f a b ) pairs +\end{code} + +%************************************************************************ +%* * +\subsection[Utils-errors]{Error handling} +%* * +%************************************************************************ + +\begin{code} +#if defined(COMPILING_GHC) +panic x = error ("panic! (the `impossible' happened):\n\t" + ++ x ++ "\n\n" + ++ "Please report it as a compiler bug " + ++ "to glasgow-haskell-bugs@dcs.gla.ac.uk.\n\n" ) + +pprPanic heading pretty_msg = panic (heading++(ppShow 80 pretty_msg)) +pprError heading pretty_msg = error (heading++(ppShow 80 pretty_msg)) +#if __GLASGOW_HASKELL__ == 201 +pprTrace heading pretty_msg = GHCbase.trace (heading++(ppShow 80 pretty_msg)) +#elsif __GLASGOW_HASKELL__ >= 201 +pprTrace heading pretty_msg = GHC.trace (heading++(ppShow 80 pretty_msg)) +#else +pprTrace heading pretty_msg = trace (heading++(ppShow 80 pretty_msg)) +#endif + +-- #-versions because panic can't return an unboxed int, and that's +-- what TAG_ is with GHC at the moment. Ugh. (Simon) +-- No, man -- Too Beautiful! (Will) + +panic# :: String -> TAG_ +panic# s = case (panic s) of () -> EQ_ + +pprPanic# heading pretty_msg = panic# (heading++(ppShow 80 pretty_msg)) + +assertPanic :: String -> Int -> a +assertPanic file line = panic ("ASSERT failed! file "++file++", line "++show line) + +#endif {- COMPILING_GHC -} +\end{code} diff --git a/ghc/lib/misc/cbits/Makefile b/ghc/lib/misc/cbits/Makefile new file mode 100644 index 0000000..c31a7d7 --- /dev/null +++ b/ghc/lib/misc/cbits/Makefile @@ -0,0 +1,27 @@ +# +# Makefile for cbits subdirectory +# +TOP=../../.. +include $(TOP)/mk/boilerplate.mk +override WAYS= + +CC:=$(HC) + +C_SRCS=$(wildcard *.c) + +# Remove Readline.lhs if readline.h isn't available. +ifneq "$(HAVE_READLINE)" "YES" + C_SRCS := $(filter-out ghcReadline.c,$(C_SRCS)) +endif + +# ToDo: properly +# -D__GNUC__ : added to turn off noise from byteorder.h with 2.7.2 / Solaris-2.3 +# +SRC_MKDEPENDC_OPTS += -I$(GHC_INCLUDE_DIR) -D__GNUC__ +SRC_CC_OPTS += -I$(GHC_INCLUDE_DIR) + +LIBRARY=libHSghc_cbits.a +LIBOBJS=$(C_OBJS) +INSTALL_LIBS += $(LIBRARY) + +include $(TOP)/mk/target.mk diff --git a/ghc/lib/misc/cbits/acceptSocket.c b/ghc/lib/misc/cbits/acceptSocket.c new file mode 100644 index 0000000..dcb445f --- /dev/null +++ b/ghc/lib/misc/cbits/acceptSocket.c @@ -0,0 +1,51 @@ +#if 0 +% +% (c) The GRASP/AQUA Project, Glasgow University, 1996 +% +\subsection[acceptSocket.lc]{Server wait for client to connect} + +\begin{code} +#endif + +#define NON_POSIX_SOURCE +#include "rtsdefs.h" +#include "ghcSockets.h" + +StgInt +acceptSocket(I_ sockfd, A_ peer, A_ addrlen) +{ + StgInt fd; + + while ((fd = accept((int)sockfd, (struct sockaddr *)peer, (int *)addrlen)) < 0) { + if (errno != EINTR) { + cvtErrno(); + switch (ghc_errno) { + default: + stdErrno(); + break; + case GHC_EBADF: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Not a valid descriptor"; + break; + case GHC_EFAULT: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Address not in writeable part of user address space"; + break; + case GHC_ENOTSOCK: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Descriptor not a socket"; + break; + case GHC_EOPNOTSUPP: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Socket not of type that supports listen"; + break; + case GHC_EWOULDBLOCK: + ghc_errtype = ERR_OTHERERROR; + ghc_errstr = "No sockets are present to be accepted"; + break; + } + return -1; + } + } + return fd; +} diff --git a/ghc/lib/misc/cbits/bindSocket.c b/ghc/lib/misc/cbits/bindSocket.c new file mode 100644 index 0000000..db667da --- /dev/null +++ b/ghc/lib/misc/cbits/bindSocket.c @@ -0,0 +1,91 @@ +#if 0 +% +% (c) The GRASP/AQUA Project, Glasgow University, 1995 +% +\subsection[bindSocket.lc]{Assign name to unnamed socket} + +\begin{code} +#endif + +#define NON_POSIX_SOURCE +#include "rtsdefs.h" +#include "ghcSockets.h" + +StgInt +bindSocket(I_ sockfd, A_ myaddr, I_ addrlen, I_ isUnixDomain) +{ + int rc; + + while ((rc = bind((int)sockfd, (struct sockaddr *)myaddr, (int)addrlen)) < 0) { + if (errno != EINTR) { + cvtErrno(); + switch (ghc_errno) { + default: + stdErrno(); + break; + case GHC_EACCES: + ghc_errtype = ERR_PERMISSIONDENIED; + if (isUnixDomain != 0) + ghc_errstr = "For a component of path prefix of path name"; + else + ghc_errstr = "Requested address protected, cannot bind socket"; + break; + case GHC_EISCONN: + case GHC_EADDRINUSE: + ghc_errtype = ERR_RESOURCEBUSY; + ghc_errstr = "Address already in use"; + break; + case GHC_EADDRNOTAVAIL: + ghc_errtype = ERR_PERMISSIONDENIED; + ghc_errstr = "Address not available from local machine"; + break; + case GHC_EBADF: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Not a valid socket file descriptor"; + break; + case GHC_EFAULT: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Address not in valid part of user address space"; + break; + case GHC_EINVAL: + ghc_errtype = ERR_SYSTEMERROR; + ghc_errstr = "Specified size of structure not equal valid address for family"; + break; + case GHC_ENOTSOCK: + ghc_errtype = ERR_INAPPROPRIATETYPE; + ghc_errstr = "Descriptor for file, not a socket"; + break; + case GHC_EIO: + ghc_errtype = ERR_SYSTEMERROR; + ghc_errstr = "Could not make directory entry or alloc inode"; + break; + case GHC_EISDIR: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "A null path name was given"; + break; + case GHC_ELOOP: + ghc_errtype = ERR_SYSTEMERROR; + ghc_errstr = "Too many symbolic links encountered"; + break; + case GHC_ENAMETOOLONG: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Max length of path name exceeded"; + break; + case GHC_ENOENT: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Component in path prefix does not exist"; + break; + case GHC_ENOTDIR: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Component in path prefix is not a directory"; + break; + case GHC_EROFS: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "The inode would reside on read only file system"; + break; + } + return -1; + } + } + return 0; +} diff --git a/ghc/lib/misc/cbits/connectSocket.c b/ghc/lib/misc/cbits/connectSocket.c new file mode 100644 index 0000000..38aed5a --- /dev/null +++ b/ghc/lib/misc/cbits/connectSocket.c @@ -0,0 +1,113 @@ +#if 0 +% +% (c) The GRASP/AQUA Project, Glasgow University, 1995 +% +\subsection[connectSocket.lc]{Assign name to client socket} + +\begin{code} +#endif + +#define NON_POSIX_SOURCE +#include "rtsdefs.h" +#include "ghcSockets.h" + +StgInt +connectSocket(I_ sockfd, A_ servaddr, I_ addrlen, I_ isUnixDomain) +{ + int rc; + + while ((rc = connect((int)sockfd, (struct sockaddr *)servaddr, (int)addrlen)) < 0) { + if (errno != EINTR) { + cvtErrno(); + switch (ghc_errno) { + default: + stdErrno(); + break; + case GHC_EACCES: + ghc_errtype = ERR_PERMISSIONDENIED; + if (isUnixDomain != 0) + ghc_errstr = "For a component of path prefix of path name"; + else + ghc_errstr = "Requested address protected, cannot bind socket"; + break; + case GHC_EISCONN: + case GHC_EADDRINUSE: + ghc_errtype = ERR_RESOURCEBUSY; + ghc_errstr = "Address already in use"; + break; + case GHC_EADDRNOTAVAIL: + ghc_errtype = ERR_PERMISSIONDENIED; + ghc_errstr = "Address not available from local machine"; + break; + case GHC_EAFNOSUPPORT: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Address cannot be used with socket"; + break; + case GHC_EINPROGRESS: + case GHC_EALREADY: + ghc_errtype = ERR_RESOURCEBUSY; + ghc_errstr = "Non-blocking socket, previous connection attempt not completed"; + break; + case GHC_EBADF: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Not a valid socket file descriptor"; + break; + case GHC_ECONNREFUSED: + ghc_errtype = ERR_PERMISSIONDENIED; + ghc_errstr = "Connection rejected"; + break; + case GHC_EFAULT: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Address not in valid part of process address space"; + break; + case GHC_EINVAL: + ghc_errtype = ERR_SYSTEMERROR; + ghc_errstr = "Specified size of structure not equal valid address for family"; + break; + break; + case GHC_ENETUNREACH: + ghc_errtype = ERR_PERMISSIONDENIED; + ghc_errstr = "Network not reachable from host"; + break; + case GHC_ENOTSOCK: + ghc_errtype = ERR_INAPPROPRIATETYPE; + ghc_errstr = "Descriptor for file, not a socket"; + break; + case GHC_ETIMEDOUT: + ghc_errtype = ERR_TIMEEXPIRED; + ghc_errstr = "Connection attempt timed out"; + break; + case GHC_EIO: + ghc_errtype = ERR_SYSTEMERROR; + ghc_errstr = "Could not make directory entry or alloc inode"; + break; + case GHC_EISDIR: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "A null path name was given"; + break; + case GHC_ELOOP: + ghc_errtype = ERR_SYSTEMERROR; + ghc_errstr = "Too many symbolic links encountered"; + break; + case GHC_ENAMETOOLONG: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Max length of path name exceeded"; + break; + case GHC_ENOENT: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Component in path prefix does not exist"; + break; + case GHC_ENOTDIR: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Component in path prefix is not a directory"; + break; + case GHC_EPROTOTYPE: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "File referred to is a socket of differing type"; + break; + } + return -1; + } + } + return 0; +} diff --git a/ghc/lib/misc/cbits/createSocket.c b/ghc/lib/misc/cbits/createSocket.c new file mode 100644 index 0000000..ccab688 --- /dev/null +++ b/ghc/lib/misc/cbits/createSocket.c @@ -0,0 +1,51 @@ +#if 0 +% +% (c) The GRASP/AQUA Project, Glasgow University, 1995 +% +\subsection[createSocket.lc]{Create a socket file descriptor} + +\begin{code} +#endif + +#define NON_POSIX_SOURCE +#include "rtsdefs.h" +#include "ghcSockets.h" + +StgInt +createSocket(I_ family, I_ type, I_ protocol) +{ + int fd; + + if ((fd = socket((int)family, (int)type, (int)protocol)) < 0) { + if (errno != EINTR) { + cvtErrno(); + switch (ghc_errno) { + default: + stdErrno(); + break; + case GHC_EACCES: + ghc_errtype = ERR_PERMISSIONDENIED; + ghc_errstr = "cannot create socket"; + break; + case GHC_EMFILE: + ghc_errtype = ERR_RESOURCEEXHAUSTED; + ghc_errstr = "Too many open files"; + break; + case GHC_ENFILE: + ghc_errtype = ERR_RESOURCEEXHAUSTED; + ghc_errstr = "System file table overflow"; + break; + case GHC_EPROTONOSUPPORT: + ghc_errtype = ERR_UNSUPPORTEDOPERATION; + ghc_errstr = "Protocol type not supported"; + break; + case GHC_EPROTOTYPE: + ghc_errtype = ERR_INAPPROPRIATETYPE; + ghc_errstr = "Protocol wrong type for socket"; + break; + } + return (StgInt)-1; + } + } + return (StgInt)fd; +} diff --git a/ghc/lib/misc/cbits/decls.h b/ghc/lib/misc/cbits/decls.h new file mode 100644 index 0000000..c15ce41 --- /dev/null +++ b/ghc/lib/misc/cbits/decls.h @@ -0,0 +1,11 @@ +#ifndef GHC_CBITS_DECLS_H +#define GHC_CBITS_DECLS_H + +StgByteArray getCPUTime(StgByteArray); +StgInt getClockTime(StgByteArray, StgByteArray); +StgAddr showTime(I_, StgByteArray, StgByteArray); +StgAddr toClockSec(I_, I_, I_, I_, I_, I_, I_, StgByteArray); +StgAddr toLocalTime(I_, StgByteArray, StgByteArray); +StgAddr toUTCTime (I_, StgByteArray, StgByteArray); + +#endif diff --git a/ghc/lib/misc/cbits/getPeerName.c b/ghc/lib/misc/cbits/getPeerName.c new file mode 100644 index 0000000..4bd1e3b --- /dev/null +++ b/ghc/lib/misc/cbits/getPeerName.c @@ -0,0 +1,53 @@ +#if 0 +% +% (c) The GRASP/AQUA Project, Glasgow University, 1996 +% +\subsection[getPeerName.lc]{Return name of peer process} + +Returns name of peer process connected to a socket. + +\begin{code} +#endif + +#define NON_POSIX_SOURCE +#include "rtsdefs.h" +#include "ghcSockets.h" + +StgInt +getPeerName(I_ sockfd, A_ peer, A_ namelen) +{ + StgInt name; + + while ((name = getpeername((int) sockfd, (struct sockaddr *) peer, (int *) namelen)) < 0) { + if (errno != EINTR) { + cvtErrno(); + switch (ghc_errno) { + default: + stdErrno(); + break; + case GHC_EBADF: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Not a valid write descriptor"; + break; + case GHC_EFAULT: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Data not in writeable part of user address space"; + break; + case GHC_ENOBUFS: + ghc_errtype = ERR_RESOURCEEXHAUSTED; + ghc_errstr = "Insuffcient resources"; + break; + case GHC_ENOTCONN: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Socket not connected"; + break; + case GHC_ENOTSOCK: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Descriptor is not a socket"; + break; + } + return -1; + } + } + return name; +} diff --git a/ghc/lib/misc/cbits/getSockName.c b/ghc/lib/misc/cbits/getSockName.c new file mode 100644 index 0000000..987296b --- /dev/null +++ b/ghc/lib/misc/cbits/getSockName.c @@ -0,0 +1,47 @@ +#if 0 +% +% (c) The GRASP/AQUA Project, Glasgow University, 1996 +% +\subsection[getSockName.lc]{Return name of process assoc with socket} + +\begin{code} +#endif + +#define NON_POSIX_SOURCE +#include "rtsdefs.h" +#include "ghcSockets.h" + +StgInt +getSockName(I_ sockfd, A_ peer, A_ namelen) +{ + StgInt name; + + while ((name = getsockname((int) sockfd, (struct sockaddr *) peer, (int *) namelen)) < 0) { + if (errno != EINTR) { + cvtErrno(); + switch (ghc_errno) { + default: + stdErrno(); + break; + case GHC_EBADF: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Not a valid write descriptor"; + break; + case GHC_EFAULT: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Data not in writeable part of user address space"; + break; + case GHC_ENOBUFS: + ghc_errtype = ERR_RESOURCEEXHAUSTED; + ghc_errstr = "Insuffcient resources"; + break; + case GHC_ENOTSOCK: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Descriptor is not a socket"; + break; + } + return -1; + } + } + return name; +} diff --git a/ghc/lib/misc/cbits/ghcReadline.c b/ghc/lib/misc/cbits/ghcReadline.c new file mode 100644 index 0000000..b5bbaaa --- /dev/null +++ b/ghc/lib/misc/cbits/ghcReadline.c @@ -0,0 +1,43 @@ +#if 0 +% +% (c) The GRASP/AQUA Project, Glasgow University, 1995 +% +% Last Modified: Wed Jul 19 12:03:26 1995 +% Darren J Moffat +\section[LibReadline]{GNU Readline Library Bindings} + +\begin{code} +#endif + +#include "rtsdefs.h" + +#include "ghcReadline.h" /* to make sure the code here agrees...*/ + +/* +Wrapper around the callback mechanism to allow Haskell side functions +to be callbacks for the Readline library. + +The C function $genericRlCback$ puts the cback args into global +variables and enters the Haskell world through the $haskellRlEntry$ +function. Before exiting, the Haskell function will deposit its result +in the global variable $rl_return$. +*/ + +I_ current_narg, rl_return, current_kc; + +char* rl_prompt_hack; + +StgStablePtr haskellRlEntry; +StgStablePtr cbackList; + + +I_ +genericRlCback (I_ narg, I_ kc) +{ + current_narg = narg; + current_kc = kc; + + performIO(haskellRlEntry); + + return rl_return; +} diff --git a/ghc/lib/misc/cbits/ghcReadline.h b/ghc/lib/misc/cbits/ghcReadline.h new file mode 100644 index 0000000..7627fec --- /dev/null +++ b/ghc/lib/misc/cbits/ghcReadline.h @@ -0,0 +1,26 @@ +#ifndef GHC_READLINE_H +#define GHC_READLINE_H + +/* Included to see the defn. the HAVE_* below */ +#include "config.h" + +#if HAVE_READLINE_READLINE_H +#include "readline/readline.h" +#endif + +/* For some reason the following 3 aren't defined in readline.h */ +extern int rl_mark; +extern int rl_done; +extern int rl_pending_input; + + +/* Our C Hackery stuff for Callbacks */ +typedef I_ KeyCode; +extern StgStablePtr cbackList; +I_ genericRlCback PROTO((I_, I_)); +extern StgStablePtr haskellRlEntry; +extern I_ current_narg, rl_return; +extern KeyCode current_kc; +extern char* rl_prompt_hack; + +#endif /* !GHC_READLINE_H */ diff --git a/ghc/lib/misc/cbits/ghcRegex.h b/ghc/lib/misc/cbits/ghcRegex.h new file mode 100644 index 0000000..7215c6f --- /dev/null +++ b/ghc/lib/misc/cbits/ghcRegex.h @@ -0,0 +1,543 @@ +/* Definitions for data structures and routines for the regular + expression library, version 0.12. + Copyright (C) 1985,89,90,91,92,93,95,96,97 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#ifndef __REGEXP_LIBRARY_H__ +#define __REGEXP_LIBRARY_H__ + +/* Allow the use in C++ code. */ +#ifdef __cplusplus +extern "C" { +#endif + +/* POSIX says that must be included (by the caller) before + . */ + +#if !defined (_POSIX_C_SOURCE) && !defined (_POSIX_SOURCE) && defined (VMS) +/* VMS doesn't have `size_t' in , even though POSIX says it + should be there. */ +#include +#endif + +/* The following two types have to be signed and unsigned integer type + wide enough to hold a value of a pointer. For most ANSI compilers + ptrdiff_t and size_t should be likely OK. Still size of these two + types is 2 for Microsoft C. Ugh... */ +typedef long int s_reg_t; +typedef unsigned long int active_reg_t; + +/* The following bits are used to determine the regexp syntax we + recognize. The set/not-set meanings are chosen so that Emacs syntax + remains the value 0. The bits are given in alphabetical order, and + the definitions shifted by one from the previous bit; thus, when we + add or remove a bit, only one other definition need change. */ +typedef unsigned long int reg_syntax_t; + +/* If this bit is not set, then \ inside a bracket expression is literal. + If set, then such a \ quotes the following character. */ +#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) + +/* If this bit is not set, then + and ? are operators, and \+ and \? are + literals. + If set, then \+ and \? are operators and + and ? are literals. */ +#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) + +/* If this bit is set, then character classes are supported. They are: + [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], + [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. + If not set, then character classes are not supported. */ +#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) + +/* If this bit is set, then ^ and $ are always anchors (outside bracket + expressions, of course). + If this bit is not set, then it depends: + ^ is an anchor if it is at the beginning of a regular + expression or after an open-group or an alternation operator; + $ is an anchor if it is at the end of a regular expression, or + before a close-group or an alternation operator. + + This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because + POSIX draft 11.2 says that * etc. in leading positions is undefined. + We already implemented a previous draft which made those constructs + invalid, though, so we haven't changed the code back. */ +#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) + +/* If this bit is set, then special characters are always special + regardless of where they are in the pattern. + If this bit is not set, then special characters are special only in + some contexts; otherwise they are ordinary. Specifically, + * + ? and intervals are only special when not after the beginning, + open-group, or alternation operator. */ +#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) + +/* If this bit is set, then *, +, ?, and { cannot be first in an re or + immediately after an alternation or begin-group operator. */ +#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) + +/* If this bit is set, then . matches newline. + If not set, then it doesn't. */ +#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) + +/* If this bit is set, then . doesn't match NUL. + If not set, then it does. */ +#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) + +/* If this bit is set, nonmatching lists [^...] do not match newline. + If not set, they do. */ +#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) + +/* If this bit is set, either \{...\} or {...} defines an + interval, depending on RE_NO_BK_BRACES. + If not set, \{, \}, {, and } are literals. */ +#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) + +/* If this bit is set, +, ? and | aren't recognized as operators. + If not set, they are. */ +#define RE_LIMITED_OPS (RE_INTERVALS << 1) + +/* If this bit is set, newline is an alternation operator. + If not set, newline is literal. */ +#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) + +/* If this bit is set, then `{...}' defines an interval, and \{ and \} + are literals. + If not set, then `\{...\}' defines an interval. */ +#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) + +/* If this bit is set, (...) defines a group, and \( and \) are literals. + If not set, \(...\) defines a group, and ( and ) are literals. */ +#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) + +/* If this bit is set, then \ matches . + If not set, then \ is a back-reference. */ +#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) + +/* If this bit is set, then | is an alternation operator, and \| is literal. + If not set, then \| is an alternation operator, and | is literal. */ +#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) + +/* If this bit is set, then an ending range point collating higher + than the starting range point, as in [z-a], is invalid. + If not set, then when ending range point collates higher than the + starting range point, the range is ignored. */ +#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) + +/* If this bit is set, then an unmatched ) is ordinary. + If not set, then an unmatched ) is invalid. */ +#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) + +/* If this bit is set, succeed as soon as we match the whole pattern, + without further backtracking. */ +#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) + +/* If this bit is set, do not process the GNU regex operators. + If not set, then the GNU regex operators are recognized. */ +#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) + +/* If this bit is set, turn on internal regex debugging. + If not set, and debugging was on, turn it off. + This only works if regex.c is compiled -DDEBUG. + We define this bit always, so that all that's needed to turn on + debugging is to recompile regex.c; the calling code can always have + this bit set, and it won't affect anything in the normal case. */ +#define RE_DEBUG (RE_NO_GNU_OPS << 1) + +/* This global variable defines the particular regexp syntax to use (for + some interfaces). When a regexp is compiled, the syntax used is + stored in the pattern buffer, so changing this does not affect + already-compiled regexps. */ +extern reg_syntax_t re_syntax_options; + +/* Define combinations of the above bits for the standard possibilities. + (The [[[ comments delimit what gets put into the Texinfo file, so + don't delete them!) */ +/* [[[begin syntaxes]]] */ +#define RE_SYNTAX_EMACS 0 + +#define RE_SYNTAX_AWK \ + (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ + | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \ + | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS) + +#define RE_SYNTAX_GNU_AWK \ + ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \ + & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS)) + +#define RE_SYNTAX_POSIX_AWK \ + (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ + | RE_INTERVALS | RE_NO_GNU_OPS) + +#define RE_SYNTAX_GREP \ + (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ + | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ + | RE_NEWLINE_ALT) + +#define RE_SYNTAX_EGREP \ + (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ + | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ + | RE_NO_BK_VBAR) + +#define RE_PERL_MULTILINE_SYNTAX \ + (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS \ + | RE_INTERVALS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_VBAR) + +#define RE_PERL_SINGLELINE_SYNTAX \ + (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_DOT_NEWLINE \ + | RE_INTERVALS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_VBAR) + +#define RE_SYNTAX_POSIX_EGREP \ + (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) + +/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ +#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC + +#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC + +/* Syntax bits common to both basic and extended POSIX regex syntax. */ +#define _RE_SYNTAX_POSIX_COMMON \ + (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ + | RE_INTERVALS | RE_NO_EMPTY_RANGES) + +#define RE_SYNTAX_POSIX_BASIC \ + (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM) + +/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes + RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this + isn't minimal, since other operators, such as \`, aren't disabled. */ +#define RE_SYNTAX_POSIX_MINIMAL_BASIC \ + (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) + +#define RE_SYNTAX_POSIX_EXTENDED \ + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ + | RE_UNMATCHED_RIGHT_PAREN_ORD) + +/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS + replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */ +#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) +/* [[[end syntaxes]]] */ + +/* Maximum number of duplicates an interval can allow. Some systems + (erroneously) define this in other header files, but we want our + value, so remove any previous define. */ +#ifdef RE_DUP_MAX +#undef RE_DUP_MAX +#endif +/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */ +#define RE_DUP_MAX (0x7fff) + + +/* POSIX `cflags' bits (i.e., information for `regcomp'). */ + +/* If this bit is set, then use extended regular expression syntax. + If not set, then use basic regular expression syntax. */ +#define REG_EXTENDED 1 + +/* If this bit is set, then ignore case when matching. + If not set, then case is significant. */ +#define REG_ICASE (REG_EXTENDED << 1) + +/* If this bit is set, then anchors do not match at newline + characters in the string. + If not set, then anchors do match at newlines. */ +#define REG_NEWLINE (REG_ICASE << 1) + +/* If this bit is set, then report only success or fail in regexec. + If not set, then returns differ between not matching and errors. */ +#define REG_NOSUB (REG_NEWLINE << 1) + + +/* POSIX `eflags' bits (i.e., information for regexec). */ + +/* If this bit is set, then the beginning-of-line operator doesn't match + the beginning of the string (presumably because it's not the + beginning of a line). + If not set, then the beginning-of-line operator does match the + beginning of the string. */ +#define REG_NOTBOL 1 + +/* Like REG_NOTBOL, except for the end-of-line. */ +#define REG_NOTEOL (1 << 1) + + +/* If any error codes are removed, changed, or added, update the + `re_error_msg' table in regex.c. */ +typedef enum +{ + REG_NOERROR = 0, /* Success. */ + REG_NOMATCH, /* Didn't find a match (for regexec). */ + + /* POSIX regcomp return error codes. (In the order listed in the + standard.) */ + REG_BADPAT, /* Invalid pattern. */ + REG_ECOLLATE, /* Not implemented. */ + REG_ECTYPE, /* Invalid character class name. */ + REG_EESCAPE, /* Trailing backslash. */ + REG_ESUBREG, /* Invalid back reference. */ + REG_EBRACK, /* Unmatched left bracket. */ + REG_EPAREN, /* Parenthesis imbalance. */ + REG_EBRACE, /* Unmatched \{. */ + REG_BADBR, /* Invalid contents of \{\}. */ + REG_ERANGE, /* Invalid range end. */ + REG_ESPACE, /* Ran out of memory. */ + REG_BADRPT, /* No preceding re for repetition op. */ + + /* Error codes we've added. */ + REG_EEND, /* Premature end. */ + REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ + REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ +} reg_errcode_t; + +/* This data structure represents a compiled pattern. Before calling + the pattern compiler, the fields `buffer', `allocated', `fastmap', + `translate', and `no_sub' can be set. After the pattern has been + compiled, the `re_nsub' field is available. All other fields are + private to the regex routines. */ + +#ifndef RE_TRANSLATE_TYPE +#define RE_TRANSLATE_TYPE char * +#endif + +struct re_pattern_buffer +{ +/* [[[begin pattern_buffer]]] */ + /* Space that holds the compiled pattern. It is declared as + `unsigned char *' because its elements are + sometimes used as array indexes. */ + unsigned char *buffer; + + /* Number of bytes to which `buffer' points. */ + unsigned long int allocated; + + /* Number of bytes actually used in `buffer'. */ + unsigned long int used; + + /* Syntax setting with which the pattern was compiled. */ + reg_syntax_t syntax; + + /* Pointer to a fastmap, if any, otherwise zero. re_search uses + the fastmap, if there is one, to skip over impossible + starting points for matches. */ + char *fastmap; + + /* Either a translate table to apply to all characters before + comparing them, or zero for no translation. The translation + is applied to a pattern when it is compiled and to a string + when it is matched. */ + RE_TRANSLATE_TYPE translate; + + /* Number of subexpressions found by the compiler. */ + size_t re_nsub; + + /* Zero if this pattern cannot match the empty string, one else. + Well, in truth it's used only in `re_search_2', to see + whether or not we should use the fastmap, so we don't set + this absolutely perfectly; see `re_compile_fastmap' (the + `duplicate' case). */ + unsigned can_be_null : 1; + + /* If REGS_UNALLOCATED, allocate space in the `regs' structure + for `max (RE_NREGS, re_nsub + 1)' groups. + If REGS_REALLOCATE, reallocate space if necessary. + If REGS_FIXED, use what's there. */ +#define REGS_UNALLOCATED 0 +#define REGS_REALLOCATE 1 +#define REGS_FIXED 2 + unsigned regs_allocated : 2; + + /* Set to zero when `regex_compile' compiles a pattern; set to one + by `re_compile_fastmap' if it updates the fastmap. */ + unsigned fastmap_accurate : 1; + + /* If set, `re_match_2' does not return information about + subexpressions. */ + unsigned no_sub : 1; + + /* If set, a beginning-of-line anchor doesn't match at the + beginning of the string. */ + unsigned not_bol : 1; + + /* Similarly for an end-of-line anchor. */ + unsigned not_eol : 1; + + /* If true, an anchor at a newline matches. */ + unsigned newline_anchor : 1; + +/* [[[end pattern_buffer]]] */ +}; + +typedef struct re_pattern_buffer regex_t; + +/* Type for byte offsets within the string. POSIX mandates this. */ +typedef int regoff_t; + + +/* This is the structure we store register match data in. See + regex.texinfo for a full description of what registers match. */ +struct re_registers +{ + unsigned num_regs; + regoff_t *start; + regoff_t *end; +}; + + +/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, + `re_match_2' returns information about at least this many registers + the first time a `regs' structure is passed. */ +#ifndef RE_NREGS +#define RE_NREGS 30 +#endif + + +/* POSIX specification for registers. Aside from the different names than + `re_registers', POSIX uses an array of structures, instead of a + structure of arrays. */ +typedef struct +{ + regoff_t rm_so; /* Byte offset from string's start to substring's start. */ + regoff_t rm_eo; /* Byte offset from string's start to substring's end. */ +} regmatch_t; + +/* Declarations for routines. */ + +/* To avoid duplicating every routine declaration -- once with a + prototype (if we are ANSI), and once without (if we aren't) -- we + use the following macro to declare argument types. This + unfortunately clutters up the declarations a bit, but I think it's + worth it. */ + +#if __STDC__ + +#define _RE_ARGS(args) args + +#else /* not __STDC__ */ + +#define _RE_ARGS(args) () + +#endif /* not __STDC__ */ + +/* Sets the current default syntax to SYNTAX, and return the old syntax. + You can also simply assign to the `re_syntax_options' variable. */ +extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax)); + +/* Compile the regular expression PATTERN, with length LENGTH + and syntax given by the global `re_syntax_options', into the buffer + BUFFER. Return NULL if successful, and an error string if not. */ +extern const char *re_compile_pattern + _RE_ARGS ((const char *pattern, size_t length, + struct re_pattern_buffer *buffer)); + + +/* Compile a fastmap for the compiled pattern in BUFFER; used to + accelerate searches. Return 0 if successful and -2 if was an + internal error. */ +extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer)); + + +/* Search in the string STRING (with length LENGTH) for the pattern + compiled into BUFFER. Start searching at position START, for RANGE + characters. Return the starting position of the match, -1 for no + match, or -2 for an internal error. Also return register + information in REGS (if REGS and BUFFER->no_sub are nonzero). */ +extern int re_search + _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, + int length, int start, int range, struct re_registers *regs)); + + +/* Like `re_search', but search in the concatenation of STRING1 and + STRING2. Also, stop searching at index START + STOP. */ +extern int re_search_2 + _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, + int length1, const char *string2, int length2, + int start, int range, struct re_registers *regs, int stop)); + + +/* Like `re_search', but return how many characters in STRING the regexp + in BUFFER matched, starting at position START. */ +extern int re_match + _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, + int length, int start, struct re_registers *regs)); + + +/* Relates to `re_match' as `re_search_2' relates to `re_search'. */ +extern int re_match_2 + _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, + int length1, const char *string2, int length2, + int start, struct re_registers *regs, int stop)); + + +/* Set REGS to hold NUM_REGS registers, storing them in STARTS and + ENDS. Subsequent matches using BUFFER and REGS will use this memory + for recording register information. STARTS and ENDS must be + allocated with malloc, and must each be at least `NUM_REGS * sizeof + (regoff_t)' bytes long. + + If NUM_REGS == 0, then subsequent matches should allocate their own + register data. + + Unless this function is called, the first search or match using + PATTERN_BUFFER will allocate its own register data, without + freeing the old data. */ +extern void re_set_registers + _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs, + unsigned num_regs, regoff_t *starts, regoff_t *ends)); + +#ifdef _REGEX_RE_COMP +#ifndef _CRAY +/* 4.2 bsd compatibility. */ +extern char *re_comp _RE_ARGS ((const char *)); +extern int re_exec _RE_ARGS ((const char *)); +#endif +#endif + +/* POSIX compatibility. */ +extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags)); +extern int regexec + _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch, + regmatch_t pmatch[], int eflags)); +extern size_t regerror + _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf, + size_t errbuf_size)); +extern void regfree _RE_ARGS ((regex_t *preg)); + + +#ifdef __cplusplus +} +#endif /* C++ */ + +#endif /* not __REGEXP_LIBRARY_H__ */ + +/* +Local variables: +make-backup-files: t +version-control: t +trim-versions-without-asking: nil +End: +*/ diff --git a/ghc/lib/misc/cbits/ghcSockets.h b/ghc/lib/misc/cbits/ghcSockets.h new file mode 100644 index 0000000..5457aed --- /dev/null +++ b/ghc/lib/misc/cbits/ghcSockets.h @@ -0,0 +1,77 @@ +#ifndef GHC_SOCKETS_H +#define GHC_SOCKETS_H + +#include +#include +#include +#include +#include + +#ifdef HAVE_STDLIB_H +# include +#endif +#ifdef HAVE_STRING_H +# include +#endif +#ifdef HAVE_FCNTL_H +# include +#endif +#ifdef HAVE_SYS_SOCKET_H +# include +#endif +#if TIME_WITH_SYS_TIME +# include +# include +#else +# if HAVE_SYS_TIME_H +# include +# else +# include +# endif +#endif +#ifdef HAVE_SYS_TYPES_H +# include +#endif +#include + +/* ToDo: featurise this */ +#ifndef cygwin32_TARGET_OS +#include +#endif + +#ifdef HAVE_UNISTD_H +# include +#endif + +/* acceptSocket.lc */ +StgInt acceptSocket PROTO((StgInt, StgAddr, StgAddr)); + +/* bindSocket.lc */ +StgInt bindSocket PROTO((StgInt, StgAddr, StgInt, StgInt)); + +/* connectSocket.lc */ +StgInt connectSocket PROTO((StgInt, StgAddr, StgInt, StgInt)); + +/* createSocket.lc */ +StgInt createSocket PROTO((StgInt, StgInt, StgInt)); + +/* getSockName.lc */ +StgInt getSockName PROTO((StgInt, StgAddr, StgAddr)); + +/* getPeerName.lc */ +StgInt getPeerName PROTO((StgInt, StgAddr, StgAddr)); + +/* listenSocket.lc */ +StgInt listenSocket PROTO((StgInt, StgInt)); + +/* shutdownSocket.lc */ +StgInt shutdownSocket PROTO((StgInt, StgInt)); + +/* readDescriptor.lc */ +StgInt readDescriptor PROTO((StgInt, StgAddr, StgInt)); + +/* writeDescriptor.lc */ +StgInt writeDescriptor PROTO((StgInt, StgAddr, StgInt)); + + +#endif /* !GHC_SOCKETS_H */ diff --git a/ghc/lib/misc/cbits/listenSocket.c b/ghc/lib/misc/cbits/listenSocket.c new file mode 100644 index 0000000..f1316e8 --- /dev/null +++ b/ghc/lib/misc/cbits/listenSocket.c @@ -0,0 +1,43 @@ +#if 0 +% +% (c) The GRASP/AQUA Project, Glasgow University, 1996 +% +\subsection[listenSocket.lc]{Indicate willingness to receive connections} + +\begin{code} +#endif + +#define NON_POSIX_SOURCE +#include "rtsdefs.h" +#include "ghcSockets.h" + +StgInt +listenSocket(I_ sockfd, I_ backlog) +{ + int rc; + + while ((rc = listen((int) sockfd, (int) backlog)) < 0) { + if (errno != EINTR) { + cvtErrno(); + switch (ghc_errno) { + default: + stdErrno(); + break; + case GHC_EBADF: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Not a valid descriptor"; + break; + case GHC_ENOTSOCK: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Descriptor not a socket"; + break; + case GHC_EOPNOTSUPP: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Socket not of type that supports listen"; + break; + } + return -1; + } + } + return 0; +} diff --git a/ghc/lib/misc/cbits/readDescriptor.c b/ghc/lib/misc/cbits/readDescriptor.c new file mode 100644 index 0000000..21714bf --- /dev/null +++ b/ghc/lib/misc/cbits/readDescriptor.c @@ -0,0 +1,60 @@ +#if 0 +% +% (c) The GRASP/AQUA Project, Glasgow University, 1996 +% +\subsection[readDescriptor.lc]{Suck some bytes from a descriptor} + +\begin{code} +#endif + +#define NON_POSIX_SOURCE +#include "rtsdefs.h" +#include "ghcSockets.h" + +StgInt +readDescriptor(I_ fd, A_ buf, I_ nbytes) +{ + StgInt sucked; + + while ((sucked = read((int) fd, (char *) buf, (int) nbytes)) < 0) { + if (errno != EINTR) { + cvtErrno(); + switch (ghc_errno) { + default: + stdErrno(); + break; + case GHC_EBADF: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Not a valid write descriptor"; + break; + case GHC_EBADMSG: + ghc_errtype = ERR_SYSTEMERROR; + ghc_errstr = "Message waiting to be read is not a data message"; + break; + case GHC_EFAULT: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Data buffer not in writeable part of user address space"; + break; + case GHC_EINVAL: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Seek pointer associated with descriptor negative"; + break; + case GHC_EIO: + ghc_errtype = ERR_SYSTEMERROR; + ghc_errstr = "I/O error occurred while writing to file system"; + break; + case GHC_EISDIR: + ghc_errtype = ERR_INAPPROPRIATETYPE; + ghc_errstr = "Descriptor refers to a directory"; + break; + case GHC_EAGAIN: + case GHC_EWOULDBLOCK: + ghc_errtype = ERR_OTHERERROR; + ghc_errstr = "No data could be written immediately"; + break; + } + return -1; + } + } + return sucked; +} diff --git a/ghc/lib/misc/cbits/regex.c b/ghc/lib/misc/cbits/regex.c new file mode 100644 index 0000000..7212564 --- /dev/null +++ b/ghc/lib/misc/cbits/regex.c @@ -0,0 +1,5718 @@ +/* Extended regular expression matching and search library, + version 0.12. + (Implements POSIX draft P1003.2/D11.2, except for some of the + internationalization features.) + + Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +/* AIX requires this to be the first thing in the file. */ +#if defined (_AIX) && !defined (REGEX_MALLOC) + #pragma alloca +#endif + +#undef _GNU_SOURCE +#define _GNU_SOURCE + +#ifdef HAVE_CONFIG_H +#include +#endif + +#if defined(STDC_HEADERS) && !defined(emacs) +#include +#else +/* We need this for `regex.h', and perhaps for the Emacs include files. */ +#include +#endif + +/* For platform which support the ISO C amendement 1 functionality we + support user defined character classes. */ +#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) +# include +# include +#endif + +/* This is for other GNU distributions with internationalized messages. */ +#if HAVE_LIBINTL_H || defined (_LIBC) +# include +#else +# define gettext(msgid) (msgid) +#endif + +#ifndef gettext_noop +/* This define is so xgettext can find the internationalizable + strings. */ +#define gettext_noop(String) String +#endif + +/* The `emacs' switch turns on certain matching commands + that make sense only in Emacs. */ +#ifdef emacs + +#include "lisp.h" +#include "buffer.h" +#include "syntax.h" + +#else /* not emacs */ + +/* If we are not linking with Emacs proper, + we can't use the relocating allocator + even if config.h says that we can. */ +#undef REL_ALLOC + +#if defined (STDC_HEADERS) || defined (_LIBC) +#include +#else +char *malloc (); +char *realloc (); +#endif + +/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. + If nothing else has been done, use the method below. */ +#ifdef INHIBIT_STRING_HEADER +#if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY)) +#if !defined (bzero) && !defined (bcopy) +#undef INHIBIT_STRING_HEADER +#endif +#endif +#endif + +/* This is the normal way of making sure we have a bcopy and a bzero. + This is used in most programs--a few other programs avoid this + by defining INHIBIT_STRING_HEADER. */ +#ifndef INHIBIT_STRING_HEADER +#if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC) +#include +#ifndef bcmp +#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n)) +#endif +#ifndef bcopy +#define bcopy(s, d, n) memcpy ((d), (s), (n)) +#endif +#ifndef bzero +#define bzero(s, n) memset ((s), 0, (n)) +#endif +#else +#include +#endif +#endif + +/* Define the syntax stuff for \<, \>, etc. */ + +/* This must be nonzero for the wordchar and notwordchar pattern + commands in re_match_2. */ +#ifndef Sword +#define Sword 1 +#endif + +#ifdef SWITCH_ENUM_BUG +#define SWITCH_ENUM_CAST(x) ((int)(x)) +#else +#define SWITCH_ENUM_CAST(x) (x) +#endif + +#ifdef SYNTAX_TABLE + +extern char *re_syntax_table; + +#else /* not SYNTAX_TABLE */ + +/* How many characters in the character set. */ +#define CHAR_SET_SIZE 256 + +static char re_syntax_table[CHAR_SET_SIZE]; + +static void +init_syntax_once () +{ + register int c; + static int done = 0; + + if (done) + return; + + bzero (re_syntax_table, sizeof re_syntax_table); + + for (c = 'a'; c <= 'z'; c++) + re_syntax_table[c] = Sword; + + for (c = 'A'; c <= 'Z'; c++) + re_syntax_table[c] = Sword; + + for (c = '0'; c <= '9'; c++) + re_syntax_table[c] = Sword; + + re_syntax_table['_'] = Sword; + + done = 1; +} + +#endif /* not SYNTAX_TABLE */ + +#define SYNTAX(c) re_syntax_table[c] + +#endif /* not emacs */ + +/* Get the interface, including the syntax bits. */ +#include "ghcRegex.h" + +/* isalpha etc. are used for the character classes. */ +#include + +/* Jim Meyering writes: + + "... Some ctype macros are valid only for character codes that + isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when + using /bin/cc or gcc but without giving an ansi option). So, all + ctype uses should be through macros like ISPRINT... If + STDC_HEADERS is defined, then autoconf has verified that the ctype + macros don't need to be guarded with references to isascii. ... + Defining isascii to 1 should let any compiler worth its salt + eliminate the && through constant folding." */ + +#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII)) +#define ISASCII(c) 1 +#else +#define ISASCII(c) isascii(c) +#endif + +#ifdef isblank +#define ISBLANK(c) (ISASCII (c) && isblank (c)) +#else +#define ISBLANK(c) ((c) == ' ' || (c) == '\t') +#endif +#ifdef isgraph +#define ISGRAPH(c) (ISASCII (c) && isgraph (c)) +#else +#define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) +#endif + +#define ISPRINT(c) (ISASCII (c) && isprint (c)) +#define ISDIGIT(c) (ISASCII (c) && isdigit (c)) +#define ISALNUM(c) (ISASCII (c) && isalnum (c)) +#define ISALPHA(c) (ISASCII (c) && isalpha (c)) +#define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) +#define ISLOWER(c) (ISASCII (c) && islower (c)) +#define ISPUNCT(c) (ISASCII (c) && ispunct (c)) +#define ISSPACE(c) (ISASCII (c) && isspace (c)) +#define ISUPPER(c) (ISASCII (c) && isupper (c)) +#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) + +#ifndef NULL +#define NULL (void *)0 +#endif + +/* We remove any previous definition of `SIGN_EXTEND_CHAR', + since ours (we hope) works properly with all combinations of + machines, compilers, `char' and `unsigned char' argument types. + (Per Bothner suggested the basic approach.) */ +#undef SIGN_EXTEND_CHAR +#if __STDC__ +#define SIGN_EXTEND_CHAR(c) ((signed char) (c)) +#else /* not __STDC__ */ +/* As in Harbison and Steele. */ +#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) +#endif + +/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we + use `alloca' instead of `malloc'. This is because using malloc in + re_search* or re_match* could cause memory leaks when C-g is used in + Emacs; also, malloc is slower and causes storage fragmentation. On + the other hand, malloc is more portable, and easier to debug. + + Because we sometimes use alloca, some routines have to be macros, + not functions -- `alloca'-allocated space disappears at the end of the + function it is called in. */ + +#ifdef REGEX_MALLOC + +#define REGEX_ALLOCATE malloc +#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) +#define REGEX_FREE free + +#else /* not REGEX_MALLOC */ + +/* Emacs already defines alloca, sometimes. */ +#ifndef alloca + +/* Make alloca work the best possible way. */ +#ifdef __GNUC__ +#define alloca __builtin_alloca +#else /* not __GNUC__ */ +#if HAVE_ALLOCA_H +#include +#else /* not __GNUC__ or HAVE_ALLOCA_H */ +#if 0 /* It is a bad idea to declare alloca. We always cast the result. */ +#ifndef _AIX /* Already did AIX, up at the top. */ +char *alloca (); +#endif /* not _AIX */ +#endif +#endif /* not HAVE_ALLOCA_H */ +#endif /* not __GNUC__ */ + +#endif /* not alloca */ + +#define REGEX_ALLOCATE alloca + +/* Assumes a `char *destination' variable. */ +#define REGEX_REALLOCATE(source, osize, nsize) \ + (destination = (char *) alloca (nsize), \ + bcopy (source, destination, osize), \ + destination) + +/* No need to do anything to free, after alloca. */ +#define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ + +#endif /* not REGEX_MALLOC */ + +/* Define how to allocate the failure stack. */ + +#if defined (REL_ALLOC) && defined (REGEX_MALLOC) + +#define REGEX_ALLOCATE_STACK(size) \ + r_alloc (&failure_stack_ptr, (size)) +#define REGEX_REALLOCATE_STACK(source, osize, nsize) \ + r_re_alloc (&failure_stack_ptr, (nsize)) +#define REGEX_FREE_STACK(ptr) \ + r_alloc_free (&failure_stack_ptr) + +#else /* not using relocating allocator */ + +#ifdef REGEX_MALLOC + +#define REGEX_ALLOCATE_STACK malloc +#define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize) +#define REGEX_FREE_STACK free + +#else /* not REGEX_MALLOC */ + +#define REGEX_ALLOCATE_STACK alloca + +#define REGEX_REALLOCATE_STACK(source, osize, nsize) \ + REGEX_REALLOCATE (source, osize, nsize) +/* No need to explicitly free anything. */ +#define REGEX_FREE_STACK(arg) + +#endif /* not REGEX_MALLOC */ +#endif /* not using relocating allocator */ + + +/* True if `size1' is non-NULL and PTR is pointing anywhere inside + `string1' or just past its end. This works if PTR is NULL, which is + a good thing. */ +#define FIRST_STRING_P(ptr) \ + (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) + +/* (Re)Allocate N items of type T using malloc, or fail. */ +#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) +#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) +#define RETALLOC_IF(addr, n, t) \ + if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) +#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) + +#define BYTEWIDTH 8 /* In bits. */ + +#define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) + +#undef MAX +#undef MIN +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#define MIN(a, b) ((a) < (b) ? (a) : (b)) + +typedef char boolean; +#define false 0 +#define true 1 + +static int re_match_2_internal (); + +/* These are the command codes that appear in compiled regular + expressions. Some opcodes are followed by argument bytes. A + command code can specify any interpretation whatsoever for its + arguments. Zero bytes may appear in the compiled regular expression. */ + +typedef enum +{ + no_op = 0, + + /* Succeed right away--no more backtracking. */ + succeed, + + /* Followed by one byte giving n, then by n literal bytes. */ + exactn, + + /* Matches any (more or less) character. */ + anychar, + + /* Matches any one char belonging to specified set. First + following byte is number of bitmap bytes. Then come bytes + for a bitmap saying which chars are in. Bits in each byte + are ordered low-bit-first. A character is in the set if its + bit is 1. A character too large to have a bit in the map is + automatically not in the set. */ + charset, + + /* Same parameters as charset, but match any character that is + not one of those specified. */ + charset_not, + + /* Start remembering the text that is matched, for storing in a + register. Followed by one byte with the register number, in + the range 0 to one less than the pattern buffer's re_nsub + field. Then followed by one byte with the number of groups + inner to this one. (This last has to be part of the + start_memory only because we need it in the on_failure_jump + of re_match_2.) */ + start_memory, + + /* Stop remembering the text that is matched and store it in a + memory register. Followed by one byte with the register + number, in the range 0 to one less than `re_nsub' in the + pattern buffer, and one byte with the number of inner groups, + just like `start_memory'. (We need the number of inner + groups here because we don't have any easy way of finding the + corresponding start_memory when we're at a stop_memory.) */ + stop_memory, + + /* Match a duplicate of something remembered. Followed by one + byte containing the register number. */ + duplicate, + + /* Fail unless at beginning of line. */ + begline, + + /* Fail unless at end of line. */ + endline, + + /* Succeeds if at beginning of buffer (if emacs) or at beginning + of string to be matched (if not). */ + begbuf, + + /* Analogously, for end of buffer/string. */ + endbuf, + + /* Followed by two byte relative address to which to jump. */ + jump, + + /* Same as jump, but marks the end of an alternative. */ + jump_past_alt, + + /* Followed by two-byte relative address of place to resume at + in case of failure. */ + on_failure_jump, + + /* Like on_failure_jump, but pushes a placeholder instead of the + current string position when executed. */ + on_failure_keep_string_jump, + + /* Throw away latest failure point and then jump to following + two-byte relative address. */ + pop_failure_jump, + + /* Change to pop_failure_jump if know won't have to backtrack to + match; otherwise change to jump. This is used to jump + back to the beginning of a repeat. If what follows this jump + clearly won't match what the repeat does, such that we can be + sure that there is no use backtracking out of repetitions + already matched, then we change it to a pop_failure_jump. + Followed by two-byte address. */ + maybe_pop_jump, + + /* Jump to following two-byte address, and push a dummy failure + point. This failure point will be thrown away if an attempt + is made to use it for a failure. A `+' construct makes this + before the first repeat. Also used as an intermediary kind + of jump when compiling an alternative. */ + dummy_failure_jump, + + /* Push a dummy failure point and continue. Used at the end of + alternatives. */ + push_dummy_failure, + + /* Followed by two-byte relative address and two-byte number n. + After matching N times, jump to the address upon failure. */ + succeed_n, + + /* Followed by two-byte relative address, and two-byte number n. + Jump to the address N times, then fail. */ + jump_n, + + /* Set the following two-byte relative address to the + subsequent two-byte number. The address *includes* the two + bytes of number. */ + set_number_at, + + wordchar, /* Matches any word-constituent character. */ + notwordchar, /* Matches any char that is not a word-constituent. */ + + wordbeg, /* Succeeds if at word beginning. */ + wordend, /* Succeeds if at word end. */ + + wordbound, /* Succeeds if at a word boundary. */ + notwordbound /* Succeeds if not at a word boundary. */ + +#ifdef emacs + ,before_dot, /* Succeeds if before point. */ + at_dot, /* Succeeds if at point. */ + after_dot, /* Succeeds if after point. */ + + /* Matches any character whose syntax is specified. Followed by + a byte which contains a syntax code, e.g., Sword. */ + syntaxspec, + + /* Matches any character whose syntax is not that specified. */ + notsyntaxspec +#endif /* emacs */ +} re_opcode_t; + +/* Common operations on the compiled pattern. */ + +/* Store NUMBER in two contiguous bytes starting at DESTINATION. */ + +#define STORE_NUMBER(destination, number) \ + do { \ + (destination)[0] = (number) & 0377; \ + (destination)[1] = (number) >> 8; \ + } while (0) + +/* Same as STORE_NUMBER, except increment DESTINATION to + the byte after where the number is stored. Therefore, DESTINATION + must be an lvalue. */ + +#define STORE_NUMBER_AND_INCR(destination, number) \ + do { \ + STORE_NUMBER (destination, number); \ + (destination) += 2; \ + } while (0) + +/* Put into DESTINATION a number stored in two contiguous bytes starting + at SOURCE. */ + +#define EXTRACT_NUMBER(destination, source) \ + do { \ + (destination) = *(source) & 0377; \ + (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ + } while (0) + +#ifdef DEBUG +static void extract_number _RE_ARGS ((int *dest, unsigned char *source)); +static void +extract_number (dest, source) + int *dest; + unsigned char *source; +{ + int temp = SIGN_EXTEND_CHAR (*(source + 1)); + *dest = *source & 0377; + *dest += temp << 8; +} + +#ifndef EXTRACT_MACROS /* To debug the macros. */ +#undef EXTRACT_NUMBER +#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) +#endif /* not EXTRACT_MACROS */ + +#endif /* DEBUG */ + +/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. + SOURCE must be an lvalue. */ + +#define EXTRACT_NUMBER_AND_INCR(destination, source) \ + do { \ + EXTRACT_NUMBER (destination, source); \ + (source) += 2; \ + } while (0) + +#ifdef DEBUG +static void extract_number_and_incr _RE_ARGS ((int *destination, + unsigned char **source)); +static void +extract_number_and_incr (destination, source) + int *destination; + unsigned char **source; +{ + extract_number (destination, *source); + *source += 2; +} + +#ifndef EXTRACT_MACROS +#undef EXTRACT_NUMBER_AND_INCR +#define EXTRACT_NUMBER_AND_INCR(dest, src) \ + extract_number_and_incr (&dest, &src) +#endif /* not EXTRACT_MACROS */ + +#endif /* DEBUG */ + +/* If DEBUG is defined, Regex prints many voluminous messages about what + it is doing (if the variable `debug' is nonzero). If linked with the + main program in `iregex.c', you can enter patterns and strings + interactively. And if linked with the main program in `main.c' and + the other test files, you can run the already-written tests. */ + +#ifdef DEBUG + +/* We use standard I/O for debugging. */ +#include + +/* It is useful to test things that ``must'' be true when debugging. */ +#include + +static int debug = 0; + +#define DEBUG_STATEMENT(e) e +#define DEBUG_PRINT1(x) if (debug) printf (x) +#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) +#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) +#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) +#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ + if (debug) print_partial_compiled_pattern (s, e) +#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ + if (debug) print_double_string (w, s1, sz1, s2, sz2) + + +/* Print the fastmap in human-readable form. */ + +void +print_fastmap (fastmap) + char *fastmap; +{ + unsigned was_a_range = 0; + unsigned i = 0; + + while (i < (1 << BYTEWIDTH)) + { + if (fastmap[i++]) + { + was_a_range = 0; + putchar (i - 1); + while (i < (1 << BYTEWIDTH) && fastmap[i]) + { + was_a_range = 1; + i++; + } + if (was_a_range) + { + printf ("-"); + putchar (i - 1); + } + } + } + putchar ('\n'); +} + + +/* Print a compiled pattern string in human-readable form, starting at + the START pointer into it and ending just before the pointer END. */ + +void +print_partial_compiled_pattern (start, end) + unsigned char *start; + unsigned char *end; +{ + int mcnt, mcnt2; + unsigned char *p1; + unsigned char *p = start; + unsigned char *pend = end; + + if (start == NULL) + { + printf ("(null)\n"); + return; + } + + /* Loop over pattern commands. */ + while (p < pend) + { + printf ("%d:\t", p - start); + + switch ((re_opcode_t) *p++) + { + case no_op: + printf ("/no_op"); + break; + + case exactn: + mcnt = *p++; + printf ("/exactn/%d", mcnt); + do + { + putchar ('/'); + putchar (*p++); + } + while (--mcnt); + break; + + case start_memory: + mcnt = *p++; + printf ("/start_memory/%d/%d", mcnt, *p++); + break; + + case stop_memory: + mcnt = *p++; + printf ("/stop_memory/%d/%d", mcnt, *p++); + break; + + case duplicate: + printf ("/duplicate/%d", *p++); + break; + + case anychar: + printf ("/anychar"); + break; + + case charset: + case charset_not: + { + register int c, last = -100; + register int in_range = 0; + + printf ("/charset [%s", + (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); + + assert (p + *p < pend); + + for (c = 0; c < 256; c++) + if (c / 8 < *p + && (p[1 + (c/8)] & (1 << (c % 8)))) + { + /* Are we starting a range? */ + if (last + 1 == c && ! in_range) + { + putchar ('-'); + in_range = 1; + } + /* Have we broken a range? */ + else if (last + 1 != c && in_range) + { + putchar (last); + in_range = 0; + } + + if (! in_range) + putchar (c); + + last = c; + } + + if (in_range) + putchar (last); + + putchar (']'); + + p += 1 + *p; + } + break; + + case begline: + printf ("/begline"); + break; + + case endline: + printf ("/endline"); + break; + + case on_failure_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/on_failure_jump to %d", p + mcnt - start); + break; + + case on_failure_keep_string_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/on_failure_keep_string_jump to %d", p + mcnt - start); + break; + + case dummy_failure_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/dummy_failure_jump to %d", p + mcnt - start); + break; + + case push_dummy_failure: + printf ("/push_dummy_failure"); + break; + + case maybe_pop_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/maybe_pop_jump to %d", p + mcnt - start); + break; + + case pop_failure_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/pop_failure_jump to %d", p + mcnt - start); + break; + + case jump_past_alt: + extract_number_and_incr (&mcnt, &p); + printf ("/jump_past_alt to %d", p + mcnt - start); + break; + + case jump: + extract_number_and_incr (&mcnt, &p); + printf ("/jump to %d", p + mcnt - start); + break; + + case succeed_n: + extract_number_and_incr (&mcnt, &p); + p1 = p + mcnt; + extract_number_and_incr (&mcnt2, &p); + printf ("/succeed_n to %d, %d times", p1 - start, mcnt2); + break; + + case jump_n: + extract_number_and_incr (&mcnt, &p); + p1 = p + mcnt; + extract_number_and_incr (&mcnt2, &p); + printf ("/jump_n to %d, %d times", p1 - start, mcnt2); + break; + + case set_number_at: + extract_number_and_incr (&mcnt, &p); + p1 = p + mcnt; + extract_number_and_incr (&mcnt2, &p); + printf ("/set_number_at location %d to %d", p1 - start, mcnt2); + break; + + case wordbound: + printf ("/wordbound"); + break; + + case notwordbound: + printf ("/notwordbound"); + break; + + case wordbeg: + printf ("/wordbeg"); + break; + + case wordend: + printf ("/wordend"); + +#ifdef emacs + case before_dot: + printf ("/before_dot"); + break; + + case at_dot: + printf ("/at_dot"); + break; + + case after_dot: + printf ("/after_dot"); + break; + + case syntaxspec: + printf ("/syntaxspec"); + mcnt = *p++; + printf ("/%d", mcnt); + break; + + case notsyntaxspec: + printf ("/notsyntaxspec"); + mcnt = *p++; + printf ("/%d", mcnt); + break; +#endif /* emacs */ + + case wordchar: + printf ("/wordchar"); + break; + + case notwordchar: + printf ("/notwordchar"); + break; + + case begbuf: + printf ("/begbuf"); + break; + + case endbuf: + printf ("/endbuf"); + break; + + default: + printf ("?%d", *(p-1)); + } + + putchar ('\n'); + } + + printf ("%d:\tend of pattern.\n", p - start); +} + + +void +print_compiled_pattern (bufp) + struct re_pattern_buffer *bufp; +{ + unsigned char *buffer = bufp->buffer; + + print_partial_compiled_pattern (buffer, buffer + bufp->used); + printf ("%ld bytes used/%ld bytes allocated.\n", + bufp->used, bufp->allocated); + + if (bufp->fastmap_accurate && bufp->fastmap) + { + printf ("fastmap: "); + print_fastmap (bufp->fastmap); + } + + printf ("re_nsub: %d\t", bufp->re_nsub); + printf ("regs_alloc: %d\t", bufp->regs_allocated); + printf ("can_be_null: %d\t", bufp->can_be_null); + printf ("newline_anchor: %d\n", bufp->newline_anchor); + printf ("no_sub: %d\t", bufp->no_sub); + printf ("not_bol: %d\t", bufp->not_bol); + printf ("not_eol: %d\t", bufp->not_eol); + printf ("syntax: %lx\n", bufp->syntax); + /* Perhaps we should print the translate table? */ +} + + +void +print_double_string (where, string1, size1, string2, size2) + const char *where; + const char *string1; + const char *string2; + int size1; + int size2; +{ + int this_char; + + if (where == NULL) + printf ("(null)"); + else + { + if (FIRST_STRING_P (where)) + { + for (this_char = where - string1; this_char < size1; this_char++) + putchar (string1[this_char]); + + where = string2; + } + + for (this_char = where - string2; this_char < size2; this_char++) + putchar (string2[this_char]); + } +} + +void +printchar (c) + int c; +{ + putc (c, stderr); +} + +#else /* not DEBUG */ + +#undef assert +#define assert(e) + +#define DEBUG_STATEMENT(e) +#define DEBUG_PRINT1(x) +#define DEBUG_PRINT2(x1, x2) +#define DEBUG_PRINT3(x1, x2, x3) +#define DEBUG_PRINT4(x1, x2, x3, x4) +#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) +#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) + +#endif /* not DEBUG */ + +/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can + also be assigned to arbitrarily: each pattern buffer stores its own + syntax, so it can be changed between regex compilations. */ +/* This has no initializer because initialized variables in Emacs + become read-only after dumping. */ +reg_syntax_t re_syntax_options; + + +/* Specify the precise syntax of regexps for compilation. This provides + for compatibility for various utilities which historically have + different, incompatible syntaxes. + + The argument SYNTAX is a bit mask comprised of the various bits + defined in regex.h. We return the old syntax. */ + +reg_syntax_t +re_set_syntax (syntax) + reg_syntax_t syntax; +{ + reg_syntax_t ret = re_syntax_options; + + re_syntax_options = syntax; +#ifdef DEBUG + if (syntax & RE_DEBUG) + debug = 1; + else if (debug) /* was on but now is not */ + debug = 0; +#endif /* DEBUG */ + return ret; +} + +/* This table gives an error message for each of the error codes listed + in regex.h. Obviously the order here has to be same as there. + POSIX doesn't require that we do anything for REG_NOERROR, + but why not be nice? */ + +static const char *re_error_msgid[] = + { + gettext_noop ("Success"), /* REG_NOERROR */ + gettext_noop ("No match"), /* REG_NOMATCH */ + gettext_noop ("Invalid regular expression"), /* REG_BADPAT */ + gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */ + gettext_noop ("Invalid character class name"), /* REG_ECTYPE */ + gettext_noop ("Trailing backslash"), /* REG_EESCAPE */ + gettext_noop ("Invalid back reference"), /* REG_ESUBREG */ + gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */ + gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */ + gettext_noop ("Unmatched \\{"), /* REG_EBRACE */ + gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */ + gettext_noop ("Invalid range end"), /* REG_ERANGE */ + gettext_noop ("Memory exhausted"), /* REG_ESPACE */ + gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */ + gettext_noop ("Premature end of regular expression"), /* REG_EEND */ + gettext_noop ("Regular expression too big"), /* REG_ESIZE */ + gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */ + }; + +/* Avoiding alloca during matching, to placate r_alloc. */ + +/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the + searching and matching functions should not call alloca. On some + systems, alloca is implemented in terms of malloc, and if we're + using the relocating allocator routines, then malloc could cause a + relocation, which might (if the strings being searched are in the + ralloc heap) shift the data out from underneath the regexp + routines. + + Here's another reason to avoid allocation: Emacs + processes input from X in a signal handler; processing X input may + call malloc; if input arrives while a matching routine is calling + malloc, then we're scrod. But Emacs can't just block input while + calling matching routines; then we don't notice interrupts when + they come in. So, Emacs blocks input around all regexp calls + except the matching calls, which it leaves unprotected, in the + faith that they will not malloc. */ + +/* Normally, this is fine. */ +#define MATCH_MAY_ALLOCATE + +/* When using GNU C, we are not REALLY using the C alloca, no matter + what config.h may say. So don't take precautions for it. */ +#ifdef __GNUC__ +#undef C_ALLOCA +#endif + +/* The match routines may not allocate if (1) they would do it with malloc + and (2) it's not safe for them to use malloc. + Note that if REL_ALLOC is defined, matching would not use malloc for the + failure stack, but we would still use it for the register vectors; + so REL_ALLOC should not affect this. */ +#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs) +#undef MATCH_MAY_ALLOCATE +#endif + + +/* Failure stack declarations and macros; both re_compile_fastmap and + re_match_2 use a failure stack. These have to be macros because of + REGEX_ALLOCATE_STACK. */ + + +/* Number of failure points for which to initially allocate space + when matching. If this number is exceeded, we allocate more + space, so it is not a hard limit. */ +#ifndef INIT_FAILURE_ALLOC +#define INIT_FAILURE_ALLOC 5 +#endif + +/* Roughly the maximum number of failure points on the stack. Would be + exactly that if always used MAX_FAILURE_ITEMS items each time we failed. + This is a variable only so users of regex can assign to it; we never + change it ourselves. */ + +#ifdef INT_IS_16BIT + +#if defined (MATCH_MAY_ALLOCATE) +/* 4400 was enough to cause a crash on Alpha OSF/1, + whose default stack limit is 2mb. */ +long int re_max_failures = 4000; +#else +long int re_max_failures = 2000; +#endif + +union fail_stack_elt +{ + unsigned char *pointer; + long int integer; +}; + +typedef union fail_stack_elt fail_stack_elt_t; + +typedef struct +{ + fail_stack_elt_t *stack; + unsigned long int size; + unsigned long int avail; /* Offset of next open position. */ +} fail_stack_type; + +#else /* not INT_IS_16BIT */ + +#if defined (MATCH_MAY_ALLOCATE) +/* 4400 was enough to cause a crash on Alpha OSF/1, + whose default stack limit is 2mb. */ +int re_max_failures = 20000; +#else +int re_max_failures = 2000; +#endif + +union fail_stack_elt +{ + unsigned char *pointer; + int integer; +}; + +typedef union fail_stack_elt fail_stack_elt_t; + +typedef struct +{ + fail_stack_elt_t *stack; + unsigned size; + unsigned avail; /* Offset of next open position. */ +} fail_stack_type; + +#endif /* INT_IS_16BIT */ + +#define FAIL_STACK_EMPTY() (fail_stack.avail == 0) +#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) +#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) + + +/* Define macros to initialize and free the failure stack. + Do `return -2' if the alloc fails. */ + +#ifdef MATCH_MAY_ALLOCATE +#define INIT_FAIL_STACK() \ + do { \ + fail_stack.stack = (fail_stack_elt_t *) \ + REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \ + \ + if (fail_stack.stack == NULL) \ + return -2; \ + \ + fail_stack.size = INIT_FAILURE_ALLOC; \ + fail_stack.avail = 0; \ + } while (0) + +#define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack) +#else +#define INIT_FAIL_STACK() \ + do { \ + fail_stack.avail = 0; \ + } while (0) + +#define RESET_FAIL_STACK() +#endif + + +/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. + + Return 1 if succeeds, and 0 if either ran out of memory + allocating space for it or it was already too large. + + REGEX_REALLOCATE_STACK requires `destination' be declared. */ + +#define DOUBLE_FAIL_STACK(fail_stack) \ + ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \ + ? 0 \ + : ((fail_stack).stack = (fail_stack_elt_t *) \ + REGEX_REALLOCATE_STACK ((fail_stack).stack, \ + (fail_stack).size * sizeof (fail_stack_elt_t), \ + ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \ + \ + (fail_stack).stack == NULL \ + ? 0 \ + : ((fail_stack).size <<= 1, \ + 1))) + + +/* Push pointer POINTER on FAIL_STACK. + Return 1 if was able to do so and 0 if ran out of memory allocating + space to do so. */ +#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \ + ((FAIL_STACK_FULL () \ + && !DOUBLE_FAIL_STACK (FAIL_STACK)) \ + ? 0 \ + : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \ + 1)) + +/* Push a pointer value onto the failure stack. + Assumes the variable `fail_stack'. Probably should only + be called from within `PUSH_FAILURE_POINT'. */ +#define PUSH_FAILURE_POINTER(item) \ + fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item) + +/* This pushes an integer-valued item onto the failure stack. + Assumes the variable `fail_stack'. Probably should only + be called from within `PUSH_FAILURE_POINT'. */ +#define PUSH_FAILURE_INT(item) \ + fail_stack.stack[fail_stack.avail++].integer = (item) + +/* Push a fail_stack_elt_t value onto the failure stack. + Assumes the variable `fail_stack'. Probably should only + be called from within `PUSH_FAILURE_POINT'. */ +#define PUSH_FAILURE_ELT(item) \ + fail_stack.stack[fail_stack.avail++] = (item) + +/* These three POP... operations complement the three PUSH... operations. + All assume that `fail_stack' is nonempty. */ +#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer +#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer +#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail] + +/* Used to omit pushing failure point id's when we're not debugging. */ +#ifdef DEBUG +#define DEBUG_PUSH PUSH_FAILURE_INT +#define DEBUG_POP(item_addr) (item_addr)->integer = POP_FAILURE_INT () +#else +#define DEBUG_PUSH(item) +#define DEBUG_POP(item_addr) +#endif + + +/* Push the information about the state we will need + if we ever fail back to it. + + Requires variables fail_stack, regstart, regend, reg_info, and + num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be + declared. + + Does `return FAILURE_CODE' if runs out of memory. */ + +#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ + do { \ + char *destination; \ + /* Must be int, so when we don't save any registers, the arithmetic \ + of 0 + -1 isn't done as unsigned. */ \ + /* Can't be int, since there is not a shred of a guarantee that int \ + is wide enough to hold a value of something to which pointer can \ + be assigned */ \ + s_reg_t this_reg; \ + \ + DEBUG_STATEMENT (failure_id++); \ + DEBUG_STATEMENT (nfailure_points_pushed++); \ + DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ + DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ + DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ + \ + DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \ + DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ + \ + /* Ensure we have enough space allocated for what we will push. */ \ + while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ + { \ + if (!DOUBLE_FAIL_STACK (fail_stack)) \ + return failure_code; \ + \ + DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ + (fail_stack).size); \ + DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ + } \ + \ + /* Push the info, starting with the registers. */ \ + DEBUG_PRINT1 ("\n"); \ + \ + if (1) \ + for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ + this_reg++) \ + { \ + DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \ + DEBUG_STATEMENT (num_regs_pushed++); \ + \ + DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ + PUSH_FAILURE_POINTER (regstart[this_reg]); \ + \ + DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ + PUSH_FAILURE_POINTER (regend[this_reg]); \ + \ + DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \ + DEBUG_PRINT2 (" match_null=%d", \ + REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \ + DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \ + DEBUG_PRINT2 (" matched_something=%d", \ + MATCHED_SOMETHING (reg_info[this_reg])); \ + DEBUG_PRINT2 (" ever_matched=%d", \ + EVER_MATCHED_SOMETHING (reg_info[this_reg])); \ + DEBUG_PRINT1 ("\n"); \ + PUSH_FAILURE_ELT (reg_info[this_reg].word); \ + } \ + \ + DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\ + PUSH_FAILURE_INT (lowest_active_reg); \ + \ + DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\ + PUSH_FAILURE_INT (highest_active_reg); \ + \ + DEBUG_PRINT2 (" Pushing pattern 0x%x:\n", pattern_place); \ + DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ + PUSH_FAILURE_POINTER (pattern_place); \ + \ + DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \ + DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ + size2); \ + DEBUG_PRINT1 ("'\n"); \ + PUSH_FAILURE_POINTER (string_place); \ + \ + DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ + DEBUG_PUSH (failure_id); \ + } while (0) + +/* This is the number of items that are pushed and popped on the stack + for each register. */ +#define NUM_REG_ITEMS 3 + +/* Individual items aside from the registers. */ +#ifdef DEBUG +#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ +#else +#define NUM_NONREG_ITEMS 4 +#endif + +/* We push at most this many items on the stack. */ +/* We used to use (num_regs - 1), which is the number of registers + this regexp will save; but that was changed to 5 + to avoid stack overflow for a regexp with lots of parens. */ +#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS) + +/* We actually push this many items. */ +#define NUM_FAILURE_ITEMS \ + (((0 \ + ? 0 : highest_active_reg - lowest_active_reg + 1) \ + * NUM_REG_ITEMS) \ + + NUM_NONREG_ITEMS) + +/* How many items can still be added to the stack without overflowing it. */ +#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) + + +/* Pops what PUSH_FAIL_STACK pushes. + + We restore into the parameters, all of which should be lvalues: + STR -- the saved data position. + PAT -- the saved pattern position. + LOW_REG, HIGH_REG -- the highest and lowest active registers. + REGSTART, REGEND -- arrays of string positions. + REG_INFO -- array of information about each subexpression. + + Also assumes the variables `fail_stack' and (if debugging), `bufp', + `pend', `string1', `size1', `string2', and `size2'. */ + +#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ +{ \ + DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \ + s_reg_t this_reg; \ + const unsigned char *string_temp; \ + \ + assert (!FAIL_STACK_EMPTY ()); \ + \ + /* Remove failure points and point to how many regs pushed. */ \ + DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \ + DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ + DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ + \ + assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ + \ + DEBUG_POP (&failure_id); \ + DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \ + \ + /* If the saved string location is NULL, it came from an \ + on_failure_keep_string_jump opcode, and we want to throw away the \ + saved NULL, thus retaining our current position in the string. */ \ + string_temp = POP_FAILURE_POINTER (); \ + if (string_temp != NULL) \ + str = (const char *) string_temp; \ + \ + DEBUG_PRINT2 (" Popping string 0x%x: `", str); \ + DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ + DEBUG_PRINT1 ("'\n"); \ + \ + pat = (unsigned char *) POP_FAILURE_POINTER (); \ + DEBUG_PRINT2 (" Popping pattern 0x%x:\n", pat); \ + DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ + \ + /* Restore register info. */ \ + high_reg = (active_reg_t) POP_FAILURE_INT (); \ + DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \ + \ + low_reg = (active_reg_t) POP_FAILURE_INT (); \ + DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \ + \ + if (1) \ + for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ + { \ + DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \ + \ + reg_info[this_reg].word = POP_FAILURE_ELT (); \ + DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \ + \ + regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \ + DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ + \ + regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \ + DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ + } \ + else \ + { \ + for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \ + { \ + reg_info[this_reg].word.integer = 0; \ + regend[this_reg] = 0; \ + regstart[this_reg] = 0; \ + } \ + highest_active_reg = high_reg; \ + } \ + \ + set_regs_matched_done = 0; \ + DEBUG_STATEMENT (nfailure_points_popped++); \ +} /* POP_FAILURE_POINT */ + + + +/* Structure for per-register (a.k.a. per-group) information. + Other register information, such as the + starting and ending positions (which are addresses), and the list of + inner groups (which is a bits list) are maintained in separate + variables. + + We are making a (strictly speaking) nonportable assumption here: that + the compiler will pack our bit fields into something that fits into + the type of `word', i.e., is something that fits into one item on the + failure stack. */ + + +/* Declarations and macros for re_match_2. */ + +typedef union +{ + fail_stack_elt_t word; + struct + { + /* This field is one if this group can match the empty string, + zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ +#define MATCH_NULL_UNSET_VALUE 3 + unsigned match_null_string_p : 2; + unsigned is_active : 1; + unsigned matched_something : 1; + unsigned ever_matched_something : 1; + } bits; +} register_info_type; + +#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) +#define IS_ACTIVE(R) ((R).bits.is_active) +#define MATCHED_SOMETHING(R) ((R).bits.matched_something) +#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) + + +/* Call this when have matched a real character; it sets `matched' flags + for the subexpressions which we are currently inside. Also records + that those subexprs have matched. */ +#define SET_REGS_MATCHED() \ + do \ + { \ + if (!set_regs_matched_done) \ + { \ + active_reg_t r; \ + set_regs_matched_done = 1; \ + for (r = lowest_active_reg; r <= highest_active_reg; r++) \ + { \ + MATCHED_SOMETHING (reg_info[r]) \ + = EVER_MATCHED_SOMETHING (reg_info[r]) \ + = 1; \ + } \ + } \ + } \ + while (0) + +/* Registers are set to a sentinel when they haven't yet matched. */ +static char reg_unset_dummy; +#define REG_UNSET_VALUE (®_unset_dummy) +#define REG_UNSET(e) ((e) == REG_UNSET_VALUE) + +/* Subroutine declarations and macros for regex_compile. */ + +static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size, + reg_syntax_t syntax, + struct re_pattern_buffer *bufp)); +static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg)); +static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc, + int arg1, int arg2)); +static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, + int arg, unsigned char *end)); +static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc, + int arg1, int arg2, unsigned char *end)); +static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p, + reg_syntax_t syntax)); +static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend, + reg_syntax_t syntax)); +static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr, + const char *pend, + char *translate, + reg_syntax_t syntax, + unsigned char *b)); + +/* Fetch the next character in the uncompiled pattern---translating it + if necessary. Also cast from a signed character in the constant + string passed to us by the user to an unsigned char that we can use + as an array index (in, e.g., `translate'). */ +#ifndef PATFETCH +#define PATFETCH(c) \ + do {if (p == pend) return REG_EEND; \ + c = (unsigned char) *p++; \ + if (translate) c = (unsigned char) translate[c]; \ + } while (0) +#endif + +/* Fetch the next character in the uncompiled pattern, with no + translation. */ +#define PATFETCH_RAW(c) \ + do {if (p == pend) return REG_EEND; \ + c = (unsigned char) *p++; \ + } while (0) + +/* Go backwards one character in the pattern. */ +#define PATUNFETCH p-- + + +/* If `translate' is non-null, return translate[D], else just D. We + cast the subscript to translate because some data is declared as + `char *', to avoid warnings when a string constant is passed. But + when we use a character as a subscript we must make it unsigned. */ +#ifndef TRANSLATE +#define TRANSLATE(d) \ + (translate ? (char) translate[(unsigned char) (d)] : (d)) +#endif + + +/* Macros for outputting the compiled pattern into `buffer'. */ + +/* If the buffer isn't allocated when it comes in, use this. */ +#define INIT_BUF_SIZE 32 + +/* Make sure we have at least N more bytes of space in buffer. */ +#define GET_BUFFER_SPACE(n) \ + while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \ + EXTEND_BUFFER () + +/* Make sure we have one more byte of buffer space and then add C to it. */ +#define BUF_PUSH(c) \ + do { \ + GET_BUFFER_SPACE (1); \ + *b++ = (unsigned char) (c); \ + } while (0) + + +/* Ensure we have two more bytes of buffer space and then append C1 and C2. */ +#define BUF_PUSH_2(c1, c2) \ + do { \ + GET_BUFFER_SPACE (2); \ + *b++ = (unsigned char) (c1); \ + *b++ = (unsigned char) (c2); \ + } while (0) + + +/* As with BUF_PUSH_2, except for three bytes. */ +#define BUF_PUSH_3(c1, c2, c3) \ + do { \ + GET_BUFFER_SPACE (3); \ + *b++ = (unsigned char) (c1); \ + *b++ = (unsigned char) (c2); \ + *b++ = (unsigned char) (c3); \ + } while (0) + + +/* Store a jump with opcode OP at LOC to location TO. We store a + relative address offset by the three bytes the jump itself occupies. */ +#define STORE_JUMP(op, loc, to) \ + store_op1 (op, loc, (int) ((to) - (loc) - 3)) + +/* Likewise, for a two-argument jump. */ +#define STORE_JUMP2(op, loc, to, arg) \ + store_op2 (op, loc, (int) ((to) - (loc) - 3), arg) + +/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */ +#define INSERT_JUMP(op, loc, to) \ + insert_op1 (op, loc, (int) ((to) - (loc) - 3), b) + +/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */ +#define INSERT_JUMP2(op, loc, to, arg) \ + insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b) + + +/* This is not an arbitrary limit: the arguments which represent offsets + into the pattern are two bytes long. So if 2^16 bytes turns out to + be too small, many things would have to change. */ +/* Any other compiler which, like MSC, has allocation limit below 2^16 + bytes will have to use approach similar to what was done below for + MSC and drop MAX_BUF_SIZE a bit. Otherwise you may end up + reallocating to 0 bytes. Such thing is not going to work too well. + You have been warned!! */ +#if defined(_MSC_VER) && !defined(WIN32) +/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes. + The REALLOC define eliminates a flurry of conversion warnings, + but is not required. */ +#define MAX_BUF_SIZE 65500L +#define REALLOC(p,s) realloc ((p), (size_t) (s)) +#else +#define MAX_BUF_SIZE (1L << 16) +#define REALLOC(p,s) realloc ((p), (s)) +#endif + +/* Extend the buffer by twice its current size via realloc and + reset the pointers that pointed into the old block to point to the + correct places in the new one. If extending the buffer results in it + being larger than MAX_BUF_SIZE, then flag memory exhausted. */ +#define EXTEND_BUFFER() \ + do { \ + unsigned char *old_buffer = bufp->buffer; \ + if (bufp->allocated == MAX_BUF_SIZE) \ + return REG_ESIZE; \ + bufp->allocated <<= 1; \ + if (bufp->allocated > MAX_BUF_SIZE) \ + bufp->allocated = MAX_BUF_SIZE; \ + bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\ + if (bufp->buffer == NULL) \ + return REG_ESPACE; \ + /* If the buffer moved, move all the pointers into it. */ \ + if (old_buffer != bufp->buffer) \ + { \ + b = (b - old_buffer) + bufp->buffer; \ + begalt = (begalt - old_buffer) + bufp->buffer; \ + if (fixup_alt_jump) \ + fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\ + if (laststart) \ + laststart = (laststart - old_buffer) + bufp->buffer; \ + if (pending_exact) \ + pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ + } \ + } while (0) + + +/* Since we have one byte reserved for the register number argument to + {start,stop}_memory, the maximum number of groups we can report + things about is what fits in that byte. */ +#define MAX_REGNUM 255 + +/* But patterns can have more than `MAX_REGNUM' registers. We just + ignore the excess. */ +typedef unsigned regnum_t; + + +/* Macros for the compile stack. */ + +/* Since offsets can go either forwards or backwards, this type needs to + be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */ +/* int may be not enough when sizeof(int) == 2. */ +typedef long pattern_offset_t; + +typedef struct +{ + pattern_offset_t begalt_offset; + pattern_offset_t fixup_alt_jump; + pattern_offset_t inner_group_offset; + pattern_offset_t laststart_offset; + regnum_t regnum; +} compile_stack_elt_t; + + +typedef struct +{ + compile_stack_elt_t *stack; + unsigned size; + unsigned avail; /* Offset of next open position. */ +} compile_stack_type; + + +#define INIT_COMPILE_STACK_SIZE 32 + +#define COMPILE_STACK_EMPTY (compile_stack.avail == 0) +#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) + +/* The next available element. */ +#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) + + +/* Set the bit for character C in a list. */ +#define SET_LIST_BIT(c) \ + (b[((unsigned char) (c)) / BYTEWIDTH] \ + |= 1 << (((unsigned char) c) % BYTEWIDTH)) + + +/* Get the next unsigned number in the uncompiled pattern. */ +#define GET_UNSIGNED_NUMBER(num) \ + { if (p != pend) \ + { \ + PATFETCH (c); \ + while (ISDIGIT (c)) \ + { \ + if (num < 0) \ + num = 0; \ + num = num * 10 + c - '0'; \ + if (p == pend) \ + break; \ + PATFETCH (c); \ + } \ + } \ + } + +#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) +/* The GNU C library provides support for user-defined character classes + and the functions from ISO C amendement 1. */ +# ifdef CHARCLASS_NAME_MAX +# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX +# else +/* This shouldn't happen but some implementation might still have this + problem. Use a reasonable default value. */ +# define CHAR_CLASS_MAX_LENGTH 256 +# endif + +# define IS_CHAR_CLASS(string) wctype (string) +#else +# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ + +# define IS_CHAR_CLASS(string) \ + (STREQ (string, "alpha") || STREQ (string, "upper") \ + || STREQ (string, "lower") || STREQ (string, "digit") \ + || STREQ (string, "alnum") || STREQ (string, "xdigit") \ + || STREQ (string, "space") || STREQ (string, "print") \ + || STREQ (string, "punct") || STREQ (string, "graph") \ + || STREQ (string, "cntrl") || STREQ (string, "blank")) +#endif + +#ifndef MATCH_MAY_ALLOCATE + +/* If we cannot allocate large objects within re_match_2_internal, + we make the fail stack and register vectors global. + The fail stack, we grow to the maximum size when a regexp + is compiled. + The register vectors, we adjust in size each time we + compile a regexp, according to the number of registers it needs. */ + +static fail_stack_type fail_stack; + +/* Size with which the following vectors are currently allocated. + That is so we can make them bigger as needed, + but never make them smaller. */ +static int regs_allocated_size; + +static const char ** regstart, ** regend; +static const char ** old_regstart, ** old_regend; +static const char **best_regstart, **best_regend; +static register_info_type *reg_info; +static const char **reg_dummy; +static register_info_type *reg_info_dummy; + +/* Make the register vectors big enough for NUM_REGS registers, + but don't make them smaller. */ + +static +regex_grow_registers (num_regs) + int num_regs; +{ + if (num_regs > regs_allocated_size) + { + RETALLOC_IF (regstart, num_regs, const char *); + RETALLOC_IF (regend, num_regs, const char *); + RETALLOC_IF (old_regstart, num_regs, const char *); + RETALLOC_IF (old_regend, num_regs, const char *); + RETALLOC_IF (best_regstart, num_regs, const char *); + RETALLOC_IF (best_regend, num_regs, const char *); + RETALLOC_IF (reg_info, num_regs, register_info_type); + RETALLOC_IF (reg_dummy, num_regs, const char *); + RETALLOC_IF (reg_info_dummy, num_regs, register_info_type); + + regs_allocated_size = num_regs; + } +} + +#endif /* not MATCH_MAY_ALLOCATE */ + +static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type + compile_stack, + regnum_t regnum)); + +/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. + Returns one of error codes defined in `regex.h', or zero for success. + + Assumes the `allocated' (and perhaps `buffer') and `translate' + fields are set in BUFP on entry. + + If it succeeds, results are put in BUFP (if it returns an error, the + contents of BUFP are undefined): + `buffer' is the compiled pattern; + `syntax' is set to SYNTAX; + `used' is set to the length of the compiled pattern; + `fastmap_accurate' is zero; + `re_nsub' is the number of subexpressions in PATTERN; + `not_bol' and `not_eol' are zero; + + The `fastmap' and `newline_anchor' fields are neither + examined nor set. */ + +/* Return, freeing storage we allocated. */ +#define FREE_STACK_RETURN(value) \ + return (free (compile_stack.stack), value) + +static reg_errcode_t +regex_compile (pattern, size, syntax, bufp) + const char *pattern; + size_t size; + reg_syntax_t syntax; + struct re_pattern_buffer *bufp; +{ + /* We fetch characters from PATTERN here. Even though PATTERN is + `char *' (i.e., signed), we declare these variables as unsigned, so + they can be reliably used as array indices. */ + register unsigned char c, c1; + + /* A random temporary spot in PATTERN. */ + const char *p1; + + /* Points to the end of the buffer, where we should append. */ + register unsigned char *b; + + /* Keeps track of unclosed groups. */ + compile_stack_type compile_stack; + + /* Points to the current (ending) position in the pattern. */ + const char *p = pattern; + const char *pend = pattern + size; + + /* How to translate the characters in the pattern. */ + RE_TRANSLATE_TYPE translate = bufp->translate; + + /* Address of the count-byte of the most recently inserted `exactn' + command. This makes it possible to tell if a new exact-match + character can be added to that command or if the character requires + a new `exactn' command. */ + unsigned char *pending_exact = 0; + + /* Address of start of the most recently finished expression. + This tells, e.g., postfix * where to find the start of its + operand. Reset at the beginning of groups and alternatives. */ + unsigned char *laststart = 0; + + /* Address of beginning of regexp, or inside of last group. */ + unsigned char *begalt; + + /* Place in the uncompiled pattern (i.e., the {) to + which to go back if the interval is invalid. */ + const char *beg_interval; + + /* Address of the place where a forward jump should go to the end of + the containing expression. Each alternative of an `or' -- except the + last -- ends with a forward jump of this sort. */ + unsigned char *fixup_alt_jump = 0; + + /* Counts open-groups as they are encountered. Remembered for the + matching close-group on the compile stack, so the same register + number is put in the stop_memory as the start_memory. */ + regnum_t regnum = 0; + +#ifdef DEBUG + DEBUG_PRINT1 ("\nCompiling pattern: "); + if (debug) + { + unsigned debug_count; + + for (debug_count = 0; debug_count < size; debug_count++) + putchar (pattern[debug_count]); + putchar ('\n'); + } +#endif /* DEBUG */ + + /* Initialize the compile stack. */ + compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); + if (compile_stack.stack == NULL) + return REG_ESPACE; + + compile_stack.size = INIT_COMPILE_STACK_SIZE; + compile_stack.avail = 0; + + /* Initialize the pattern buffer. */ + bufp->syntax = syntax; + bufp->fastmap_accurate = 0; + bufp->not_bol = bufp->not_eol = 0; + + /* Set `used' to zero, so that if we return an error, the pattern + printer (for debugging) will think there's no pattern. We reset it + at the end. */ + bufp->used = 0; + + /* Always count groups, whether or not bufp->no_sub is set. */ + bufp->re_nsub = 0; + +#if !defined (emacs) && !defined (SYNTAX_TABLE) + /* Initialize the syntax table. */ + init_syntax_once (); +#endif + + if (bufp->allocated == 0) + { + if (bufp->buffer) + { /* If zero allocated, but buffer is non-null, try to realloc + enough space. This loses if buffer's address is bogus, but + that is the user's responsibility. */ + RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); + } + else + { /* Caller did not allocate a buffer. Do it for them. */ + bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); + } + if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); + + bufp->allocated = INIT_BUF_SIZE; + } + + begalt = b = bufp->buffer; + + /* Loop through the uncompiled pattern until we're at the end. */ + while (p != pend) + { + PATFETCH (c); + + switch (c) + { + case '^': + { + if ( /* If at start of pattern, it's an operator. */ + p == pattern + 1 + /* If context independent, it's an operator. */ + || syntax & RE_CONTEXT_INDEP_ANCHORS + /* Otherwise, depends on what's come before. */ + || at_begline_loc_p (pattern, p, syntax)) + BUF_PUSH (begline); + else + goto normal_char; + } + break; + + + case '$': + { + if ( /* If at end of pattern, it's an operator. */ + p == pend + /* If context independent, it's an operator. */ + || syntax & RE_CONTEXT_INDEP_ANCHORS + /* Otherwise, depends on what's next. */ + || at_endline_loc_p (p, pend, syntax)) + BUF_PUSH (endline); + else + goto normal_char; + } + break; + + + case '+': + case '?': + if ((syntax & RE_BK_PLUS_QM) + || (syntax & RE_LIMITED_OPS)) + goto normal_char; + handle_plus: + case '*': + /* If there is no previous pattern... */ + if (!laststart) + { + if (syntax & RE_CONTEXT_INVALID_OPS) + FREE_STACK_RETURN (REG_BADRPT); + else if (!(syntax & RE_CONTEXT_INDEP_OPS)) + goto normal_char; + } + + { + /* Are we optimizing this jump? */ + boolean keep_string_p = false; + + /* 1 means zero (many) matches is allowed. */ + char zero_times_ok = 0, many_times_ok = 0; + + /* If there is a sequence of repetition chars, collapse it + down to just one (the right one). We can't combine + interval operators with these because of, e.g., `a{2}*', + which should only match an even number of `a's. */ + + for (;;) + { + zero_times_ok |= c != '+'; + many_times_ok |= c != '?'; + + if (p == pend) + break; + + PATFETCH (c); + + if (c == '*' + || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?'))) + ; + + else if (syntax & RE_BK_PLUS_QM && c == '\\') + { + if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); + + PATFETCH (c1); + if (!(c1 == '+' || c1 == '?')) + { + PATUNFETCH; + PATUNFETCH; + break; + } + + c = c1; + } + else + { + PATUNFETCH; + break; + } + + /* If we get here, we found another repeat character. */ + } + + /* Star, etc. applied to an empty pattern is equivalent + to an empty pattern. */ + if (!laststart) + break; + + /* Now we know whether or not zero matches is allowed + and also whether or not two or more matches is allowed. */ + if (many_times_ok) + { /* More than one repetition is allowed, so put in at the + end a backward relative jump from `b' to before the next + jump we're going to put in below (which jumps from + laststart to after this jump). + + But if we are at the `*' in the exact sequence `.*\n', + insert an unconditional jump backwards to the ., + instead of the beginning of the loop. This way we only + push a failure point once, instead of every time + through the loop. */ + assert (p - 1 > pattern); + + /* Allocate the space for the jump. */ + GET_BUFFER_SPACE (3); + + /* We know we are not at the first character of the pattern, + because laststart was nonzero. And we've already + incremented `p', by the way, to be the character after + the `*'. Do we have to do something analogous here + for null bytes, because of RE_DOT_NOT_NULL? */ + if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') + && zero_times_ok + && p < pend && TRANSLATE (*p) == TRANSLATE ('\n') + && !(syntax & RE_DOT_NEWLINE)) + { /* We have .*\n. */ + STORE_JUMP (jump, b, laststart); + keep_string_p = true; + } + else + /* Anything else. */ + STORE_JUMP (maybe_pop_jump, b, laststart - 3); + + /* We've added more stuff to the buffer. */ + b += 3; + } + + /* On failure, jump from laststart to b + 3, which will be the + end of the buffer after this jump is inserted. */ + GET_BUFFER_SPACE (3); + INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump + : on_failure_jump, + laststart, b + 3); + pending_exact = 0; + b += 3; + + if (!zero_times_ok) + { + /* At least one repetition is required, so insert a + `dummy_failure_jump' before the initial + `on_failure_jump' instruction of the loop. This + effects a skip over that instruction the first time + we hit that loop. */ + GET_BUFFER_SPACE (3); + INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); + b += 3; + } + } + break; + + + case '.': + laststart = b; + BUF_PUSH (anychar); + break; + + + case '[': + { + boolean had_char_class = false; + + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); + + /* Ensure that we have enough space to push a charset: the + opcode, the length count, and the bitset; 34 bytes in all. */ + GET_BUFFER_SPACE (34); + + laststart = b; + + /* We test `*p == '^' twice, instead of using an if + statement, so we only need one BUF_PUSH. */ + BUF_PUSH (*p == '^' ? charset_not : charset); + if (*p == '^') + p++; + + /* Remember the first position in the bracket expression. */ + p1 = p; + + /* Push the number of bytes in the bitmap. */ + BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); + + /* Clear the whole map. */ + bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); + + /* charset_not matches newline according to a syntax bit. */ + if ((re_opcode_t) b[-2] == charset_not + && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) + SET_LIST_BIT ('\n'); + + /* Read in characters and ranges, setting map bits. */ + for (;;) + { + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); + + PATFETCH (c); + + /* \ might escape characters inside [...] and [^...]. */ + if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') + { + if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); + + PATFETCH (c1); + SET_LIST_BIT (c1); + continue; + } + + /* Could be the end of the bracket expression. If it's + not (i.e., when the bracket expression is `[]' so + far), the ']' character bit gets set way below. */ + if (c == ']' && p != p1 + 1) + break; + + /* Look ahead to see if it's a range when the last thing + was a character class. */ + if (had_char_class && c == '-' && *p != ']') + FREE_STACK_RETURN (REG_ERANGE); + + /* Look ahead to see if it's a range when the last thing + was a character: if this is a hyphen not at the + beginning or the end of a list, then it's the range + operator. */ + if (c == '-' + && !(p - 2 >= pattern && p[-2] == '[') + && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') + && *p != ']') + { + reg_errcode_t ret + = compile_range (&p, pend, translate, syntax, b); + if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); + } + + else if (p[0] == '-' && p[1] != ']') + { /* This handles ranges made up of characters only. */ + reg_errcode_t ret; + + /* Move past the `-'. */ + PATFETCH (c1); + + ret = compile_range (&p, pend, translate, syntax, b); + if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); + } + + /* See if we're at the beginning of a possible character + class. */ + + else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') + { /* Leave room for the null. */ + char str[CHAR_CLASS_MAX_LENGTH + 1]; + + PATFETCH (c); + c1 = 0; + + /* If pattern is `[[:'. */ + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); + + for (;;) + { + PATFETCH (c); + if (c == ':' || c == ']' || p == pend + || c1 == CHAR_CLASS_MAX_LENGTH) + break; + str[c1++] = c; + } + str[c1] = '\0'; + + /* If isn't a word bracketed by `[:' and:`]': + undo the ending character, the letters, and leave + the leading `:' and `[' (but set bits for them). */ + if (c == ':' && *p == ']') + { +#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) + boolean is_lower = STREQ (str, "lower"); + boolean is_upper = STREQ (str, "upper"); + wctype_t wt; + int ch; + + wt = wctype (str); + if (wt == 0) + FREE_STACK_RETURN (REG_ECTYPE); + + /* Throw away the ] at the end of the character + class. */ + PATFETCH (c); + + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); + + for (ch = 0; ch < 1 << BYTEWIDTH; ++ch) + { + if (iswctype (btowc (ch), wt)) + SET_LIST_BIT (ch); + + if (translate && (is_upper || is_lower) + && (ISUPPER (ch) || ISLOWER (ch))) + SET_LIST_BIT (ch); + } + + had_char_class = true; +#else + int ch; + boolean is_alnum = STREQ (str, "alnum"); + boolean is_alpha = STREQ (str, "alpha"); + boolean is_blank = STREQ (str, "blank"); + boolean is_cntrl = STREQ (str, "cntrl"); + boolean is_digit = STREQ (str, "digit"); + boolean is_graph = STREQ (str, "graph"); + boolean is_lower = STREQ (str, "lower"); + boolean is_print = STREQ (str, "print"); + boolean is_punct = STREQ (str, "punct"); + boolean is_space = STREQ (str, "space"); + boolean is_upper = STREQ (str, "upper"); + boolean is_xdigit = STREQ (str, "xdigit"); + + if (!IS_CHAR_CLASS (str)) + FREE_STACK_RETURN (REG_ECTYPE); + + /* Throw away the ] at the end of the character + class. */ + PATFETCH (c); + + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); + + for (ch = 0; ch < 1 << BYTEWIDTH; ch++) + { + /* This was split into 3 if's to + avoid an arbitrary limit in some compiler. */ + if ( (is_alnum && ISALNUM (ch)) + || (is_alpha && ISALPHA (ch)) + || (is_blank && ISBLANK (ch)) + || (is_cntrl && ISCNTRL (ch))) + SET_LIST_BIT (ch); + if ( (is_digit && ISDIGIT (ch)) + || (is_graph && ISGRAPH (ch)) + || (is_lower && ISLOWER (ch)) + || (is_print && ISPRINT (ch))) + SET_LIST_BIT (ch); + if ( (is_punct && ISPUNCT (ch)) + || (is_space && ISSPACE (ch)) + || (is_upper && ISUPPER (ch)) + || (is_xdigit && ISXDIGIT (ch))) + SET_LIST_BIT (ch); + if ( translate && (is_upper || is_lower) + && (ISUPPER (ch) || ISLOWER (ch))) + SET_LIST_BIT (ch); + } + had_char_class = true; +#endif /* libc || wctype.h */ + } + else + { + c1++; + while (c1--) + PATUNFETCH; + SET_LIST_BIT ('['); + SET_LIST_BIT (':'); + had_char_class = false; + } + } + else + { + had_char_class = false; + SET_LIST_BIT (c); + } + } + + /* Discard any (non)matching list bytes that are all 0 at the + end of the map. Decrease the map-length byte too. */ + while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) + b[-1]--; + b += b[-1]; + } + break; + + + case '(': + if (syntax & RE_NO_BK_PARENS) + goto handle_open; + else + goto normal_char; + + + case ')': + if (syntax & RE_NO_BK_PARENS) + goto handle_close; + else + goto normal_char; + + + case '\n': + if (syntax & RE_NEWLINE_ALT) + goto handle_alt; + else + goto normal_char; + + + case '|': + if (syntax & RE_NO_BK_VBAR) + goto handle_alt; + else + goto normal_char; + + + case '{': + if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) + goto handle_interval; + else + goto normal_char; + + + case '\\': + if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); + + /* Do not translate the character after the \, so that we can + distinguish, e.g., \B from \b, even if we normally would + translate, e.g., B to b. */ + PATFETCH_RAW (c); + + switch (c) + { + case '(': + if (syntax & RE_NO_BK_PARENS) + goto normal_backslash; + + handle_open: + bufp->re_nsub++; + regnum++; + + if (COMPILE_STACK_FULL) + { + RETALLOC (compile_stack.stack, compile_stack.size << 1, + compile_stack_elt_t); + if (compile_stack.stack == NULL) return REG_ESPACE; + + compile_stack.size <<= 1; + } + + /* These are the values to restore when we hit end of this + group. They are all relative offsets, so that if the + whole pattern moves because of realloc, they will still + be valid. */ + COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; + COMPILE_STACK_TOP.fixup_alt_jump + = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; + COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; + COMPILE_STACK_TOP.regnum = regnum; + + /* We will eventually replace the 0 with the number of + groups inner to this one. But do not push a + start_memory for groups beyond the last one we can + represent in the compiled pattern. */ + if (regnum <= MAX_REGNUM) + { + COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; + BUF_PUSH_3 (start_memory, regnum, 0); + } + + compile_stack.avail++; + + fixup_alt_jump = 0; + laststart = 0; + begalt = b; + /* If we've reached MAX_REGNUM groups, then this open + won't actually generate any code, so we'll have to + clear pending_exact explicitly. */ + pending_exact = 0; + break; + + + case ')': + if (syntax & RE_NO_BK_PARENS) goto normal_backslash; + + if (COMPILE_STACK_EMPTY) + if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) + goto normal_backslash; + else + FREE_STACK_RETURN (REG_ERPAREN); + + handle_close: + if (fixup_alt_jump) + { /* Push a dummy failure point at the end of the + alternative for a possible future + `pop_failure_jump' to pop. See comments at + `push_dummy_failure' in `re_match_2'. */ + BUF_PUSH (push_dummy_failure); + + /* We allocated space for this jump when we assigned + to `fixup_alt_jump', in the `handle_alt' case below. */ + STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); + } + + /* See similar code for backslashed left paren above. */ + if (COMPILE_STACK_EMPTY) + if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) + goto normal_char; + else + FREE_STACK_RETURN (REG_ERPAREN); + + /* Since we just checked for an empty stack above, this + ``can't happen''. */ + assert (compile_stack.avail != 0); + { + /* We don't just want to restore into `regnum', because + later groups should continue to be numbered higher, + as in `(ab)c(de)' -- the second group is #2. */ + regnum_t this_group_regnum; + + compile_stack.avail--; + begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; + fixup_alt_jump + = COMPILE_STACK_TOP.fixup_alt_jump + ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 + : 0; + laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; + this_group_regnum = COMPILE_STACK_TOP.regnum; + /* If we've reached MAX_REGNUM groups, then this open + won't actually generate any code, so we'll have to + clear pending_exact explicitly. */ + pending_exact = 0; + + /* We're at the end of the group, so now we know how many + groups were inside this one. */ + if (this_group_regnum <= MAX_REGNUM) + { + unsigned char *inner_group_loc + = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; + + *inner_group_loc = regnum - this_group_regnum; + BUF_PUSH_3 (stop_memory, this_group_regnum, + regnum - this_group_regnum); + } + } + break; + + + case '|': /* `\|'. */ + if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) + goto normal_backslash; + handle_alt: + if (syntax & RE_LIMITED_OPS) + goto normal_char; + + /* Insert before the previous alternative a jump which + jumps to this alternative if the former fails. */ + GET_BUFFER_SPACE (3); + INSERT_JUMP (on_failure_jump, begalt, b + 6); + pending_exact = 0; + b += 3; + + /* The alternative before this one has a jump after it + which gets executed if it gets matched. Adjust that + jump so it will jump to this alternative's analogous + jump (put in below, which in turn will jump to the next + (if any) alternative's such jump, etc.). The last such + jump jumps to the correct final destination. A picture: + _____ _____ + | | | | + | v | v + a | b | c + + If we are at `b', then fixup_alt_jump right now points to a + three-byte space after `a'. We'll put in the jump, set + fixup_alt_jump to right after `b', and leave behind three + bytes which we'll fill in when we get to after `c'. */ + + if (fixup_alt_jump) + STORE_JUMP (jump_past_alt, fixup_alt_jump, b); + + /* Mark and leave space for a jump after this alternative, + to be filled in later either by next alternative or + when know we're at the end of a series of alternatives. */ + fixup_alt_jump = b; + GET_BUFFER_SPACE (3); + b += 3; + + laststart = 0; + begalt = b; + break; + + + case '{': + /* If \{ is a literal. */ + if (!(syntax & RE_INTERVALS) + /* If we're at `\{' and it's not the open-interval + operator. */ + || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) + || (p - 2 == pattern && p == pend)) + goto normal_backslash; + + handle_interval: + { + /* If got here, then the syntax allows intervals. */ + + /* At least (most) this many matches must be made. */ + int lower_bound = -1, upper_bound = -1; + + beg_interval = p - 1; + + if (p == pend) + { + if (syntax & RE_NO_BK_BRACES) + goto unfetch_interval; + else + FREE_STACK_RETURN (REG_EBRACE); + } + + GET_UNSIGNED_NUMBER (lower_bound); + + if (c == ',') + { + GET_UNSIGNED_NUMBER (upper_bound); + if (upper_bound < 0) upper_bound = RE_DUP_MAX; + } + else + /* Interval such as `{1}' => match exactly once. */ + upper_bound = lower_bound; + + if (lower_bound < 0 || upper_bound > RE_DUP_MAX + || lower_bound > upper_bound) + { + if (syntax & RE_NO_BK_BRACES) + goto unfetch_interval; + else + FREE_STACK_RETURN (REG_BADBR); + } + + if (!(syntax & RE_NO_BK_BRACES)) + { + if (c != '\\') FREE_STACK_RETURN (REG_EBRACE); + + PATFETCH (c); + } + + if (c != '}') + { + if (syntax & RE_NO_BK_BRACES) + goto unfetch_interval; + else + FREE_STACK_RETURN (REG_BADBR); + } + + /* We just parsed a valid interval. */ + + /* If it's invalid to have no preceding re. */ + if (!laststart) + { + if (syntax & RE_CONTEXT_INVALID_OPS) + FREE_STACK_RETURN (REG_BADRPT); + else if (syntax & RE_CONTEXT_INDEP_OPS) + laststart = b; + else + goto unfetch_interval; + } + + /* If the upper bound is zero, don't want to succeed at + all; jump from `laststart' to `b + 3', which will be + the end of the buffer after we insert the jump. */ + if (upper_bound == 0) + { + GET_BUFFER_SPACE (3); + INSERT_JUMP (jump, laststart, b + 3); + b += 3; + } + + /* Otherwise, we have a nontrivial interval. When + we're all done, the pattern will look like: + set_number_at + set_number_at + succeed_n + + jump_n + (The upper bound and `jump_n' are omitted if + `upper_bound' is 1, though.) */ + else + { /* If the upper bound is > 1, we need to insert + more at the end of the loop. */ + unsigned nbytes = 10 + (upper_bound > 1) * 10; + + GET_BUFFER_SPACE (nbytes); + + /* Initialize lower bound of the `succeed_n', even + though it will be set during matching by its + attendant `set_number_at' (inserted next), + because `re_compile_fastmap' needs to know. + Jump to the `jump_n' we might insert below. */ + INSERT_JUMP2 (succeed_n, laststart, + b + 5 + (upper_bound > 1) * 5, + lower_bound); + b += 5; + + /* Code to initialize the lower bound. Insert + before the `succeed_n'. The `5' is the last two + bytes of this `set_number_at', plus 3 bytes of + the following `succeed_n'. */ + insert_op2 (set_number_at, laststart, 5, lower_bound, b); + b += 5; + + if (upper_bound > 1) + { /* More than one repetition is allowed, so + append a backward jump to the `succeed_n' + that starts this interval. + + When we've reached this during matching, + we'll have matched the interval once, so + jump back only `upper_bound - 1' times. */ + STORE_JUMP2 (jump_n, b, laststart + 5, + upper_bound - 1); + b += 5; + + /* The location we want to set is the second + parameter of the `jump_n'; that is `b-2' as + an absolute address. `laststart' will be + the `set_number_at' we're about to insert; + `laststart+3' the number to set, the source + for the relative address. But we are + inserting into the middle of the pattern -- + so everything is getting moved up by 5. + Conclusion: (b - 2) - (laststart + 3) + 5, + i.e., b - laststart. + + We insert this at the beginning of the loop + so that if we fail during matching, we'll + reinitialize the bounds. */ + insert_op2 (set_number_at, laststart, b - laststart, + upper_bound - 1, b); + b += 5; + } + } + pending_exact = 0; + beg_interval = NULL; + } + break; + + unfetch_interval: + /* If an invalid interval, match the characters as literals. */ + assert (beg_interval); + p = beg_interval; + beg_interval = NULL; + + /* normal_char and normal_backslash need `c'. */ + PATFETCH (c); + + if (!(syntax & RE_NO_BK_BRACES)) + { + if (p > pattern && p[-1] == '\\') + goto normal_backslash; + } + goto normal_char; + +#ifdef emacs + /* There is no way to specify the before_dot and after_dot + operators. rms says this is ok. --karl */ + case '=': + BUF_PUSH (at_dot); + break; + + case 's': + laststart = b; + PATFETCH (c); + BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); + break; + + case 'S': + laststart = b; + PATFETCH (c); + BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); + break; +#endif /* emacs */ + + + case 'w': + if (re_syntax_options & RE_NO_GNU_OPS) + goto normal_char; + laststart = b; + BUF_PUSH (wordchar); + break; + + + case 'W': + if (re_syntax_options & RE_NO_GNU_OPS) + goto normal_char; + laststart = b; + BUF_PUSH (notwordchar); + break; + + + case '<': + if (re_syntax_options & RE_NO_GNU_OPS) + goto normal_char; + BUF_PUSH (wordbeg); + break; + + case '>': + if (re_syntax_options & RE_NO_GNU_OPS) + goto normal_char; + BUF_PUSH (wordend); + break; + + case 'b': + if (re_syntax_options & RE_NO_GNU_OPS) + goto normal_char; + BUF_PUSH (wordbound); + break; + + case 'B': + if (re_syntax_options & RE_NO_GNU_OPS) + goto normal_char; + BUF_PUSH (notwordbound); + break; + + case '`': + if (re_syntax_options & RE_NO_GNU_OPS) + goto normal_char; + BUF_PUSH (begbuf); + break; + + case '\'': + if (re_syntax_options & RE_NO_GNU_OPS) + goto normal_char; + BUF_PUSH (endbuf); + break; + + case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + if (syntax & RE_NO_BK_REFS) + goto normal_char; + + c1 = c - '0'; + + if (c1 > regnum) + FREE_STACK_RETURN (REG_ESUBREG); + + /* Can't back reference to a subexpression if inside of it. */ + if (group_in_compile_stack (compile_stack, (regnum_t) c1)) + goto normal_char; + + laststart = b; + BUF_PUSH_2 (duplicate, c1); + break; + + + case '+': + case '?': + if (syntax & RE_BK_PLUS_QM) + goto handle_plus; + else + goto normal_backslash; + + default: + normal_backslash: + /* You might think it would be useful for \ to mean + not to translate; but if we don't translate it + it will never match anything. */ + c = TRANSLATE (c); + goto normal_char; + } + break; + + + default: + /* Expects the character in `c'. */ + normal_char: + /* If no exactn currently being built. */ + if (!pending_exact + + /* If last exactn not at current position. */ + || pending_exact + *pending_exact + 1 != b + + /* We have only one byte following the exactn for the count. */ + || *pending_exact == (1 << BYTEWIDTH) - 1 + + /* If followed by a repetition operator. */ + || *p == '*' || *p == '^' + || ((syntax & RE_BK_PLUS_QM) + ? *p == '\\' && (p[1] == '+' || p[1] == '?') + : (*p == '+' || *p == '?')) + || ((syntax & RE_INTERVALS) + && ((syntax & RE_NO_BK_BRACES) + ? *p == '{' + : (p[0] == '\\' && p[1] == '{')))) + { + /* Start building a new exactn. */ + + laststart = b; + + BUF_PUSH_2 (exactn, 0); + pending_exact = b - 1; + } + + BUF_PUSH (c); + (*pending_exact)++; + break; + } /* switch (c) */ + } /* while p != pend */ + + + /* Through the pattern now. */ + + if (fixup_alt_jump) + STORE_JUMP (jump_past_alt, fixup_alt_jump, b); + + if (!COMPILE_STACK_EMPTY) + FREE_STACK_RETURN (REG_EPAREN); + + /* If we don't want backtracking, force success + the first time we reach the end of the compiled pattern. */ + if (syntax & RE_NO_POSIX_BACKTRACKING) + BUF_PUSH (succeed); + + free (compile_stack.stack); + + /* We have succeeded; set the length of the buffer. */ + bufp->used = b - bufp->buffer; + +#ifdef DEBUG + if (debug) + { + DEBUG_PRINT1 ("\nCompiled pattern: \n"); + print_compiled_pattern (bufp); + } +#endif /* DEBUG */ + +#ifndef MATCH_MAY_ALLOCATE + /* Initialize the failure stack to the largest possible stack. This + isn't necessary unless we're trying to avoid calling alloca in + the search and match routines. */ + { + int num_regs = bufp->re_nsub + 1; + + /* Since DOUBLE_FAIL_STACK refuses to double only if the current size + is strictly greater than re_max_failures, the largest possible stack + is 2 * re_max_failures failure points. */ + if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) + { + fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); + +#ifdef emacs + if (! fail_stack.stack) + fail_stack.stack + = (fail_stack_elt_t *) xmalloc (fail_stack.size + * sizeof (fail_stack_elt_t)); + else + fail_stack.stack + = (fail_stack_elt_t *) xrealloc (fail_stack.stack, + (fail_stack.size + * sizeof (fail_stack_elt_t))); +#else /* not emacs */ + if (! fail_stack.stack) + fail_stack.stack + = (fail_stack_elt_t *) malloc (fail_stack.size + * sizeof (fail_stack_elt_t)); + else + fail_stack.stack + = (fail_stack_elt_t *) realloc (fail_stack.stack, + (fail_stack.size + * sizeof (fail_stack_elt_t))); +#endif /* not emacs */ + } + + regex_grow_registers (num_regs); + } +#endif /* not MATCH_MAY_ALLOCATE */ + + return REG_NOERROR; +} /* regex_compile */ + +/* Subroutines for `regex_compile'. */ + +/* Store OP at LOC followed by two-byte integer parameter ARG. */ + +static void +store_op1 (op, loc, arg) + re_opcode_t op; + unsigned char *loc; + int arg; +{ + *loc = (unsigned char) op; + STORE_NUMBER (loc + 1, arg); +} + + +/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ + +static void +store_op2 (op, loc, arg1, arg2) + re_opcode_t op; + unsigned char *loc; + int arg1, arg2; +{ + *loc = (unsigned char) op; + STORE_NUMBER (loc + 1, arg1); + STORE_NUMBER (loc + 3, arg2); +} + + +/* Copy the bytes from LOC to END to open up three bytes of space at LOC + for OP followed by two-byte integer parameter ARG. */ + +static void +insert_op1 (op, loc, arg, end) + re_opcode_t op; + unsigned char *loc; + int arg; + unsigned char *end; +{ + register unsigned char *pfrom = end; + register unsigned char *pto = end + 3; + + while (pfrom != loc) + *--pto = *--pfrom; + + store_op1 (op, loc, arg); +} + + +/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ + +static void +insert_op2 (op, loc, arg1, arg2, end) + re_opcode_t op; + unsigned char *loc; + int arg1, arg2; + unsigned char *end; +{ + register unsigned char *pfrom = end; + register unsigned char *pto = end + 5; + + while (pfrom != loc) + *--pto = *--pfrom; + + store_op2 (op, loc, arg1, arg2); +} + + +/* P points to just after a ^ in PATTERN. Return true if that ^ comes + after an alternative or a begin-subexpression. We assume there is at + least one character before the ^. */ + +static boolean +at_begline_loc_p (pattern, p, syntax) + const char *pattern, *p; + reg_syntax_t syntax; +{ + const char *prev = p - 2; + boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; + + return + /* After a subexpression? */ + (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) + /* After an alternative? */ + || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); +} + + +/* The dual of at_begline_loc_p. This one is for $. We assume there is + at least one character after the $, i.e., `P < PEND'. */ + +static boolean +at_endline_loc_p (p, pend, syntax) + const char *p, *pend; + reg_syntax_t syntax; +{ + const char *next = p; + boolean next_backslash = *next == '\\'; + const char *next_next = p + 1 < pend ? p + 1 : 0; + + return + /* Before a subexpression? */ + (syntax & RE_NO_BK_PARENS ? *next == ')' + : next_backslash && next_next && *next_next == ')') + /* Before an alternative? */ + || (syntax & RE_NO_BK_VBAR ? *next == '|' + : next_backslash && next_next && *next_next == '|'); +} + + +/* Returns true if REGNUM is in one of COMPILE_STACK's elements and + false if it's not. */ + +static boolean +group_in_compile_stack (compile_stack, regnum) + compile_stack_type compile_stack; + regnum_t regnum; +{ + int this_element; + + for (this_element = compile_stack.avail - 1; + this_element >= 0; + this_element--) + if (compile_stack.stack[this_element].regnum == regnum) + return true; + + return false; +} + + +/* Read the ending character of a range (in a bracket expression) from the + uncompiled pattern *P_PTR (which ends at PEND). We assume the + starting character is in `P[-2]'. (`P[-1]' is the character `-'.) + Then we set the translation of all bits between the starting and + ending characters (inclusive) in the compiled pattern B. + + Return an error code. + + We use these short variable names so we can use the same macros as + `regex_compile' itself. */ + +static reg_errcode_t +compile_range (p_ptr, pend, translate, syntax, b) + const char **p_ptr, *pend; + RE_TRANSLATE_TYPE translate; + reg_syntax_t syntax; + unsigned char *b; +{ + unsigned this_char; + + const char *p = *p_ptr; + unsigned int range_start, range_end; + + if (p == pend) + return REG_ERANGE; + + /* Even though the pattern is a signed `char *', we need to fetch + with unsigned char *'s; if the high bit of the pattern character + is set, the range endpoints will be negative if we fetch using a + signed char *. + + We also want to fetch the endpoints without translating them; the + appropriate translation is done in the bit-setting loop below. */ + /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ + range_start = ((const unsigned char *) p)[-2]; + range_end = ((const unsigned char *) p)[0]; + + /* Have to increment the pointer into the pattern string, so the + caller isn't still at the ending character. */ + (*p_ptr)++; + + /* If the start is after the end, the range is empty. */ + if (range_start > range_end) + return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; + + /* Here we see why `this_char' has to be larger than an `unsigned + char' -- the range is inclusive, so if `range_end' == 0xff + (assuming 8-bit characters), we would otherwise go into an infinite + loop, since all characters <= 0xff. */ + for (this_char = range_start; this_char <= range_end; this_char++) + { + SET_LIST_BIT (TRANSLATE (this_char)); + } + + return REG_NOERROR; +} + +/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in + BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible + characters can start a string that matches the pattern. This fastmap + is used by re_search to skip quickly over impossible starting points. + + The caller must supply the address of a (1 << BYTEWIDTH)-byte data + area as BUFP->fastmap. + + We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in + the pattern buffer. + + Returns 0 if we succeed, -2 if an internal error. */ + +int +re_compile_fastmap (bufp) + struct re_pattern_buffer *bufp; +{ + int j, k; +#ifdef MATCH_MAY_ALLOCATE + fail_stack_type fail_stack; +#endif +#ifndef REGEX_MALLOC + char *destination; +#endif + /* We don't push any register information onto the failure stack. */ + unsigned num_regs = 0; + + register char *fastmap = bufp->fastmap; + unsigned char *pattern = bufp->buffer; + unsigned char *p = pattern; + register unsigned char *pend = pattern + bufp->used; + +#ifdef REL_ALLOC + /* This holds the pointer to the failure stack, when + it is allocated relocatably. */ + fail_stack_elt_t *failure_stack_ptr; +#endif + + /* Assume that each path through the pattern can be null until + proven otherwise. We set this false at the bottom of switch + statement, to which we get only if a particular path doesn't + match the empty string. */ + boolean path_can_be_null = true; + + /* We aren't doing a `succeed_n' to begin with. */ + boolean succeed_n_p = false; + + assert (fastmap != NULL && p != NULL); + + INIT_FAIL_STACK (); + bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ + bufp->fastmap_accurate = 1; /* It will be when we're done. */ + bufp->can_be_null = 0; + + while (1) + { + if (p == pend || *p == succeed) + { + /* We have reached the (effective) end of pattern. */ + if (!FAIL_STACK_EMPTY ()) + { + bufp->can_be_null |= path_can_be_null; + + /* Reset for next path. */ + path_can_be_null = true; + + p = fail_stack.stack[--fail_stack.avail].pointer; + + continue; + } + else + break; + } + + /* We should never be about to go beyond the end of the pattern. */ + assert (p < pend); + + switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) + { + + /* I guess the idea here is to simply not bother with a fastmap + if a backreference is used, since it's too hard to figure out + the fastmap for the corresponding group. Setting + `can_be_null' stops `re_search_2' from using the fastmap, so + that is all we do. */ + case duplicate: + bufp->can_be_null = 1; + goto done; + + + /* Following are the cases which match a character. These end + with `break'. */ + + case exactn: + fastmap[p[1]] = 1; + break; + + + case charset: + for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) + if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) + fastmap[j] = 1; + break; + + + case charset_not: + /* Chars beyond end of map must be allowed. */ + for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) + fastmap[j] = 1; + + for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) + if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) + fastmap[j] = 1; + break; + + + case wordchar: + for (j = 0; j < (1 << BYTEWIDTH); j++) + if (SYNTAX (j) == Sword) + fastmap[j] = 1; + break; + + + case notwordchar: + for (j = 0; j < (1 << BYTEWIDTH); j++) + if (SYNTAX (j) != Sword) + fastmap[j] = 1; + break; + + + case anychar: + { + int fastmap_newline = fastmap['\n']; + + /* `.' matches anything ... */ + for (j = 0; j < (1 << BYTEWIDTH); j++) + fastmap[j] = 1; + + /* ... except perhaps newline. */ + if (!(bufp->syntax & RE_DOT_NEWLINE)) + fastmap['\n'] = fastmap_newline; + + /* Return if we have already set `can_be_null'; if we have, + then the fastmap is irrelevant. Something's wrong here. */ + else if (bufp->can_be_null) + goto done; + + /* Otherwise, have to check alternative paths. */ + break; + } + +#ifdef emacs + case syntaxspec: + k = *p++; + for (j = 0; j < (1 << BYTEWIDTH); j++) + if (SYNTAX (j) == (enum syntaxcode) k) + fastmap[j] = 1; + break; + + + case notsyntaxspec: + k = *p++; + for (j = 0; j < (1 << BYTEWIDTH); j++) + if (SYNTAX (j) != (enum syntaxcode) k) + fastmap[j] = 1; + break; + + + /* All cases after this match the empty string. These end with + `continue'. */ + + + case before_dot: + case at_dot: + case after_dot: + continue; +#endif /* emacs */ + + + case no_op: + case begline: + case endline: + case begbuf: + case endbuf: + case wordbound: + case notwordbound: + case wordbeg: + case wordend: + case push_dummy_failure: + continue; + + + case jump_n: + case pop_failure_jump: + case maybe_pop_jump: + case jump: + case jump_past_alt: + case dummy_failure_jump: + EXTRACT_NUMBER_AND_INCR (j, p); + p += j; + if (j > 0) + continue; + + /* Jump backward implies we just went through the body of a + loop and matched nothing. Opcode jumped to should be + `on_failure_jump' or `succeed_n'. Just treat it like an + ordinary jump. For a * loop, it has pushed its failure + point already; if so, discard that as redundant. */ + if ((re_opcode_t) *p != on_failure_jump + && (re_opcode_t) *p != succeed_n) + continue; + + p++; + EXTRACT_NUMBER_AND_INCR (j, p); + p += j; + + /* If what's on the stack is where we are now, pop it. */ + if (!FAIL_STACK_EMPTY () + && fail_stack.stack[fail_stack.avail - 1].pointer == p) + fail_stack.avail--; + + continue; + + + case on_failure_jump: + case on_failure_keep_string_jump: + handle_on_failure_jump: + EXTRACT_NUMBER_AND_INCR (j, p); + + /* For some patterns, e.g., `(a?)?', `p+j' here points to the + end of the pattern. We don't want to push such a point, + since when we restore it above, entering the switch will + increment `p' past the end of the pattern. We don't need + to push such a point since we obviously won't find any more + fastmap entries beyond `pend'. Such a pattern can match + the null string, though. */ + if (p + j < pend) + { + if (!PUSH_PATTERN_OP (p + j, fail_stack)) + { + RESET_FAIL_STACK (); + return -2; + } + } + else + bufp->can_be_null = 1; + + if (succeed_n_p) + { + EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ + succeed_n_p = false; + } + + continue; + + + case succeed_n: + /* Get to the number of times to succeed. */ + p += 2; + + /* Increment p past the n for when k != 0. */ + EXTRACT_NUMBER_AND_INCR (k, p); + if (k == 0) + { + p -= 4; + succeed_n_p = true; /* Spaghetti code alert. */ + goto handle_on_failure_jump; + } + continue; + + + case set_number_at: + p += 4; + continue; + + + case start_memory: + case stop_memory: + p += 2; + continue; + + + default: + abort (); /* We have listed all the cases. */ + } /* switch *p++ */ + + /* Getting here means we have found the possible starting + characters for one path of the pattern -- and that the empty + string does not match. We need not follow this path further. + Instead, look at the next alternative (remembered on the + stack), or quit if no more. The test at the top of the loop + does these things. */ + path_can_be_null = false; + p = pend; + } /* while p */ + + /* Set `can_be_null' for the last path (also the first path, if the + pattern is empty). */ + bufp->can_be_null |= path_can_be_null; + + done: + RESET_FAIL_STACK (); + return 0; +} /* re_compile_fastmap */ + +/* Set REGS to hold NUM_REGS registers, storing them in STARTS and + ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use + this memory for recording register information. STARTS and ENDS + must be allocated using the malloc library routine, and must each + be at least NUM_REGS * sizeof (regoff_t) bytes long. + + If NUM_REGS == 0, then subsequent matches should allocate their own + register data. + + Unless this function is called, the first search or match using + PATTERN_BUFFER will allocate its own register data, without + freeing the old data. */ + +void +re_set_registers (bufp, regs, num_regs, starts, ends) + struct re_pattern_buffer *bufp; + struct re_registers *regs; + unsigned num_regs; + regoff_t *starts, *ends; +{ + if (num_regs) + { + bufp->regs_allocated = REGS_REALLOCATE; + regs->num_regs = num_regs; + regs->start = starts; + regs->end = ends; + } + else + { + bufp->regs_allocated = REGS_UNALLOCATED; + regs->num_regs = 0; + regs->start = regs->end = (regoff_t *) 0; + } +} + +/* Searching routines. */ + +/* Like re_search_2, below, but only one string is specified, and + doesn't let you say where to stop matching. */ + +int +re_search (bufp, string, size, startpos, range, regs) + struct re_pattern_buffer *bufp; + const char *string; + int size, startpos, range; + struct re_registers *regs; +{ + return re_search_2 (bufp, NULL, 0, string, size, startpos, range, + regs, size); +} + + +/* Using the compiled pattern in BUFP->buffer, first tries to match the + virtual concatenation of STRING1 and STRING2, starting first at index + STARTPOS, then at STARTPOS + 1, and so on. + + STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. + + RANGE is how far to scan while trying to match. RANGE = 0 means try + only at STARTPOS; in general, the last start tried is STARTPOS + + RANGE. + + In REGS, return the indices of the virtual concatenation of STRING1 + and STRING2 that matched the entire BUFP->buffer and its contained + subexpressions. + + Do not consider matching one past the index STOP in the virtual + concatenation of STRING1 and STRING2. + + We return either the position in the strings at which the match was + found, -1 if no match, or -2 if error (such as failure + stack overflow). */ + +int +re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) + struct re_pattern_buffer *bufp; + const char *string1, *string2; + int size1, size2; + int startpos; + int range; + struct re_registers *regs; + int stop; +{ + int val; + register char *fastmap = bufp->fastmap; + register RE_TRANSLATE_TYPE translate = bufp->translate; + int total_size = size1 + size2; + int endpos = startpos + range; + + /* Check for out-of-range STARTPOS. */ + if (startpos < 0 || startpos > total_size) + return -1; + + /* Fix up RANGE if it might eventually take us outside + the virtual concatenation of STRING1 and STRING2. + Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */ + if (endpos < 0) + range = 0 - startpos; + else if (endpos > total_size) + range = total_size - startpos; + + /* If the search isn't to be a backwards one, don't waste time in a + search for a pattern that must be anchored. */ + if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) + { + if (startpos > 0) + return -1; + else + range = 1; + } + +#ifdef emacs + /* In a forward search for something that starts with \=. + don't keep searching past point. */ + if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) + { + range = PT - startpos; + if (range <= 0) + return -1; + } +#endif /* emacs */ + + /* Update the fastmap now if not correct already. */ + if (fastmap && !bufp->fastmap_accurate) + if (re_compile_fastmap (bufp) == -2) + return -2; + + /* Loop through the string, looking for a place to start matching. */ + for (;;) + { + /* If a fastmap is supplied, skip quickly over characters that + cannot be the start of a match. If the pattern can match the + null string, however, we don't need to skip characters; we want + the first null string. */ + if (fastmap && startpos < total_size && !bufp->can_be_null) + { + if (range > 0) /* Searching forwards. */ + { + register const char *d; + register int lim = 0; + int irange = range; + + if (startpos < size1 && startpos + range >= size1) + lim = range - (size1 - startpos); + + d = (startpos >= size1 ? string2 - size1 : string1) + startpos; + + /* Written out as an if-else to avoid testing `translate' + inside the loop. */ + if (translate) + while (range > lim + && !fastmap[(unsigned char) + translate[(unsigned char) *d++]]) + range--; + else + while (range > lim && !fastmap[(unsigned char) *d++]) + range--; + + startpos += irange - range; + } + else /* Searching backwards. */ + { + register char c = (size1 == 0 || startpos >= size1 + ? string2[startpos - size1] + : string1[startpos]); + + if (!fastmap[(unsigned char) TRANSLATE (c)]) + goto advance; + } + } + + /* If can't match the null string, and that's all we have left, fail. */ + if (range >= 0 && startpos == total_size && fastmap + && !bufp->can_be_null) + return -1; + + val = re_match_2_internal (bufp, string1, size1, string2, size2, + startpos, regs, stop); +#ifndef REGEX_MALLOC +#ifdef C_ALLOCA + alloca (0); +#endif +#endif + + if (val >= 0) + return startpos; + + if (val == -2) + return -2; + + advance: + if (!range) + break; + else if (range > 0) + { + range--; + startpos++; + } + else + { + range++; + startpos--; + } + } + return -1; +} /* re_search_2 */ + +/* This converts PTR, a pointer into one of the search strings `string1' + and `string2' into an offset from the beginning of that string. */ +#define POINTER_TO_OFFSET(ptr) \ + (FIRST_STRING_P (ptr) \ + ? ((regoff_t) ((ptr) - string1)) \ + : ((regoff_t) ((ptr) - string2 + size1))) + +/* Macros for dealing with the split strings in re_match_2. */ + +#define MATCHING_IN_FIRST_STRING (dend == end_match_1) + +/* Call before fetching a character with *d. This switches over to + string2 if necessary. */ +#define PREFETCH() \ + while (d == dend) \ + { \ + /* End of string2 => fail. */ \ + if (dend == end_match_2) \ + goto fail; \ + /* End of string1 => advance to string2. */ \ + d = string2; \ + dend = end_match_2; \ + } + + +/* Test if at very beginning or at very end of the virtual concatenation + of `string1' and `string2'. If only one string, it's `string2'. */ +#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) +#define AT_STRINGS_END(d) ((d) == end2) + + +/* Test if D points to a character which is word-constituent. We have + two special cases to check for: if past the end of string1, look at + the first character in string2; and if before the beginning of + string2, look at the last character in string1. */ +#define WORDCHAR_P(d) \ + (SYNTAX ((d) == end1 ? *string2 \ + : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \ + == Sword) + +/* Disabled due to a compiler bug -- see comment at case wordbound */ +#if 0 +/* Test if the character before D and the one at D differ with respect + to being word-constituent. */ +#define AT_WORD_BOUNDARY(d) \ + (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \ + || WORDCHAR_P (d - 1) != WORDCHAR_P (d)) +#endif + +/* Free everything we malloc. */ +#ifdef MATCH_MAY_ALLOCATE +#define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL +#define FREE_VARIABLES() \ + do { \ + REGEX_FREE_STACK (fail_stack.stack); \ + FREE_VAR (regstart); \ + FREE_VAR (regend); \ + FREE_VAR (old_regstart); \ + FREE_VAR (old_regend); \ + FREE_VAR (best_regstart); \ + FREE_VAR (best_regend); \ + FREE_VAR (reg_info); \ + FREE_VAR (reg_dummy); \ + FREE_VAR (reg_info_dummy); \ + } while (0) +#else +#define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */ +#endif /* not MATCH_MAY_ALLOCATE */ + +/* These values must meet several constraints. They must not be valid + register values; since we have a limit of 255 registers (because + we use only one byte in the pattern for the register number), we can + use numbers larger than 255. They must differ by 1, because of + NUM_FAILURE_ITEMS above. And the value for the lowest register must + be larger than the value for the highest register, so we do not try + to actually save any registers when none are active. */ +#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH) +#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) + +/* Matching routines. */ + +#ifndef emacs /* Emacs never uses this. */ +/* re_match is like re_match_2 except it takes only a single string. */ + +int +re_match (bufp, string, size, pos, regs) + struct re_pattern_buffer *bufp; + const char *string; + int size, pos; + struct re_registers *regs; +{ + int result = re_match_2_internal (bufp, NULL, 0, string, size, + pos, regs, size); +#ifndef REGEX_MALLOC +#ifdef C_ALLOCA + alloca (0); +#endif +#endif + return result; +} +#endif /* not emacs */ + +static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p, + unsigned char *end, + register_info_type *reg_info)); +static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p, + unsigned char *end, + register_info_type *reg_info)); +static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p, + unsigned char *end, + register_info_type *reg_info)); +static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2, + int len, char *translate)); + +/* re_match_2 matches the compiled pattern in BUFP against the + the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 + and SIZE2, respectively). We start matching at POS, and stop + matching at STOP. + + If REGS is non-null and the `no_sub' field of BUFP is nonzero, we + store offsets for the substring each group matched in REGS. See the + documentation for exactly how many groups we fill. + + We return -1 if no match, -2 if an internal error (such as the + failure stack overflowing). Otherwise, we return the length of the + matched substring. */ + +int +re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) + struct re_pattern_buffer *bufp; + const char *string1, *string2; + int size1, size2; + int pos; + struct re_registers *regs; + int stop; +{ + int result = re_match_2_internal (bufp, string1, size1, string2, size2, + pos, regs, stop); +#ifndef REGEX_MALLOC +#ifdef C_ALLOCA + alloca (0); +#endif +#endif + return result; +} + +/* This is a separate function so that we can force an alloca cleanup + afterwards. */ +static int +re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) + struct re_pattern_buffer *bufp; + const char *string1, *string2; + int size1, size2; + int pos; + struct re_registers *regs; + int stop; +{ + /* General temporaries. */ + int mcnt; + unsigned char *p1; + + /* Just past the end of the corresponding string. */ + const char *end1, *end2; + + /* Pointers into string1 and string2, just past the last characters in + each to consider matching. */ + const char *end_match_1, *end_match_2; + + /* Where we are in the data, and the end of the current string. */ + const char *d, *dend; + + /* Where we are in the pattern, and the end of the pattern. */ + unsigned char *p = bufp->buffer; + register unsigned char *pend = p + bufp->used; + + /* Mark the opcode just after a start_memory, so we can test for an + empty subpattern when we get to the stop_memory. */ + unsigned char *just_past_start_mem = 0; + + /* We use this to map every character in the string. */ + RE_TRANSLATE_TYPE translate = bufp->translate; + + /* Failure point stack. Each place that can handle a failure further + down the line pushes a failure point on this stack. It consists of + restart, regend, and reg_info for all registers corresponding to + the subexpressions we're currently inside, plus the number of such + registers, and, finally, two char *'s. The first char * is where + to resume scanning the pattern; the second one is where to resume + scanning the strings. If the latter is zero, the failure point is + a ``dummy''; if a failure happens and the failure point is a dummy, + it gets discarded and the next next one is tried. */ +#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ + fail_stack_type fail_stack; +#endif +#ifdef DEBUG + static unsigned failure_id = 0; + unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; +#endif + +#ifdef REL_ALLOC + /* This holds the pointer to the failure stack, when + it is allocated relocatably. */ + fail_stack_elt_t *failure_stack_ptr; +#endif + + /* We fill all the registers internally, independent of what we + return, for use in backreferences. The number here includes + an element for register zero. */ + size_t num_regs = bufp->re_nsub + 1; + + /* The currently active registers. */ + active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; + active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; + + /* Information on the contents of registers. These are pointers into + the input strings; they record just what was matched (on this + attempt) by a subexpression part of the pattern, that is, the + regnum-th regstart pointer points to where in the pattern we began + matching and the regnum-th regend points to right after where we + stopped matching the regnum-th subexpression. (The zeroth register + keeps track of what the whole pattern matches.) */ +#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ + const char **regstart, **regend; +#endif + + /* If a group that's operated upon by a repetition operator fails to + match anything, then the register for its start will need to be + restored because it will have been set to wherever in the string we + are when we last see its open-group operator. Similarly for a + register's end. */ +#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ + const char **old_regstart, **old_regend; +#endif + + /* The is_active field of reg_info helps us keep track of which (possibly + nested) subexpressions we are currently in. The matched_something + field of reg_info[reg_num] helps us tell whether or not we have + matched any of the pattern so far this time through the reg_num-th + subexpression. These two fields get reset each time through any + loop their register is in. */ +#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ + register_info_type *reg_info; +#endif + + /* The following record the register info as found in the above + variables when we find a match better than any we've seen before. + This happens as we backtrack through the failure points, which in + turn happens only if we have not yet matched the entire string. */ + unsigned best_regs_set = false; +#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ + const char **best_regstart, **best_regend; +#endif + + /* Logically, this is `best_regend[0]'. But we don't want to have to + allocate space for that if we're not allocating space for anything + else (see below). Also, we never need info about register 0 for + any of the other register vectors, and it seems rather a kludge to + treat `best_regend' differently than the rest. So we keep track of + the end of the best match so far in a separate variable. We + initialize this to NULL so that when we backtrack the first time + and need to test it, it's not garbage. */ + const char *match_end = NULL; + + /* This helps SET_REGS_MATCHED avoid doing redundant work. */ + int set_regs_matched_done = 0; + + /* Used when we pop values we don't care about. */ +#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ + const char **reg_dummy; + register_info_type *reg_info_dummy; +#endif + +#ifdef DEBUG + /* Counts the total number of registers pushed. */ + unsigned num_regs_pushed = 0; +#endif + + DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); + + INIT_FAIL_STACK (); + +#ifdef MATCH_MAY_ALLOCATE + /* Do not bother to initialize all the register variables if there are + no groups in the pattern, as it takes a fair amount of time. If + there are groups, we include space for register 0 (the whole + pattern), even though we never use it, since it simplifies the + array indexing. We should fix this. */ + if (bufp->re_nsub) + { + regstart = REGEX_TALLOC (num_regs, const char *); + regend = REGEX_TALLOC (num_regs, const char *); + old_regstart = REGEX_TALLOC (num_regs, const char *); + old_regend = REGEX_TALLOC (num_regs, const char *); + best_regstart = REGEX_TALLOC (num_regs, const char *); + best_regend = REGEX_TALLOC (num_regs, const char *); + reg_info = REGEX_TALLOC (num_regs, register_info_type); + reg_dummy = REGEX_TALLOC (num_regs, const char *); + reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); + + if (!(regstart && regend && old_regstart && old_regend && reg_info + && best_regstart && best_regend && reg_dummy && reg_info_dummy)) + { + FREE_VARIABLES (); + return -2; + } + } + else + { + /* We must initialize all our variables to NULL, so that + `FREE_VARIABLES' doesn't try to free them. */ + regstart = regend = old_regstart = old_regend = best_regstart + = best_regend = reg_dummy = NULL; + reg_info = reg_info_dummy = (register_info_type *) NULL; + } +#endif /* MATCH_MAY_ALLOCATE */ + + /* The starting position is bogus. */ + if (pos < 0 || pos > size1 + size2) + { + FREE_VARIABLES (); + return -1; + } + + /* Initialize subexpression text positions to -1 to mark ones that no + start_memory/stop_memory has been seen for. Also initialize the + register information struct. */ + for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) + { + regstart[mcnt] = regend[mcnt] + = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; + + REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; + IS_ACTIVE (reg_info[mcnt]) = 0; + MATCHED_SOMETHING (reg_info[mcnt]) = 0; + EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; + } + + /* We move `string1' into `string2' if the latter's empty -- but not if + `string1' is null. */ + if (size2 == 0 && string1 != NULL) + { + string2 = string1; + size2 = size1; + string1 = 0; + size1 = 0; + } + end1 = string1 + size1; + end2 = string2 + size2; + + /* Compute where to stop matching, within the two strings. */ + if (stop <= size1) + { + end_match_1 = string1 + stop; + end_match_2 = string2; + } + else + { + end_match_1 = end1; + end_match_2 = string2 + stop - size1; + } + + /* `p' scans through the pattern as `d' scans through the data. + `dend' is the end of the input string that `d' points within. `d' + is advanced into the following input string whenever necessary, but + this happens before fetching; therefore, at the beginning of the + loop, `d' can be pointing at the end of a string, but it cannot + equal `string2'. */ + if (size1 > 0 && pos <= size1) + { + d = string1 + pos; + dend = end_match_1; + } + else + { + d = string2 + pos - size1; + dend = end_match_2; + } + + DEBUG_PRINT1 ("The compiled pattern is:\n"); + DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); + DEBUG_PRINT1 ("The string to match is: `"); + DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); + DEBUG_PRINT1 ("'\n"); + + /* This loops over pattern commands. It exits by returning from the + function if the match is complete, or it drops through if the match + fails at this starting point in the input data. */ + for (;;) + { +#ifdef _LIBC + DEBUG_PRINT2 ("\n%p: ", p); +#else + DEBUG_PRINT2 ("\n0x%x: ", p); +#endif + + if (p == pend) + { /* End of pattern means we might have succeeded. */ + DEBUG_PRINT1 ("end of pattern ... "); + + /* If we haven't matched the entire string, and we want the + longest match, try backtracking. */ + if (d != end_match_2) + { + /* 1 if this match ends in the same string (string1 or string2) + as the best previous match. */ + boolean same_str_p = (FIRST_STRING_P (match_end) + == MATCHING_IN_FIRST_STRING); + /* 1 if this match is the best seen so far. */ + boolean best_match_p; + + /* AIX compiler got confused when this was combined + with the previous declaration. */ + if (same_str_p) + best_match_p = d > match_end; + else + best_match_p = !MATCHING_IN_FIRST_STRING; + + DEBUG_PRINT1 ("backtracking.\n"); + + if (!FAIL_STACK_EMPTY ()) + { /* More failure points to try. */ + + /* If exceeds best match so far, save it. */ + if (!best_regs_set || best_match_p) + { + best_regs_set = true; + match_end = d; + + DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); + + for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) + { + best_regstart[mcnt] = regstart[mcnt]; + best_regend[mcnt] = regend[mcnt]; + } + } + goto fail; + } + + /* If no failure points, don't restore garbage. And if + last match is real best match, don't restore second + best one. */ + else if (best_regs_set && !best_match_p) + { + restore_best_regs: + /* Restore best match. It may happen that `dend == + end_match_1' while the restored d is in string2. + For example, the pattern `x.*y.*z' against the + strings `x-' and `y-z-', if the two strings are + not consecutive in memory. */ + DEBUG_PRINT1 ("Restoring best registers.\n"); + + d = match_end; + dend = ((d >= string1 && d <= end1) + ? end_match_1 : end_match_2); + + for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) + { + regstart[mcnt] = best_regstart[mcnt]; + regend[mcnt] = best_regend[mcnt]; + } + } + } /* d != end_match_2 */ + + succeed_label: + DEBUG_PRINT1 ("Accepting match.\n"); + + /* If caller wants register contents data back, do it. */ + if (regs && !bufp->no_sub) + { + /* Have the register data arrays been allocated? */ + if (bufp->regs_allocated == REGS_UNALLOCATED) + { /* No. So allocate them with malloc. We need one + extra element beyond `num_regs' for the `-1' marker + GNU code uses. */ + regs->num_regs = MAX (RE_NREGS, num_regs + 1); + regs->start = TALLOC (regs->num_regs, regoff_t); + regs->end = TALLOC (regs->num_regs, regoff_t); + if (regs->start == NULL || regs->end == NULL) + { + FREE_VARIABLES (); + return -2; + } + bufp->regs_allocated = REGS_REALLOCATE; + } + else if (bufp->regs_allocated == REGS_REALLOCATE) + { /* Yes. If we need more elements than were already + allocated, reallocate them. If we need fewer, just + leave it alone. */ + if (regs->num_regs < num_regs + 1) + { + regs->num_regs = num_regs + 1; + RETALLOC (regs->start, regs->num_regs, regoff_t); + RETALLOC (regs->end, regs->num_regs, regoff_t); + if (regs->start == NULL || regs->end == NULL) + { + FREE_VARIABLES (); + return -2; + } + } + } + else + { + /* These braces fend off a "empty body in an else-statement" + warning under GCC when assert expands to nothing. */ + assert (bufp->regs_allocated == REGS_FIXED); + } + + /* Convert the pointer data in `regstart' and `regend' to + indices. Register zero has to be set differently, + since we haven't kept track of any info for it. */ + if (regs->num_regs > 0) + { + regs->start[0] = pos; + regs->end[0] = (MATCHING_IN_FIRST_STRING + ? ((regoff_t) (d - string1)) + : ((regoff_t) (d - string2 + size1))); + } + + /* Go through the first `min (num_regs, regs->num_regs)' + registers, since that is all we initialized. */ + for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs); + mcnt++) + { + if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) + regs->start[mcnt] = regs->end[mcnt] = -1; + else + { + regs->start[mcnt] + = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]); + regs->end[mcnt] + = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]); + } + } + + /* If the regs structure we return has more elements than + were in the pattern, set the extra elements to -1. If + we (re)allocated the registers, this is the case, + because we always allocate enough to have at least one + -1 at the end. */ + for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++) + regs->start[mcnt] = regs->end[mcnt] = -1; + } /* regs && !bufp->no_sub */ + + DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", + nfailure_points_pushed, nfailure_points_popped, + nfailure_points_pushed - nfailure_points_popped); + DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); + + mcnt = d - pos - (MATCHING_IN_FIRST_STRING + ? string1 + : string2 - size1); + + DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); + + FREE_VARIABLES (); + return mcnt; + } + + /* Otherwise match next pattern command. */ + switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) + { + /* Ignore these. Used to ignore the n of succeed_n's which + currently have n == 0. */ + case no_op: + DEBUG_PRINT1 ("EXECUTING no_op.\n"); + break; + + case succeed: + DEBUG_PRINT1 ("EXECUTING succeed.\n"); + goto succeed_label; + + /* Match the next n pattern characters exactly. The following + byte in the pattern defines n, and the n bytes after that + are the characters to match. */ + case exactn: + mcnt = *p++; + DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); + + /* This is written out as an if-else so we don't waste time + testing `translate' inside the loop. */ + if (translate) + { + do + { + PREFETCH (); + if ((unsigned char) translate[(unsigned char) *d++] + != (unsigned char) *p++) + goto fail; + } + while (--mcnt); + } + else + { + do + { + PREFETCH (); + if (*d++ != (char) *p++) goto fail; + } + while (--mcnt); + } + SET_REGS_MATCHED (); + break; + + + /* Match any character except possibly a newline or a null. */ + case anychar: + DEBUG_PRINT1 ("EXECUTING anychar.\n"); + + PREFETCH (); + + if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n') + || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000')) + goto fail; + + SET_REGS_MATCHED (); + DEBUG_PRINT2 (" Matched `%d'.\n", *d); + d++; + break; + + + case charset: + case charset_not: + { + register unsigned char c; + boolean not = (re_opcode_t) *(p - 1) == charset_not; + + DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); + + PREFETCH (); + c = TRANSLATE (*d); /* The character to match. */ + + /* Cast to `unsigned' instead of `unsigned char' in case the + bit list is a full 32 bytes long. */ + if (c < (unsigned) (*p * BYTEWIDTH) + && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) + not = !not; + + p += 1 + *p; + + if (!not) goto fail; + + SET_REGS_MATCHED (); + d++; + break; + } + + + /* The beginning of a group is represented by start_memory. + The arguments are the register number in the next byte, and the + number of groups inner to this one in the next. The text + matched within the group is recorded (in the internal + registers data structure) under the register number. */ + case start_memory: + DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); + + /* Find out if this group can match the empty string. */ + p1 = p; /* To send to group_match_null_string_p. */ + + if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) + REG_MATCH_NULL_STRING_P (reg_info[*p]) + = group_match_null_string_p (&p1, pend, reg_info); + + /* Save the position in the string where we were the last time + we were at this open-group operator in case the group is + operated upon by a repetition operator, e.g., with `(a*)*b' + against `ab'; then we want to ignore where we are now in + the string in case this attempt to match fails. */ + old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) + ? REG_UNSET (regstart[*p]) ? d : regstart[*p] + : regstart[*p]; + DEBUG_PRINT2 (" old_regstart: %d\n", + POINTER_TO_OFFSET (old_regstart[*p])); + + regstart[*p] = d; + DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); + + IS_ACTIVE (reg_info[*p]) = 1; + MATCHED_SOMETHING (reg_info[*p]) = 0; + + /* Clear this whenever we change the register activity status. */ + set_regs_matched_done = 0; + + /* This is the new highest active register. */ + highest_active_reg = *p; + + /* If nothing was active before, this is the new lowest active + register. */ + if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) + lowest_active_reg = *p; + + /* Move past the register number and inner group count. */ + p += 2; + just_past_start_mem = p; + + break; + + + /* The stop_memory opcode represents the end of a group. Its + arguments are the same as start_memory's: the register + number, and the number of inner groups. */ + case stop_memory: + DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); + + /* We need to save the string position the last time we were at + this close-group operator in case the group is operated + upon by a repetition operator, e.g., with `((a*)*(b*)*)*' + against `aba'; then we want to ignore where we are now in + the string in case this attempt to match fails. */ + old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) + ? REG_UNSET (regend[*p]) ? d : regend[*p] + : regend[*p]; + DEBUG_PRINT2 (" old_regend: %d\n", + POINTER_TO_OFFSET (old_regend[*p])); + + regend[*p] = d; + DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); + + /* This register isn't active anymore. */ + IS_ACTIVE (reg_info[*p]) = 0; + + /* Clear this whenever we change the register activity status. */ + set_regs_matched_done = 0; + + /* If this was the only register active, nothing is active + anymore. */ + if (lowest_active_reg == highest_active_reg) + { + lowest_active_reg = NO_LOWEST_ACTIVE_REG; + highest_active_reg = NO_HIGHEST_ACTIVE_REG; + } + else + { /* We must scan for the new highest active register, since + it isn't necessarily one less than now: consider + (a(b)c(d(e)f)g). When group 3 ends, after the f), the + new highest active register is 1. */ + unsigned char r = *p - 1; + while (r > 0 && !IS_ACTIVE (reg_info[r])) + r--; + + /* If we end up at register zero, that means that we saved + the registers as the result of an `on_failure_jump', not + a `start_memory', and we jumped to past the innermost + `stop_memory'. For example, in ((.)*) we save + registers 1 and 2 as a result of the *, but when we pop + back to the second ), we are at the stop_memory 1. + Thus, nothing is active. */ + if (r == 0) + { + lowest_active_reg = NO_LOWEST_ACTIVE_REG; + highest_active_reg = NO_HIGHEST_ACTIVE_REG; + } + else + highest_active_reg = r; + } + + /* If just failed to match something this time around with a + group that's operated on by a repetition operator, try to + force exit from the ``loop'', and restore the register + information for this group that we had before trying this + last match. */ + if ((!MATCHED_SOMETHING (reg_info[*p]) + || just_past_start_mem == p - 1) + && (p + 2) < pend) + { + boolean is_a_jump_n = false; + + p1 = p + 2; + mcnt = 0; + switch ((re_opcode_t) *p1++) + { + case jump_n: + is_a_jump_n = true; + case pop_failure_jump: + case maybe_pop_jump: + case jump: + case dummy_failure_jump: + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + if (is_a_jump_n) + p1 += 2; + break; + + default: + /* do nothing */ ; + } + p1 += mcnt; + + /* If the next operation is a jump backwards in the pattern + to an on_failure_jump right before the start_memory + corresponding to this stop_memory, exit from the loop + by forcing a failure after pushing on the stack the + on_failure_jump's jump in the pattern, and d. */ + if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump + && (re_opcode_t) p1[3] == start_memory && p1[4] == *p) + { + /* If this group ever matched anything, then restore + what its registers were before trying this last + failed match, e.g., with `(a*)*b' against `ab' for + regstart[1], and, e.g., with `((a*)*(b*)*)*' + against `aba' for regend[3]. + + Also restore the registers for inner groups for, + e.g., `((a*)(b*))*' against `aba' (register 3 would + otherwise get trashed). */ + + if (EVER_MATCHED_SOMETHING (reg_info[*p])) + { + unsigned r; + + EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; + + /* Restore this and inner groups' (if any) registers. */ + for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1); + r++) + { + regstart[r] = old_regstart[r]; + + /* xx why this test? */ + if (old_regend[r] >= regstart[r]) + regend[r] = old_regend[r]; + } + } + p1++; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + PUSH_FAILURE_POINT (p1 + mcnt, d, -2); + + goto fail; + } + } + + /* Move past the register number and the inner group count. */ + p += 2; + break; + + + /* \ has been turned into a `duplicate' command which is + followed by the numeric value of as the register number. */ + case duplicate: + { + register const char *d2, *dend2; + int regno = *p++; /* Get which register to match against. */ + DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); + + /* Can't back reference a group which we've never matched. */ + if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) + goto fail; + + /* Where in input to try to start matching. */ + d2 = regstart[regno]; + + /* Where to stop matching; if both the place to start and + the place to stop matching are in the same string, then + set to the place to stop, otherwise, for now have to use + the end of the first string. */ + + dend2 = ((FIRST_STRING_P (regstart[regno]) + == FIRST_STRING_P (regend[regno])) + ? regend[regno] : end_match_1); + for (;;) + { + /* If necessary, advance to next segment in register + contents. */ + while (d2 == dend2) + { + if (dend2 == end_match_2) break; + if (dend2 == regend[regno]) break; + + /* End of string1 => advance to string2. */ + d2 = string2; + dend2 = regend[regno]; + } + /* At end of register contents => success */ + if (d2 == dend2) break; + + /* If necessary, advance to next segment in data. */ + PREFETCH (); + + /* How many characters left in this segment to match. */ + mcnt = dend - d; + + /* Want how many consecutive characters we can match in + one shot, so, if necessary, adjust the count. */ + if (mcnt > dend2 - d2) + mcnt = dend2 - d2; + + /* Compare that many; failure if mismatch, else move + past them. */ + if (translate + ? bcmp_translate (d, d2, mcnt, translate) + : bcmp (d, d2, mcnt)) + goto fail; + d += mcnt, d2 += mcnt; + + /* Do this because we've match some characters. */ + SET_REGS_MATCHED (); + } + } + break; + + + /* begline matches the empty string at the beginning of the string + (unless `not_bol' is set in `bufp'), and, if + `newline_anchor' is set, after newlines. */ + case begline: + DEBUG_PRINT1 ("EXECUTING begline.\n"); + + if (AT_STRINGS_BEG (d)) + { + if (!bufp->not_bol) break; + } + else if (d[-1] == '\n' && bufp->newline_anchor) + { + break; + } + /* In all other cases, we fail. */ + goto fail; + + + /* endline is the dual of begline. */ + case endline: + DEBUG_PRINT1 ("EXECUTING endline.\n"); + + if (AT_STRINGS_END (d)) + { + if (!bufp->not_eol) break; + } + + /* We have to ``prefetch'' the next character. */ + else if ((d == end1 ? *string2 : *d) == '\n' + && bufp->newline_anchor) + { + break; + } + goto fail; + + + /* Match at the very beginning of the data. */ + case begbuf: + DEBUG_PRINT1 ("EXECUTING begbuf.\n"); + if (AT_STRINGS_BEG (d)) + break; + goto fail; + + + /* Match at the very end of the data. */ + case endbuf: + DEBUG_PRINT1 ("EXECUTING endbuf.\n"); + if (AT_STRINGS_END (d)) + break; + goto fail; + + + /* on_failure_keep_string_jump is used to optimize `.*\n'. It + pushes NULL as the value for the string on the stack. Then + `pop_failure_point' will keep the current value for the + string, instead of restoring it. To see why, consider + matching `foo\nbar' against `.*\n'. The .* matches the foo; + then the . fails against the \n. But the next thing we want + to do is match the \n against the \n; if we restored the + string value, we would be back at the foo. + + Because this is used only in specific cases, we don't need to + check all the things that `on_failure_jump' does, to make + sure the right things get saved on the stack. Hence we don't + share its code. The only reason to push anything on the + stack at all is that otherwise we would have to change + `anychar's code to do something besides goto fail in this + case; that seems worse than this. */ + case on_failure_keep_string_jump: + DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); + + EXTRACT_NUMBER_AND_INCR (mcnt, p); +#ifdef _LIBC + DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt); +#else + DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt); +#endif + + PUSH_FAILURE_POINT (p + mcnt, NULL, -2); + break; + + + /* Uses of on_failure_jump: + + Each alternative starts with an on_failure_jump that points + to the beginning of the next alternative. Each alternative + except the last ends with a jump that in effect jumps past + the rest of the alternatives. (They really jump to the + ending jump of the following alternative, because tensioning + these jumps is a hassle.) + + Repeats start with an on_failure_jump that points past both + the repetition text and either the following jump or + pop_failure_jump back to this on_failure_jump. */ + case on_failure_jump: + on_failure: + DEBUG_PRINT1 ("EXECUTING on_failure_jump"); + + EXTRACT_NUMBER_AND_INCR (mcnt, p); +#ifdef _LIBC + DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt); +#else + DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt); +#endif + + /* If this on_failure_jump comes right before a group (i.e., + the original * applied to a group), save the information + for that group and all inner ones, so that if we fail back + to this point, the group's information will be correct. + For example, in \(a*\)*\1, we need the preceding group, + and in \(zz\(a*\)b*\)\2, we need the inner group. */ + + /* We can't use `p' to check ahead because we push + a failure point to `p + mcnt' after we do this. */ + p1 = p; + + /* We need to skip no_op's before we look for the + start_memory in case this on_failure_jump is happening as + the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 + against aba. */ + while (p1 < pend && (re_opcode_t) *p1 == no_op) + p1++; + + if (p1 < pend && (re_opcode_t) *p1 == start_memory) + { + /* We have a new highest active register now. This will + get reset at the start_memory we are about to get to, + but we will have saved all the registers relevant to + this repetition op, as described above. */ + highest_active_reg = *(p1 + 1) + *(p1 + 2); + if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) + lowest_active_reg = *(p1 + 1); + } + + DEBUG_PRINT1 (":\n"); + PUSH_FAILURE_POINT (p + mcnt, d, -2); + break; + + + /* A smart repeat ends with `maybe_pop_jump'. + We change it to either `pop_failure_jump' or `jump'. */ + case maybe_pop_jump: + EXTRACT_NUMBER_AND_INCR (mcnt, p); + DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); + { + register unsigned char *p2 = p; + + /* Compare the beginning of the repeat with what in the + pattern follows its end. If we can establish that there + is nothing that they would both match, i.e., that we + would have to backtrack because of (as in, e.g., `a*a') + then we can change to pop_failure_jump, because we'll + never have to backtrack. + + This is not true in the case of alternatives: in + `(a|ab)*' we do need to backtrack to the `ab' alternative + (e.g., if the string was `ab'). But instead of trying to + detect that here, the alternative has put on a dummy + failure point which is what we will end up popping. */ + + /* Skip over open/close-group commands. + If what follows this loop is a ...+ construct, + look at what begins its body, since we will have to + match at least one of that. */ + while (1) + { + if (p2 + 2 < pend + && ((re_opcode_t) *p2 == stop_memory + || (re_opcode_t) *p2 == start_memory)) + p2 += 3; + else if (p2 + 6 < pend + && (re_opcode_t) *p2 == dummy_failure_jump) + p2 += 6; + else + break; + } + + p1 = p + mcnt; + /* p1[0] ... p1[2] are the `on_failure_jump' corresponding + to the `maybe_finalize_jump' of this case. Examine what + follows. */ + + /* If we're at the end of the pattern, we can change. */ + if (p2 == pend) + { + /* Consider what happens when matching ":\(.*\)" + against ":/". I don't really understand this code + yet. */ + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT1 + (" End of pattern: change to `pop_failure_jump'.\n"); + } + + else if ((re_opcode_t) *p2 == exactn + || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) + { + register unsigned char c + = *p2 == (unsigned char) endline ? '\n' : p2[2]; + + if ((re_opcode_t) p1[3] == exactn && p1[5] != c) + { + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", + c, p1[5]); + } + + else if ((re_opcode_t) p1[3] == charset + || (re_opcode_t) p1[3] == charset_not) + { + int not = (re_opcode_t) p1[3] == charset_not; + + if (c < (unsigned char) (p1[4] * BYTEWIDTH) + && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) + not = !not; + + /* `not' is equal to 1 if c would match, which means + that we can't change to pop_failure_jump. */ + if (!not) + { + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); + } + } + } + else if ((re_opcode_t) *p2 == charset) + { +#ifdef DEBUG + register unsigned char c + = *p2 == (unsigned char) endline ? '\n' : p2[2]; +#endif + +#if 0 + if ((re_opcode_t) p1[3] == exactn + && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5] + && (p2[2 + p1[5] / BYTEWIDTH] + & (1 << (p1[5] % BYTEWIDTH))))) +#else + if ((re_opcode_t) p1[3] == exactn + && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4] + && (p2[2 + p1[4] / BYTEWIDTH] + & (1 << (p1[4] % BYTEWIDTH))))) +#endif + { + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", + c, p1[5]); + } + + else if ((re_opcode_t) p1[3] == charset_not) + { + int idx; + /* We win if the charset_not inside the loop + lists every character listed in the charset after. */ + for (idx = 0; idx < (int) p2[1]; idx++) + if (! (p2[2 + idx] == 0 + || (idx < (int) p1[4] + && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) + break; + + if (idx == p2[1]) + { + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); + } + } + else if ((re_opcode_t) p1[3] == charset) + { + int idx; + /* We win if the charset inside the loop + has no overlap with the one after the loop. */ + for (idx = 0; + idx < (int) p2[1] && idx < (int) p1[4]; + idx++) + if ((p2[2 + idx] & p1[5 + idx]) != 0) + break; + + if (idx == p2[1] || idx == p1[4]) + { + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); + } + } + } + } + p -= 2; /* Point at relative address again. */ + if ((re_opcode_t) p[-1] != pop_failure_jump) + { + p[-1] = (unsigned char) jump; + DEBUG_PRINT1 (" Match => jump.\n"); + goto unconditional_jump; + } + /* Note fall through. */ + + + /* The end of a simple repeat has a pop_failure_jump back to + its matching on_failure_jump, where the latter will push a + failure point. The pop_failure_jump takes off failure + points put on by this pop_failure_jump's matching + on_failure_jump; we got through the pattern to here from the + matching on_failure_jump, so didn't fail. */ + case pop_failure_jump: + { + /* We need to pass separate storage for the lowest and + highest registers, even though we don't care about the + actual values. Otherwise, we will restore only one + register from the stack, since lowest will == highest in + `pop_failure_point'. */ + active_reg_t dummy_low_reg, dummy_high_reg; + unsigned char *pdummy; + const char *sdummy; + + DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); + POP_FAILURE_POINT (sdummy, pdummy, + dummy_low_reg, dummy_high_reg, + reg_dummy, reg_dummy, reg_info_dummy); + } + /* Note fall through. */ + + unconditional_jump: +#ifdef _LIBC + DEBUG_PRINT2 ("\n%p: ", p); +#else + DEBUG_PRINT2 ("\n0x%x: ", p); +#endif + /* Note fall through. */ + + /* Unconditionally jump (without popping any failure points). */ + case jump: + EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ + DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); + p += mcnt; /* Do the jump. */ +#ifdef _LIBC + DEBUG_PRINT2 ("(to %p).\n", p); +#else + DEBUG_PRINT2 ("(to 0x%x).\n", p); +#endif + break; + + + /* We need this opcode so we can detect where alternatives end + in `group_match_null_string_p' et al. */ + case jump_past_alt: + DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); + goto unconditional_jump; + + + /* Normally, the on_failure_jump pushes a failure point, which + then gets popped at pop_failure_jump. We will end up at + pop_failure_jump, also, and with a pattern of, say, `a+', we + are skipping over the on_failure_jump, so we have to push + something meaningless for pop_failure_jump to pop. */ + case dummy_failure_jump: + DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); + /* It doesn't matter what we push for the string here. What + the code at `fail' tests is the value for the pattern. */ + PUSH_FAILURE_POINT (0, 0, -2); + goto unconditional_jump; + + + /* At the end of an alternative, we need to push a dummy failure + point in case we are followed by a `pop_failure_jump', because + we don't want the failure point for the alternative to be + popped. For example, matching `(a|ab)*' against `aab' + requires that we match the `ab' alternative. */ + case push_dummy_failure: + DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); + /* See comments just above at `dummy_failure_jump' about the + two zeroes. */ + PUSH_FAILURE_POINT (0, 0, -2); + break; + + /* Have to succeed matching what follows at least n times. + After that, handle like `on_failure_jump'. */ + case succeed_n: + EXTRACT_NUMBER (mcnt, p + 2); + DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); + + assert (mcnt >= 0); + /* Originally, this is how many times we HAVE to succeed. */ + if (mcnt > 0) + { + mcnt--; + p += 2; + STORE_NUMBER_AND_INCR (p, mcnt); +#ifdef _LIBC + DEBUG_PRINT3 (" Setting %p to %d.\n", p - 2, mcnt); +#else + DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p - 2, mcnt); +#endif + } + else if (mcnt == 0) + { +#ifdef _LIBC + DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n", p+2); +#else + DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2); +#endif + p[2] = (unsigned char) no_op; + p[3] = (unsigned char) no_op; + goto on_failure; + } + break; + + case jump_n: + EXTRACT_NUMBER (mcnt, p + 2); + DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); + + /* Originally, this is how many times we CAN jump. */ + if (mcnt) + { + mcnt--; + STORE_NUMBER (p + 2, mcnt); +#ifdef _LIBC + DEBUG_PRINT3 (" Setting %p to %d.\n", p + 2, mcnt); +#else + DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p + 2, mcnt); +#endif + goto unconditional_jump; + } + /* If don't have to jump any more, skip over the rest of command. */ + else + p += 4; + break; + + case set_number_at: + { + DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); + + EXTRACT_NUMBER_AND_INCR (mcnt, p); + p1 = p + mcnt; + EXTRACT_NUMBER_AND_INCR (mcnt, p); +#ifdef _LIBC + DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt); +#else + DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt); +#endif + STORE_NUMBER (p1, mcnt); + break; + } + +#if 0 + /* The DEC Alpha C compiler 3.x generates incorrect code for the + test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of + AT_WORD_BOUNDARY, so this code is disabled. Expanding the + macro and introducing temporary variables works around the bug. */ + + case wordbound: + DEBUG_PRINT1 ("EXECUTING wordbound.\n"); + if (AT_WORD_BOUNDARY (d)) + break; + goto fail; + + case notwordbound: + DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); + if (AT_WORD_BOUNDARY (d)) + goto fail; + break; +#else + case wordbound: + { + boolean prevchar, thischar; + + DEBUG_PRINT1 ("EXECUTING wordbound.\n"); + if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) + break; + + prevchar = WORDCHAR_P (d - 1); + thischar = WORDCHAR_P (d); + if (prevchar != thischar) + break; + goto fail; + } + + case notwordbound: + { + boolean prevchar, thischar; + + DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); + if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) + goto fail; + + prevchar = WORDCHAR_P (d - 1); + thischar = WORDCHAR_P (d); + if (prevchar != thischar) + goto fail; + break; + } +#endif + + case wordbeg: + DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); + if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) + break; + goto fail; + + case wordend: + DEBUG_PRINT1 ("EXECUTING wordend.\n"); + if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) + && (!WORDCHAR_P (d) || AT_STRINGS_END (d))) + break; + goto fail; + +#ifdef emacs + case before_dot: + DEBUG_PRINT1 ("EXECUTING before_dot.\n"); + if (PTR_CHAR_POS ((unsigned char *) d) >= point) + goto fail; + break; + + case at_dot: + DEBUG_PRINT1 ("EXECUTING at_dot.\n"); + if (PTR_CHAR_POS ((unsigned char *) d) != point) + goto fail; + break; + + case after_dot: + DEBUG_PRINT1 ("EXECUTING after_dot.\n"); + if (PTR_CHAR_POS ((unsigned char *) d) <= point) + goto fail; + break; + + case syntaxspec: + DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); + mcnt = *p++; + goto matchsyntax; + + case wordchar: + DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n"); + mcnt = (int) Sword; + matchsyntax: + PREFETCH (); + /* Can't use *d++ here; SYNTAX may be an unsafe macro. */ + d++; + if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt) + goto fail; + SET_REGS_MATCHED (); + break; + + case notsyntaxspec: + DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); + mcnt = *p++; + goto matchnotsyntax; + + case notwordchar: + DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n"); + mcnt = (int) Sword; + matchnotsyntax: + PREFETCH (); + /* Can't use *d++ here; SYNTAX may be an unsafe macro. */ + d++; + if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt) + goto fail; + SET_REGS_MATCHED (); + break; + +#else /* not emacs */ + case wordchar: + DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); + PREFETCH (); + if (!WORDCHAR_P (d)) + goto fail; + SET_REGS_MATCHED (); + d++; + break; + + case notwordchar: + DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); + PREFETCH (); + if (WORDCHAR_P (d)) + goto fail; + SET_REGS_MATCHED (); + d++; + break; +#endif /* not emacs */ + + default: + abort (); + } + continue; /* Successfully executed one pattern command; keep going. */ + + + /* We goto here if a matching operation fails. */ + fail: + if (!FAIL_STACK_EMPTY ()) + { /* A restart point is known. Restore to that state. */ + DEBUG_PRINT1 ("\nFAIL:\n"); + POP_FAILURE_POINT (d, p, + lowest_active_reg, highest_active_reg, + regstart, regend, reg_info); + + /* If this failure point is a dummy, try the next one. */ + if (!p) + goto fail; + + /* If we failed to the end of the pattern, don't examine *p. */ + assert (p <= pend); + if (p < pend) + { + boolean is_a_jump_n = false; + + /* If failed to a backwards jump that's part of a repetition + loop, need to pop this failure point and use the next one. */ + switch ((re_opcode_t) *p) + { + case jump_n: + is_a_jump_n = true; + case maybe_pop_jump: + case pop_failure_jump: + case jump: + p1 = p + 1; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + p1 += mcnt; + + if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) + || (!is_a_jump_n + && (re_opcode_t) *p1 == on_failure_jump)) + goto fail; + break; + default: + /* do nothing */ ; + } + } + + if (d >= string1 && d <= end1) + dend = end_match_1; + } + else + break; /* Matching at this starting point really fails. */ + } /* for (;;) */ + + if (best_regs_set) + goto restore_best_regs; + + FREE_VARIABLES (); + + return -1; /* Failure to match. */ +} /* re_match_2 */ + +/* Subroutine definitions for re_match_2. */ + + +/* We are passed P pointing to a register number after a start_memory. + + Return true if the pattern up to the corresponding stop_memory can + match the empty string, and false otherwise. + + If we find the matching stop_memory, sets P to point to one past its number. + Otherwise, sets P to an undefined byte less than or equal to END. + + We don't handle duplicates properly (yet). */ + +static boolean +group_match_null_string_p (p, end, reg_info) + unsigned char **p, *end; + register_info_type *reg_info; +{ + int mcnt; + /* Point to after the args to the start_memory. */ + unsigned char *p1 = *p + 2; + + while (p1 < end) + { + /* Skip over opcodes that can match nothing, and return true or + false, as appropriate, when we get to one that can't, or to the + matching stop_memory. */ + + switch ((re_opcode_t) *p1) + { + /* Could be either a loop or a series of alternatives. */ + case on_failure_jump: + p1++; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + + /* If the next operation is not a jump backwards in the + pattern. */ + + if (mcnt >= 0) + { + /* Go through the on_failure_jumps of the alternatives, + seeing if any of the alternatives cannot match nothing. + The last alternative starts with only a jump, + whereas the rest start with on_failure_jump and end + with a jump, e.g., here is the pattern for `a|b|c': + + /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 + /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 + /exactn/1/c + + So, we have to first go through the first (n-1) + alternatives and then deal with the last one separately. */ + + + /* Deal with the first (n-1) alternatives, which start + with an on_failure_jump (see above) that jumps to right + past a jump_past_alt. */ + + while ((re_opcode_t) p1[mcnt-3] == jump_past_alt) + { + /* `mcnt' holds how many bytes long the alternative + is, including the ending `jump_past_alt' and + its number. */ + + if (!alt_match_null_string_p (p1, p1 + mcnt - 3, + reg_info)) + return false; + + /* Move to right after this alternative, including the + jump_past_alt. */ + p1 += mcnt; + + /* Break if it's the beginning of an n-th alternative + that doesn't begin with an on_failure_jump. */ + if ((re_opcode_t) *p1 != on_failure_jump) + break; + + /* Still have to check that it's not an n-th + alternative that starts with an on_failure_jump. */ + p1++; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + if ((re_opcode_t) p1[mcnt-3] != jump_past_alt) + { + /* Get to the beginning of the n-th alternative. */ + p1 -= 3; + break; + } + } + + /* Deal with the last alternative: go back and get number + of the `jump_past_alt' just before it. `mcnt' contains + the length of the alternative. */ + EXTRACT_NUMBER (mcnt, p1 - 2); + + if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info)) + return false; + + p1 += mcnt; /* Get past the n-th alternative. */ + } /* if mcnt > 0 */ + break; + + + case stop_memory: + assert (p1[1] == **p); + *p = p1 + 2; + return true; + + + default: + if (!common_op_match_null_string_p (&p1, end, reg_info)) + return false; + } + } /* while p1 < end */ + + return false; +} /* group_match_null_string_p */ + + +/* Similar to group_match_null_string_p, but doesn't deal with alternatives: + It expects P to be the first byte of a single alternative and END one + byte past the last. The alternative can contain groups. */ + +static boolean +alt_match_null_string_p (p, end, reg_info) + unsigned char *p, *end; + register_info_type *reg_info; +{ + int mcnt; + unsigned char *p1 = p; + + while (p1 < end) + { + /* Skip over opcodes that can match nothing, and break when we get + to one that can't. */ + + switch ((re_opcode_t) *p1) + { + /* It's a loop. */ + case on_failure_jump: + p1++; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + p1 += mcnt; + break; + + default: + if (!common_op_match_null_string_p (&p1, end, reg_info)) + return false; + } + } /* while p1 < end */ + + return true; +} /* alt_match_null_string_p */ + + +/* Deals with the ops common to group_match_null_string_p and + alt_match_null_string_p. + + Sets P to one after the op and its arguments, if any. */ + +static boolean +common_op_match_null_string_p (p, end, reg_info) + unsigned char **p, *end; + register_info_type *reg_info; +{ + int mcnt; + boolean ret; + int reg_no; + unsigned char *p1 = *p; + + switch ((re_opcode_t) *p1++) + { + case no_op: + case begline: + case endline: + case begbuf: + case endbuf: + case wordbeg: + case wordend: + case wordbound: + case notwordbound: +#ifdef emacs + case before_dot: + case at_dot: + case after_dot: +#endif + break; + + case start_memory: + reg_no = *p1; + assert (reg_no > 0 && reg_no <= MAX_REGNUM); + ret = group_match_null_string_p (&p1, end, reg_info); + + /* Have to set this here in case we're checking a group which + contains a group and a back reference to it. */ + + if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE) + REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret; + + if (!ret) + return false; + break; + + /* If this is an optimized succeed_n for zero times, make the jump. */ + case jump: + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + if (mcnt >= 0) + p1 += mcnt; + else + return false; + break; + + case succeed_n: + /* Get to the number of times to succeed. */ + p1 += 2; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + + if (mcnt == 0) + { + p1 -= 4; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + p1 += mcnt; + } + else + return false; + break; + + case duplicate: + if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) + return false; + break; + + case set_number_at: + p1 += 4; + + default: + /* All other opcodes mean we cannot match the empty string. */ + return false; + } + + *p = p1; + return true; +} /* common_op_match_null_string_p */ + + +/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN + bytes; nonzero otherwise. */ + +static int +bcmp_translate (s1, s2, len, translate) + const char *s1, *s2; + register int len; + RE_TRANSLATE_TYPE translate; +{ + register const unsigned char *p1 = (const unsigned char *) s1; + register const unsigned char *p2 = (const unsigned char *) s2; + while (len) + { + if (translate[*p1++] != translate[*p2++]) return 1; + len--; + } + return 0; +} + +/* Entry points for GNU code. */ + +/* re_compile_pattern is the GNU regular expression compiler: it + compiles PATTERN (of length SIZE) and puts the result in BUFP. + Returns 0 if the pattern was valid, otherwise an error string. + + Assumes the `allocated' (and perhaps `buffer') and `translate' fields + are set in BUFP on entry. + + We call regex_compile to do the actual compilation. */ + +const char * +re_compile_pattern (pattern, length, bufp) + const char *pattern; + size_t length; + struct re_pattern_buffer *bufp; +{ + reg_errcode_t ret; + + /* GNU code is written to assume at least RE_NREGS registers will be set + (and at least one extra will be -1). */ + bufp->regs_allocated = REGS_UNALLOCATED; + + /* And GNU code determines whether or not to get register information + by passing null for the REGS argument to re_match, etc., not by + setting no_sub. */ + bufp->no_sub = 0; + + /* Match anchors at newline. */ + bufp->newline_anchor = 1; + + ret = regex_compile (pattern, length, re_syntax_options, bufp); + + if (!ret) + return NULL; + return gettext (re_error_msgid[(int) ret]); +} + +/* Entry points compatible with 4.2 BSD regex library. We don't define + them unless specifically requested. */ + +#if defined (_REGEX_RE_COMP) || defined (_LIBC) + +/* BSD has one and only one pattern buffer. */ +static struct re_pattern_buffer re_comp_buf; + +char * +#ifdef _LIBC +/* Make these definitions weak in libc, so POSIX programs can redefine + these names if they don't use our functions, and still use + regcomp/regexec below without link errors. */ +weak_function +#endif +re_comp (s) + const char *s; +{ + reg_errcode_t ret; + + if (!s) + { + if (!re_comp_buf.buffer) + return gettext ("No previous regular expression"); + return 0; + } + + if (!re_comp_buf.buffer) + { + re_comp_buf.buffer = (unsigned char *) malloc (200); + if (re_comp_buf.buffer == NULL) + return gettext (re_error_msgid[(int) REG_ESPACE]); + re_comp_buf.allocated = 200; + + re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH); + if (re_comp_buf.fastmap == NULL) + return gettext (re_error_msgid[(int) REG_ESPACE]); + } + + /* Since `re_exec' always passes NULL for the `regs' argument, we + don't need to initialize the pattern buffer fields which affect it. */ + + /* Match anchors at newlines. */ + re_comp_buf.newline_anchor = 1; + + ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); + + if (!ret) + return NULL; + + /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ + return (char *) gettext (re_error_msgid[(int) ret]); +} + + +int +#ifdef _LIBC +weak_function +#endif +re_exec (s) + const char *s; +{ + const int len = strlen (s); + return + 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0); +} + +#endif /* _REGEX_RE_COMP */ + +/* POSIX.2 functions. Don't define these for Emacs. */ + +#ifndef emacs + +/* regcomp takes a regular expression as a string and compiles it. + + PREG is a regex_t *. We do not expect any fields to be initialized, + since POSIX says we shouldn't. Thus, we set + + `buffer' to the compiled pattern; + `used' to the length of the compiled pattern; + `syntax' to RE_SYNTAX_POSIX_EXTENDED if the + REG_EXTENDED bit in CFLAGS is set; otherwise, to + RE_SYNTAX_POSIX_BASIC; + `newline_anchor' to REG_NEWLINE being set in CFLAGS; + `fastmap' and `fastmap_accurate' to zero; + `re_nsub' to the number of subexpressions in PATTERN. + + PATTERN is the address of the pattern string. + + CFLAGS is a series of bits which affect compilation. + + If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we + use POSIX basic syntax. + + If REG_NEWLINE is set, then . and [^...] don't match newline. + Also, regexec will try a match beginning after every newline. + + If REG_ICASE is set, then we considers upper- and lowercase + versions of letters to be equivalent when matching. + + If REG_NOSUB is set, then when PREG is passed to regexec, that + routine will report only success or failure, and nothing about the + registers. + + It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for + the return codes and their meanings.) */ + +int +regcomp (preg, pattern, cflags) + regex_t *preg; + const char *pattern; + int cflags; +{ + reg_errcode_t ret; + reg_syntax_t syntax + = (cflags & REG_EXTENDED) ? + RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; + + /* regex_compile will allocate the space for the compiled pattern. */ + preg->buffer = 0; + preg->allocated = 0; + preg->used = 0; + + /* Don't bother to use a fastmap when searching. This simplifies the + REG_NEWLINE case: if we used a fastmap, we'd have to put all the + characters after newlines into the fastmap. This way, we just try + every character. */ + preg->fastmap = 0; + + if (cflags & REG_ICASE) + { + unsigned i; + + preg->translate + = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE + * sizeof (*(RE_TRANSLATE_TYPE)0)); + if (preg->translate == NULL) + return (int) REG_ESPACE; + + /* Map uppercase characters to corresponding lowercase ones. */ + for (i = 0; i < CHAR_SET_SIZE; i++) + preg->translate[i] = ISUPPER (i) ? tolower (i) : i; + } + else + preg->translate = NULL; + + /* If REG_NEWLINE is set, newlines are treated differently. */ + if (cflags & REG_NEWLINE) + { /* REG_NEWLINE implies neither . nor [^...] match newline. */ + syntax &= ~RE_DOT_NEWLINE; + syntax |= RE_HAT_LISTS_NOT_NEWLINE; + /* It also changes the matching behavior. */ + preg->newline_anchor = 1; + } + else + preg->newline_anchor = 0; + + preg->no_sub = !!(cflags & REG_NOSUB); + + /* POSIX says a null character in the pattern terminates it, so we + can use strlen here in compiling the pattern. */ + ret = regex_compile (pattern, strlen (pattern), syntax, preg); + + /* POSIX doesn't distinguish between an unmatched open-group and an + unmatched close-group: both are REG_EPAREN. */ + if (ret == REG_ERPAREN) ret = REG_EPAREN; + + return (int) ret; +} + + +/* regexec searches for a given pattern, specified by PREG, in the + string STRING. + + If NMATCH is zero or REG_NOSUB was set in the cflags argument to + `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at + least NMATCH elements, and we set them to the offsets of the + corresponding matched substrings. + + EFLAGS specifies `execution flags' which affect matching: if + REG_NOTBOL is set, then ^ does not match at the beginning of the + string; if REG_NOTEOL is set, then $ does not match at the end. + + We return 0 if we find a match and REG_NOMATCH if not. */ + +int +regexec (preg, string, nmatch, pmatch, eflags) + const regex_t *preg; + const char *string; + size_t nmatch; + regmatch_t pmatch[]; + int eflags; +{ + int ret; + struct re_registers regs; + regex_t private_preg; + int len = strlen (string); + boolean want_reg_info = !preg->no_sub && nmatch > 0; + + private_preg = *preg; + + private_preg.not_bol = !!(eflags & REG_NOTBOL); + private_preg.not_eol = !!(eflags & REG_NOTEOL); + + /* The user has told us exactly how many registers to return + information about, via `nmatch'. We have to pass that on to the + matching routines. */ + private_preg.regs_allocated = REGS_FIXED; + + if (want_reg_info) + { + regs.num_regs = nmatch; + regs.start = TALLOC (nmatch, regoff_t); + regs.end = TALLOC (nmatch, regoff_t); + if (regs.start == NULL || regs.end == NULL) + return (int) REG_NOMATCH; + } + + /* Perform the searching operation. */ + ret = re_search (&private_preg, string, len, + /* start: */ 0, /* range: */ len, + want_reg_info ? ®s : (struct re_registers *) 0); + + /* Copy the register information to the POSIX structure. */ + if (want_reg_info) + { + if (ret >= 0) + { + unsigned r; + + for (r = 0; r < nmatch; r++) + { + pmatch[r].rm_so = regs.start[r]; + pmatch[r].rm_eo = regs.end[r]; + } + } + + /* If we needed the temporary register info, free the space now. */ + free (regs.start); + free (regs.end); + } + + /* We want zero return to mean success, unlike `re_search'. */ + return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; +} + + +/* Returns a message corresponding to an error code, ERRCODE, returned + from either regcomp or regexec. We don't use PREG here. */ + +size_t +regerror (errcode, preg, errbuf, errbuf_size) + int errcode; + const regex_t *preg; + char *errbuf; + size_t errbuf_size; +{ + const char *msg; + size_t msg_size; + + if (errcode < 0 + || errcode >= (int) (sizeof (re_error_msgid) + / sizeof (re_error_msgid[0]))) + /* Only error codes returned by the rest of the code should be passed + to this routine. If we are given anything else, or if other regex + code generates an invalid error code, then the program has a bug. + Dump core so we can fix it. */ + abort (); + + msg = gettext (re_error_msgid[errcode]); + + msg_size = strlen (msg) + 1; /* Includes the null. */ + + if (errbuf_size != 0) + { + if (msg_size > errbuf_size) + { + strncpy (errbuf, msg, errbuf_size - 1); + errbuf[errbuf_size - 1] = 0; + } + else + strcpy (errbuf, msg); + } + + return msg_size; +} + + +/* Free dynamically allocated space used by PREG. */ + +void +regfree (preg) + regex_t *preg; +{ + if (preg->buffer != NULL) + free (preg->buffer); + preg->buffer = NULL; + + preg->allocated = 0; + preg->used = 0; + + if (preg->fastmap != NULL) + free (preg->fastmap); + preg->fastmap = NULL; + preg->fastmap_accurate = 0; + + if (preg->translate != NULL) + free (preg->translate); + preg->translate = NULL; +} + +#endif /* not emacs */ diff --git a/ghc/lib/misc/cbits/shutdownSocket.c b/ghc/lib/misc/cbits/shutdownSocket.c new file mode 100644 index 0000000..b378b2d --- /dev/null +++ b/ghc/lib/misc/cbits/shutdownSocket.c @@ -0,0 +1,43 @@ +#if 0 +% +% (c) The GRASP/AQUA Project, Glasgow University, 1996 +% +\subsection[shutdownSocket.lc]{Shut down part of full duplex connection} + +\begin{code} +#endif + +#define NON_POSIX_SOURCE +#include "rtsdefs.h" +#include "ghcSockets.h" + +StgInt +shutdownSocket(I_ sockfd, I_ how) +{ + StgInt rc; + + while ((rc = shutdown((int) sockfd, (int) how)) < 0) { + if (errno != EINTR) { + cvtErrno(); + switch (ghc_errno) { + default: + stdErrno(); + break; + case GHC_EBADF: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Not a valid write descriptor"; + break; + case GHC_ENOTCONN: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Socket not connected"; + break; + case GHC_ENOTSOCK: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Descriptor is not a socket"; + break; + } + return -1; + } + } + return rc; +} diff --git a/ghc/lib/misc/cbits/writeDescriptor.c b/ghc/lib/misc/cbits/writeDescriptor.c new file mode 100644 index 0000000..5273403 --- /dev/null +++ b/ghc/lib/misc/cbits/writeDescriptor.c @@ -0,0 +1,76 @@ +#if 0 +% +% (c) The GRASP/AQUA Project, Glasgow University, 1996 +% +\subsection[writeDescriptor.lc]{Stuff bytes down a descriptor} + +\begin{code} +#endif + +#define NON_POSIX_SOURCE +#include "rtsdefs.h" +#include "ghcSockets.h" + +StgInt +writeDescriptor(I_ fd, A_ buf, I_ nbytes) +{ + StgInt dumped; + + while ((dumped = write((int) fd, (char *) buf, (int) nbytes)) < 0) { + if (errno != EINTR) { + cvtErrno(); + switch (ghc_errno) { + default: + stdErrno(); + break; + case GHC_EBADF: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Not a valid write descriptor"; + break; + case GHC_EDQUOT: + ghc_errtype = ERR_RESOURCEEXHAUSTED; + ghc_errstr = "Disk quota exhausted"; + break; + case GHC_EFAULT: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Data not in writeable part of user address space"; + break; + case GHC_EFBIG: + ghc_errtype = ERR_RESOURCEEXHAUSTED; + ghc_errstr = "Maximum process or system file size exceeded"; + break; + case GHC_EINVAL: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Seek pointer associated with descriptor negative"; + break; + case GHC_EIO: + ghc_errtype = ERR_SYSTEMERROR; + ghc_errstr = "I/O error occurred while writing to file system"; + break; + case GHC_ENOSPC: + ghc_errtype = ERR_RESOURCEEXHAUSTED; + ghc_errstr = "No space left on device"; + break; + case GHC_ENXIO: + ghc_errtype = ERR_SYSTEMERROR; + ghc_errstr = "Hangup occurred"; + break; + case GHC_EPIPE: + ghc_errtype = ERR_SYSTEMERROR; + ghc_errstr = "Write to not read pipe/unconnected socket caught"; + break; + case GHC_ERANGE: + ghc_errtype = ERR_INVALIDARGUMENT; + ghc_errstr = "Too much or too little written to descriptor"; + break; + case GHC_EAGAIN: + case GHC_EWOULDBLOCK: + ghc_errtype = ERR_OTHERERROR; + ghc_errstr = "No data could be written immediately"; + break; + } + return -1; + } + } + return dumped; +} diff --git a/ghc/lib/misc/docs/libraries.lit b/ghc/lib/misc/docs/libraries.lit new file mode 100644 index 0000000..891d9b1 --- /dev/null +++ b/ghc/lib/misc/docs/libraries.lit @@ -0,0 +1,1075 @@ +%************************************************************************ +%* * +\section[syslibs]{System libraries} +\index{system libraries} +\index{libraries, system} +%* * +%************************************************************************ + +We intend to provide more and more ready-to-use Haskell code, so that +every program doesn't have to invent everything from scratch. + +If you provide a \tr{-syslib }\index{-syslib option} option, +then the interfaces for that library will come into scope (and may be +\tr{import}ed), and the code will be added in at link time. + +We supply a part of the HBC library (\tr{-syslib hbc}); as well as one +of our own (\tr{-syslib ghc}); one for an interface to POSIX routines +(\tr{-syslib posix}); and one of contributed stuff off the net, mostly +numerical (\tr{-syslib contrib}). + +If you have Haggis (our GUI X~toolkit for Haskell), it probably works +with a \tr{-syslib haggis} flag. + +%************************************************************************ +%* * +\subsection[GHC-library]{The GHC system library} +\index{library, GHC} +\index{GHC library} +%* * +%************************************************************************ + +We have started to put together a ``GHC system library.'' + +At the moment, the library is made of generally-useful bits of the +compiler itself. + +To use this library, just give a \tr{-syslib ghc}\index{-syslib ghc option} +option to GHC, both for compiling and linking. + +%************************************************************************ +%* * +\subsubsection[Bag]{The @Bag@ type} +\index{Bag module (GHC syslib)} +%* * +%************************************************************************ + +A {\em bag} is an unordered collection of elements which may contain +duplicates. To use, \tr{import Bag}. + +\begin{verbatim} +emptyBag :: Bag elt +unitBag :: elt -> Bag elt + +unionBags :: Bag elt -> Bag elt -> Bag elt +unionManyBags :: [Bag elt] -> Bag elt +consBag :: elt -> Bag elt -> Bag elt +snocBag :: Bag elt -> elt -> Bag elt + +concatBag :: Bag (Bag a) -> Bag a +mapBag :: (a -> b) -> Bag a -> Bag b + +foldBag :: (r -> r -> r) -- Replace TwoBags with this; should be associative + -> (a -> r) -- Replace UnitBag with this + -> r -- Replace EmptyBag with this + -> Bag a + -> r + +elemBag :: Eq elt => elt -> Bag elt -> Bool +isEmptyBag :: Bag elt -> Bool +filterBag :: (elt -> Bool) -> Bag elt -> Bag elt +partitionBag :: (elt -> Bool) -> Bag elt-> (Bag elt, Bag elt) + -- returns the elements that do/don't satisfy the predicate + +listToBag :: [elt] -> Bag elt +bagToList :: Bag elt -> [elt] +\end{verbatim} + +%************************************************************************ +%* * +\subsubsection[BitSet]{The @BitSet@ type} +\index{BitSet module (GHC syslib)} +%* * +%************************************************************************ + +Bit sets are a fast implementation of sets of integers ranging from 0 +to one less than the number of bits in a machine word (typically 31). +If any element exceeds the maximum value for a particular machine +architecture, the results of these operations are undefined. You have +been warned. [``If you put any safety checks in this code, I will have +to kill you.'' --JSM] + +\begin{verbatim} +mkBS :: [Int] -> BitSet +listBS :: BitSet -> [Int] +emptyBS :: BitSet +unitBS :: Int -> BitSet + +unionBS :: BitSet -> BitSet -> BitSet +minusBS :: BitSet -> BitSet -> BitSet +elementBS :: Int -> BitSet -> Bool +intersectBS :: BitSet -> BitSet -> BitSet + +isEmptyBS :: BitSet -> Bool +\end{verbatim} + +%************************************************************************ +%* * +\subsubsection[FiniteMap]{The @FiniteMap@ type} +\index{FiniteMap module (GHC syslib)} +%* * +%************************************************************************ + +What functional programmers call a {\em finite map}, everyone else +calls a {\em lookup table}. + +Out code is derived from that in this paper: +\begin{display} +S Adams +"Efficient sets: a balancing act" +Journal of functional programming 3(4) Oct 1993, pages 553-562 +\end{display} +Guess what? The implementation uses balanced trees. + +\begin{verbatim} +-- BUILDING +emptyFM :: FiniteMap key elt +unitFM :: key -> elt -> FiniteMap key elt +listToFM :: Ord key => [(key,elt)] -> FiniteMap key elt + -- In the case of duplicates, the last is taken + +-- ADDING AND DELETING + -- Throws away any previous binding + -- In the list case, the items are added starting with the + -- first one in the list +addToFM :: Ord key => FiniteMap key elt -> key -> elt -> FiniteMap key elt +addListToFM :: Ord key => FiniteMap key elt -> [(key,elt)] -> FiniteMap key elt + + -- Combines with previous binding +addToFM_C :: Ord key => (elt -> elt -> elt) + -> FiniteMap key elt -> key -> elt + -> FiniteMap key elt +addListToFM_C :: Ord key => (elt -> elt -> elt) + -> FiniteMap key elt -> [(key,elt)] + -> FiniteMap key elt + + -- Deletion doesn't complain if you try to delete something + -- which isn't there +delFromFM :: Ord key => FiniteMap key elt -> key -> FiniteMap key elt +delListFromFM :: Ord key => FiniteMap key elt -> [key] -> FiniteMap key elt + +-- COMBINING + -- Bindings in right argument shadow those in the left +plusFM :: Ord key => FiniteMap key elt -> FiniteMap key elt + -> FiniteMap key elt + + -- Combines bindings for the same thing with the given function +plusFM_C :: Ord key => (elt -> elt -> elt) + -> FiniteMap key elt -> FiniteMap key elt -> FiniteMap key elt + +minusFM :: Ord key => FiniteMap key elt -> FiniteMap key elt -> FiniteMap key elt + -- (minusFM a1 a2) deletes from a1 any bindings which are bound in a2 + +intersectFM :: Ord key => FiniteMap key elt -> FiniteMap key elt -> FiniteMap key elt +intersectFM_C :: Ord key => (elt -> elt -> elt) + -> FiniteMap key elt -> FiniteMap key elt -> FiniteMap key elt + +-- MAPPING, FOLDING, FILTERING +foldFM :: (key -> elt -> a -> a) -> a -> FiniteMap key elt -> a +mapFM :: (key -> elt1 -> elt2) -> FiniteMap key elt1 -> FiniteMap key elt2 +filterFM :: Ord key => (key -> elt -> Bool) + -> FiniteMap key elt -> FiniteMap key elt + +-- INTERROGATING +sizeFM :: FiniteMap key elt -> Int +isEmptyFM :: FiniteMap key elt -> Bool + +elemFM :: Ord key => key -> FiniteMap key elt -> Bool +lookupFM :: Ord key => FiniteMap key elt -> key -> Maybe elt +lookupWithDefaultFM + :: Ord key => FiniteMap key elt -> elt -> key -> elt + -- lookupWithDefaultFM supplies a "default" elt + -- to return for an unmapped key + +-- LISTIFYING +fmToList :: FiniteMap key elt -> [(key,elt)] +keysFM :: FiniteMap key elt -> [key] +eltsFM :: FiniteMap key elt -> [elt] +\end{verbatim} + +%************************************************************************ +%* * +\subsubsection[ListSetOps]{The @ListSetOps@ type} +\index{ListSetOps module (GHC syslib)} +%* * +%************************************************************************ + +Just a few set-sounding operations on lists. If you want sets, use +the \tr{Set} module. + +\begin{verbatim} +unionLists :: Eq a => [a] -> [a] -> [a] +intersectLists :: Eq a => [a] -> [a] -> [a] +minusList :: Eq a => [a] -> [a] -> [a] +disjointLists :: Eq a => [a] -> [a] -> Bool +intersectingLists :: Eq a => [a] -> [a] -> Bool +\end{verbatim} + +%************************************************************************ +%* * +\subsubsection[Maybes]{The @Maybes@ type} +\index{Maybes module (GHC syslib)} +%* * +%************************************************************************ + +The \tr{Maybe} type itself is in the Haskell~1.3 prelude. Moreover, +the required \tr{Maybe} library provides many useful functions on +\tr{Maybe}s. This (old) module provides more. + +An \tr{Either}-like type called \tr{MaybeErr}: +\begin{verbatim} +data MaybeErr val err = Succeeded val | Failed err +\end{verbatim} + +Some operations to do with \tr{Maybe} (some commentary follows): +\begin{verbatim} +maybeToBool :: Maybe a -> Bool -- Nothing => False; Just => True +allMaybes :: [Maybe a] -> Maybe [a] +firstJust :: [Maybe a] -> Maybe a +findJust :: (a -> Maybe b) -> [a] -> Maybe b + +assocMaybe :: Eq a => [(a,b)] -> a -> Maybe b +mkLookupFun :: (key -> key -> Bool) -- Equality predicate + -> [(key,val)] -- The assoc list + -> (key -> Maybe val) -- A lookup fun to use +mkLookupFunDef :: (key -> key -> Bool) -- Ditto, with a default + -> [(key,val)] + -> val -- the default + -> (key -> val) -- NB: not a Maybe anymore + + -- a monad thing +thenMaybe :: Maybe a -> (a -> Maybe b) -> Maybe b +returnMaybe :: a -> Maybe a +failMaybe :: Maybe a +mapMaybe :: (a -> Maybe b) -> [a] -> Maybe [b] +\end{verbatim} + +NB: @catMaybes@, which used to be here, is in the Haskell~1.3 libraries. + +@allMaybes@ collects a list of @Justs@ into a single @Just@, returning +@Nothing@ if there are any @Nothings@. + +@firstJust@ takes a list of @Maybes@ and returns the +first @Just@ if there is one, or @Nothing@ otherwise. + +@assocMaybe@ looks up in an association list, returning +@Nothing@ if it fails. + +Now, some operations to do with \tr{MaybeErr} (comments follow): +\begin{verbatim} + -- a monad thing (surprise, surprise) +thenMaB :: MaybeErr a err -> (a -> MaybeErr b err) -> MaybeErr b err +returnMaB :: val -> MaybeErr val err +failMaB :: err -> MaybeErr val err + +listMaybeErrs :: [MaybeErr val err] -> MaybeErr [val] [err] +foldlMaybeErrs :: (acc -> input -> MaybeErr acc err) + -> acc + -> [input] + -> MaybeErr acc [err] +\end{verbatim} + +@listMaybeErrs@ takes a list of @MaybeErrs@ and, if they all succeed, +returns a @Succeeded@ of a list of their values. If any fail, it +returns a @Failed@ of the list of all the errors in the list. + +@foldlMaybeErrs@ works along a list, carrying an accumulator; it +applies the given function to the accumulator and the next list item, +accumulating any errors that occur. + +%************************************************************************ +%* * +\subsubsection[PackedString]{The @PackedString@ type} +\index{PackedString module (GHC syslib)} +%* * +%************************************************************************ + +You need \tr{import PackedString}, and +heave in your \tr{-syslib ghc}. + +The basic type and functions which are available are: +\begin{verbatim} +data PackedString + +packString :: [Char] -> PackedString +packStringST :: [Char] -> ST s PackedString +packCString :: Addr -> PackedString +packCBytes :: Int -> Addr -> PackedString +packCBytesST :: Int -> Addr -> ST s PackedString +packBytesForC :: [Char] -> ByteArray Int +packBytesForCST :: [Char] -> ST s (ByteArray Int) +byteArrayToPS :: ByteArray Int -> PackedString +psToByteArray :: PackedString -> ByteArray Int + +unpackPS :: PackedString -> [Char] +\end{verbatim} + +We also provide a wad of list-manipulation-like functions: +\begin{verbatim} +nilPS :: PackedString +consPS :: Char -> PackedString -> PackedString + +headPS :: PackedString -> Char +tailPS :: PackedString -> PackedString +nullPS :: PackedString -> Bool +appendPS :: PackedString -> PackedString -> PackedString +lengthPS :: PackedString -> Int +indexPS :: PackedString -> Int -> Char + -- 0-origin indexing into the string +mapPS :: (Char -> Char) -> PackedString -> PackedString {-or String?-} +filterPS :: (Char -> Bool) -> PackedString -> PackedString {-or String?-} +foldlPS :: (a -> Char -> a) -> a -> PackedString -> a +foldrPS :: (Char -> a -> a) -> a -> PackedString -> a +takePS :: Int -> PackedString -> PackedString +dropPS :: Int -> PackedString -> PackedString +splitAtPS :: Int -> PackedString -> (PackedString, PackedString) +takeWhilePS:: (Char -> Bool) -> PackedString -> PackedString +dropWhilePS:: (Char -> Bool) -> PackedString -> PackedString +spanPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString) +breakPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString) +linesPS :: PackedString -> [PackedString] +wordsPS :: PackedString -> [PackedString] +reversePS :: PackedString -> PackedString +concatPS :: [PackedString] -> PackedString + +substrPS :: PackedString -> Int -> Int -> PackedString + -- pluck out a piece of a PS + -- start and end chars you want; both 0-origin-specified +\end{verbatim} + +%************************************************************************ +%* * +\subsubsection[Pretty]{The @Pretty@ type} +\index{Pretty module (GHC syslib)} +%* * +%************************************************************************ + +This is the pretty-printer that we use in GHC. + +\begin{verbatim} +type Pretty + +ppShow :: Int{-width-} -> Pretty -> [Char] + +pp'SP :: Pretty -- "comma space" +ppComma :: Pretty -- , +ppEquals :: Pretty -- = +ppLbrack :: Pretty -- [ +ppLparen :: Pretty -- ( +ppNil :: Pretty -- nothing +ppRparen :: Pretty -- ) +ppRbrack :: Pretty -- ] +ppSP :: Pretty -- space +ppSemi :: Pretty -- ; + +ppChar :: Char -> Pretty +ppDouble :: Double -> Pretty +ppFloat :: Float -> Pretty +ppInt :: Int -> Pretty +ppInteger :: Integer -> Pretty +ppRational :: Rational -> Pretty +ppStr :: [Char] -> Pretty + +ppAbove :: Pretty -> Pretty -> Pretty +ppAboves :: [Pretty] -> Pretty +ppBeside :: Pretty -> Pretty -> Pretty +ppBesides :: [Pretty] -> Pretty +ppCat :: [Pretty] -> Pretty +ppHang :: Pretty -> Int -> Pretty -> Pretty +ppInterleave :: Pretty -> [Pretty] -> Pretty -- spacing between +ppIntersperse :: Pretty -> [Pretty] -> Pretty -- no spacing between +ppNest :: Int -> Pretty -> Pretty +ppSep :: [Pretty] -> Pretty + +ppBracket :: Pretty -> Pretty -- [ ... ] around something +ppParens :: Pretty -> Pretty -- ( ... ) around something +ppQuote :: Pretty -> Pretty -- ` ... ' around something +\end{verbatim} + +%************************************************************************ +%* * +\subsubsection[Set]{The @Set@ type} +\index{Set module (GHC syslib)} +%* * +%************************************************************************ + +Our implementation of {\em sets} (key property: no duplicates) is just +a variant of the \tr{FiniteMap} module. + +\begin{verbatim} +mkSet :: Ord a => [a] -> Set a +setToList :: Set a -> [a] +emptySet :: Set a +singletonSet :: a -> Set a + +union :: Ord a => Set a -> Set a -> Set a +unionManySets :: Ord a => [Set a] -> Set a +intersect :: Ord a => Set a -> Set a -> Set a +minusSet :: Ord a => Set a -> Set a -> Set a +mapSet :: Ord a => (b -> a) -> Set b -> Set a + +elementOf :: Ord a => a -> Set a -> Bool +isEmptySet :: Set a -> Bool +\end{verbatim} + +%************************************************************************ +%* * +\subsubsection[Util]{The @Util@ type} +\index{Util module (GHC syslib)} +%* * +%************************************************************************ + +Stuff that has been useful to use in writing the compiler. Don't be +too surprised if this stuff moves/gets-renamed/etc. + +\begin{verbatim} +-- general list processing +exists :: (a -> Bool) -> [a] -> Bool +forall :: (a -> Bool) -> [a] -> Bool +isSingleton :: [a] -> Bool +lengthExceeds :: [a] -> Int -> Bool +mapAndUnzip :: (a -> (b, c)) -> [a] -> ([b], [c]) +mapAndUnzip3 :: (a -> (b, c, d)) -> [a] -> ([b], [c], [d]) +nOfThem :: Int -> a -> [a] +zipEqual :: [a] -> [b] -> [(a,b)] +zipWithEqual :: String -> (a->b->c) -> [a]->[b]->[c] +zipWith3Equal :: String -> (a->b->c->d) -> [a]->[b]->[c]->[d] +zipWith4Equal :: String -> (a->b->c->d->e) -> [a]->[b]->[c]->[d]->[e] +zipLazy :: [a] -> [b] -> [(a,b)] -- lazy in 2nd arg + +-- association lists +assoc :: Eq a => String -> [(a, b)] -> a -> b + +-- duplicate handling +hasNoDups :: Eq a => [a] -> Bool +equivClasses :: (a -> a -> Ordering) -> [a] -> [[a]] +runs :: (a -> a -> Bool) -> [a] -> [[a]] +removeDups :: (a -> a -> Ordering) -> [a] -> ([a], [[a]]) + +-- sorting (don't complain of no choice...) +quicksort :: (a -> a -> Bool) -> [a] -> [a] +sortLt :: (a -> a -> Bool) -> [a] -> [a] +stableSortLt :: (a -> a -> Bool) -> [a] -> [a] +mergesort :: (a -> a -> Ordering) -> [a] -> [a] +mergeSort :: Ord a => [a] -> [a] +naturalMergeSort :: Ord a => [a] -> [a] +mergeSortLe :: Ord a => [a] -> [a] +naturalMergeSortLe :: Ord a => [a] -> [a] + +-- transitive closures +transitiveClosure :: (a -> [a]) -- Successor function + -> (a -> a -> Bool) -- Equality predicate + -> [a] + -> [a] -- The transitive closure + +-- accumulating (Left, Right, Bi-directional) +mapAccumL :: (acc -> x -> (acc, y)) + -- Function of elt of input list and + -- accumulator, returning new accumulator and + -- elt of result list + -> acc -- Initial accumulator + -> [x] -- Input list + -> (acc, [y]) -- Final accumulator and result list + +mapAccumR :: (acc -> x -> (acc, y)) -> acc -> [x] -> (acc, [y]) + +mapAccumB :: (accl -> accr -> x -> (accl, accr,y)) + -> accl -> accr -> [x] + -> (accl, accr, [y]) + +-- comparisons +cmpString :: String -> String -> Ordering + +-- pairs +applyToPair :: ((a -> c), (b -> d)) -> (a, b) -> (c, d) +applyToFst :: (a -> c) -> (a, b) -> (c, b) +applyToSnd :: (b -> d) -> (a, b) -> (a, d) +foldPair :: (a->a->a, b->b->b) -> (a, b) -> [(a, b)] -> (a, b) +unzipWith :: (a -> b -> c) -> [(a, b)] -> [c] +\end{verbatim} + +%************************************************************************ +%* * +\subsection[C-interfaces]{Interfaces to C libraries} +\index{C library interfaces} +\index{interfaces, C library} +%* * +%************************************************************************ + +The GHC system library (\tr{-syslib ghc}) also provides interfaces to +several useful C libraries, mostly from the GNU project. + +%************************************************************************ +%* * +\subsubsection[Readline]{The @Readline@ interface} +\index{Readline library (GHC syslib)} +\index{command-line editing library} +%* * +%************************************************************************ + +(Darren Moffat supplied the \tr{Readline} interface.) + +The \tr{Readline} module is a straightforward interface to the GNU +Readline library. As such, you will need to look at the GNU +documentation (and have a \tr{libreadline.a} file around somewhere...) + +You'll need to link any Readlining program with \tr{-lreadline -ltermcap}, +besides the usual \tr{-syslib ghc}. + +The main function you'll use is: +\begin{verbatim} +readline :: String{-the prompt-} -> IO String +\end{verbatim} + +If you want to mess around with Full Readline G(l)ory, we also +provide: +\begin{verbatim} +rlInitialize, addHistory, + +rlBindKey, rlAddDefun, RlCallbackFunction(..), + +rlGetLineBuffer, rlSetLineBuffer, rlGetPoint, rlSetPoint, rlGetEnd, +rlSetEnd, rlGetMark, rlSetMark, rlSetDone, rlPendingInput, + +rlPrompt, rlTerminalName, rlSetReadlineName, rlGetReadlineName +\end{verbatim} +(All those names are just Haskellised versions of what you +will see in the GNU readline documentation.) + +%************************************************************************ +%* * +\subsubsection[Regexp]{The @Regexp@ and @MatchPS@ interfaces} +\index{Regex library (GHC syslib)} +\index{MatchPS library (GHC syslib)} +\index{regular-expressions library} +%* * +%************************************************************************ + +(Sigbjorn Finne supplied the regular-expressions interface.) + +The \tr{Regex} library provides quite direct interface to the GNU +regular-expression library, for doing manipulation on +\tr{PackedString}s. You probably need to see the GNU documentation +if you are operating at this level. + +The datatypes and functions that \tr{Regex} provides are: +\begin{verbatim} +data PatBuffer # just a bunch of bytes (mutable) + +data REmatch + = REmatch (Array Int GroupBounds) -- for $1, ... $n + GroupBounds -- for $` (everything before match) + GroupBounds -- for $& (entire matched string) + GroupBounds -- for $' (everything after) + GroupBounds -- for $+ (matched by last bracket) + +-- GroupBounds hold the interval where a group +-- matched inside a string, e.g. +-- +-- matching "reg(exp)" "a regexp" returns the pair (5,7) for the +-- (exp) group. (PackedString indices start from 0) + +type GroupBounds = (Int, Int) + +re_compile_pattern + :: PackedString -- pattern to compile + -> Bool -- True <=> assume single-line mode + -> Bool -- True <=> case-insensitive + -> PrimIO PatBuffer + +re_match :: PatBuffer -- compiled regexp + -> PackedString -- string to match + -> Int -- start position + -> Bool -- True <=> record results in registers + -> PrimIO (Maybe REmatch) + +-- Matching on 2 strings is useful when you're dealing with multiple +-- buffers, which is something that could prove useful for +-- PackedStrings, as we don't want to stuff the contents of a file +-- into one massive heap chunk, but load (smaller chunks) on demand. + +re_match2 :: PatBuffer -- 2-string version + -> PackedString + -> PackedString + -> Int + -> Int + -> Bool + -> PrimIO (Maybe REmatch) + +re_search :: PatBuffer -- compiled regexp + -> PackedString -- string to search + -> Int -- start index + -> Int -- stop index + -> Bool -- True <=> record results in registers + -> PrimIO (Maybe REmatch) + +re_search2 :: PatBuffer -- Double buffer search + -> PackedString + -> PackedString + -> Int -- start index + -> Int -- range (?) + -> Int -- stop index + -> Bool -- True <=> results in registers + -> PrimIO (Maybe REmatch) +\end{verbatim} + +The \tr{MatchPS} module provides Perl-like ``higher-level'' facilities +to operate on \tr{PackedStrings}. The regular expressions in +question are in Perl syntax. The ``flags'' on various functions can +include: \tr{i} for case-insensitive, \tr{s} for single-line mode, and +\tr{g} for global. (It's probably worth your time to peruse the +source code...) + +\begin{verbatim} +matchPS :: PackedString -- regexp + -> PackedString -- string to match + -> [Char] -- flags + -> Maybe REmatch -- info about what matched and where + +searchPS :: PackedString -- regexp + -> PackedString -- string to match + -> [Char] -- flags + -> Maybe REmatch + +-- Perl-like match-and-substitute: +substPS :: PackedString -- regexp + -> PackedString -- replacement + -> [Char] -- flags + -> PackedString -- string + -> PackedString + +-- same as substPS, but no prefix and suffix: +replacePS :: PackedString -- regexp + -> PackedString -- replacement + -> [Char] -- flags + -> PackedString -- string + -> PackedString + +match2PS :: PackedString -- regexp + -> PackedString -- string1 to match + -> PackedString -- string2 to match + -> [Char] -- flags + -> Maybe REmatch + +search2PS :: PackedString -- regexp + -> PackedString -- string to match + -> PackedString -- string to match + -> [Char] -- flags + -> Maybe REmatch + +-- functions to pull the matched pieces out of an REmatch: + +getMatchesNo :: REmatch -> Int +getMatchedGroup :: REmatch -> Int -> PackedString -> PackedString +getWholeMatch :: REmatch -> PackedString -> PackedString +getLastMatch :: REmatch -> PackedString -> PackedString +getAfterMatch :: REmatch -> PackedString -> PackedString + +-- (reverse) brute-force string matching; +-- Perl equivalent is index/rindex: +findPS, rfindPS :: PackedString -> PackedString -> Maybe Int + +-- Equivalent to Perl "chop" (off the last character, if any): +chopPS :: PackedString -> PackedString + +-- matchPrefixPS: tries to match as much as possible of strA starting +-- from the beginning of strB (handy when matching fancy literals in +-- parsers): +matchPrefixPS :: PackedString -> PackedString -> Int +\end{verbatim} + +%************************************************************************ +%* * +\subsubsection[Socket]{Network-interface toolkit---@Socket@ and @SocketPrim@} +\index{SocketPrim interface (GHC syslib)} +\index{Socket interface (GHC syslib)} +\index{network-interface library} +\index{sockets library} +\index{BSD sockets library} +%* * +%************************************************************************ + +(Darren Moffat supplied the network-interface toolkit.) + +Your best bet for documentation is to look at the code---really!--- +normally in \tr{hslibs/ghc/src/{BSD,Socket,SocketPrim}.lhs}. + +The \tr{BSD} module provides functions to get at system-database info; +pretty straightforward if you're into this sort of thing: +\begin{verbatim} +getHostName :: IO String + +getServiceByName :: ServiceName -> IO ServiceEntry +getServicePortNumber:: ServiceName -> IO PortNumber +getServiceEntry :: IO ServiceEntry +setServiceEntry :: Bool -> IO () +endServiceEntry :: IO () + +getProtocolByName :: ProtocolName -> IO ProtocolEntry +getProtocolByNumber :: ProtocolNumber -> IO ProtcolEntry +getProtocolNumber :: ProtocolName -> ProtocolNumber +getProtocolEntry :: IO ProtocolEntry +setProtocolEntry :: Bool -> IO () +endProtocolEntry :: IO () + +getHostByName :: HostName -> IO HostEntry +getHostByAddr :: Family -> HostAddress -> IO HostEntry +getHostEntry :: IO HostEntry +setHostEntry :: Bool -> IO () +endHostEntry :: IO () +\end{verbatim} + +The \tr{SocketPrim} interface provides quite direct access to the +socket facilities in a BSD Unix system, including all the +complications. We hope you don't need to use it! See the source if +needed... + +The \tr{Socket} interface is a ``higher-level'' interface to sockets, +and it is what we recommend. Please tell us if the facilities it +offers are inadequate to your task! + +The interface is relatively modest: +\begin{verbatim} +connectTo :: Hostname -> PortID -> IO Handle +listenOn :: PortID -> IO Socket + +accept :: Socket -> IO (Handle, HostName) +sendTo :: Hostname -> PortID -> String -> IO () + +recvFrom :: Hostname -> PortID -> IO String +socketPort :: Socket -> IO PortID + +data PortID -- PortID is a non-abstract type + = Service String -- Service Name eg "ftp" + | PortNumber Int -- User defined Port Number + | UnixSocket String -- Unix family socket in file system + +type Hostname = String +\end{verbatim} + +Various examples of networking Haskell code are provided in +\tr{ghc/misc/examples/}, notably the \tr{net???/Main.hs} programs. + +%************************************************************************ +%* * +\subsection[HBC-library]{The HBC system library} +\index{HBC system library} +\index{system library, HBC} +%* * +%************************************************************************ + +This documentation is stolen directly from the HBC distribution. The +modules that GHC does not support (because they require HBC-specific +extensions) are omitted. + +\begin{description} +\item[\tr{ListUtil}:] +\index{ListUtil module (HBC library)}% +Various useful functions involving lists that are missing from the +\tr{Prelude}: +\begin{verbatim} +assoc :: (Eq c) => (a -> b) -> b -> [(c, a)] -> c -> b + -- assoc f d l k looks for k in the association list l, if it + -- is found f is applied to the value, otherwise d is returned. +concatMap :: (a -> [b]) -> [a] -> [b] + -- flattening map (LML's concmap) +unfoldr :: (a -> (b, a)) -> (a -> Bool) -> a -> [b] + -- unfoldr f p x repeatedly applies f to x until (p x) holds. + -- (f x) should give a list element and a new x. +mapAccuml :: (a -> b -> (a, c)) -> a -> [b] -> (a, [c]) + -- mapAccuml f s l maps f over l, but also threads the state s + -- through (LML's mapstate). +union :: (Eq a) => [a] -> [a] -> [a] + -- union of two lists +intersection :: (Eq a) => [a] -> [a] -> [a] + -- intersection of two lists +chopList :: ([a] -> (b, [a])) -> [a] -> [b] + -- LMLs choplist +assocDef :: (Eq a) => [(a, b)] -> b -> a -> b + -- LMLs assocdef +lookup :: (Eq a) => [(a, b)] -> a -> Option b + -- lookup l k looks for the key k in the association list l + -- and returns an optional value +tails :: [a] -> [[a]] + -- return all the tails of a list +rept :: (Integral a) => a -> b -> [b] + -- repeat a value a number of times +groupEq :: (a->a->Bool) -> [a] -> [[a]] + -- group list elements according to an equality predicate +group :: (Eq a) => [a] -> [[a]] + -- group according to} == +readListLazily :: (Read a) => String -> [a] + -- read a list in a lazy fashion +\end{verbatim} + +\item[\tr{Pretty}:] +\index{Pretty module (HBC library)}% +John Hughes's pretty printing library. +\begin{verbatim} +type Context = (Bool, Int, Int, Int) +type IText = Context -> [String] +text :: String -> IText -- just text +(~.) :: IText -> IText -> IText -- horizontal composition +(^.) :: IText -> IText -> IText -- vertical composition +separate :: [IText] -> IText -- separate by spaces +nest :: Int -> IText -> IText -- indent +pretty :: Int -> Int -> IText -> String -- format it +\end{verbatim} + +\item[\tr{QSort}:] +\index{QSort module (HBC library)}% +A sort function using quicksort. +\begin{verbatim} +sortLe :: (a -> a -> Bool) -> [a] -> [a] + -- sort le l sorts l with le as less than predicate +sort :: (Ord a) => [a] -> [a] + -- sort l sorts l using the Ord class +\end{verbatim} + +\item[\tr{Random}:] +\index{Random module (HBC library)}% +Random numbers. +\begin{verbatim} +randomInts :: Int -> Int -> [Int] + -- given two seeds gives a list of random Int +randomDoubles :: Int -> Int -> [Double] + -- random Double with uniform distribution in (0,1) +normalRandomDoubles :: Int -> Int -> [Double] + -- random Double with normal distribution, mean 0, variance 1 +\end{verbatim} + +\item[\tr{Trace}:] +Simple tracing. (Note: This comes with GHC anyway.) +\begin{verbatim} +trace :: String -> a -> a -- trace x y prints x and returns y +\end{verbatim} + +\item[\tr{Miranda}:] +\index{Miranda module (HBC library)}% +Functions found in the Miranda library. +(Note: Miranda is a registered trade mark of Research Software Ltd.) + +\item[\tr{Word}:] +\index{Word module (HBC library)} +Bit manipulation. (GHC doesn't implement absolutely all of this. +And don't count on @Word@ being 32 bits on a Alpha...) +\begin{verbatim} +class Bits a where + bitAnd :: a -> a -> a -- bitwise and + bitOr :: a -> a -> a -- bitwise or + bitXor :: a -> a -> a -- bitwise xor + bitCompl :: a -> a -- bitwise negation + bitRsh :: a -> Int -> a -- bitwise right shift + bitLsh :: a -> Int -> a -- bitwise left shift + bitSwap :: a -> a -- swap word halves + bit0 :: a -- word with least significant bit set + bitSize :: a -> Int -- number of bits in a word + +data Byte -- 8 bit quantity +data Short -- 16 bit quantity +data Word -- 32 bit quantity + +instance Bits Byte, Bits Short, Bits Word +instance Eq Byte, Eq Short, Eq Word +instance Ord Byte, Ord Short, Ord Word +instance Show Byte, Show Short, Show Word +instance Num Byte, Num Short, Num Word +wordToShorts :: Word -> [Short] -- convert a Word to two Short +wordToBytes :: Word -> [Byte] -- convert a Word to four Byte +bytesToString :: [Byte] -> String -- convert a list of Byte to a String (bit by bit) +wordToInt :: Word -> Int -- convert a Word to Int +shortToInt :: Short -> Int -- convert a Short to Int +byteToInt :: Byte -> Int -- convert a Byte to Int +\end{verbatim} + +\item[\tr{Time}:] +\index{Time module (HBC library)}% +Manipulate time values (a Double with seconds since 1970). +\begin{verbatim} +-- year mon day hour min sec dec-sec weekday +data Time = Time Int Int Int Int Int Int Double Int +dblToTime :: Double -> Time -- convert a Double to a Time +timeToDbl :: Time -> Double -- convert a Time to a Double +timeToString :: Time -> String -- convert a Time to a readable String +\end{verbatim} + +\item[\tr{Hash}:] +\index{Hash module (HBC library)}% +Hashing functions. +\begin{verbatim} +class Hashable a where + hash :: a -> Int -- hash a value, return an Int +-- instances for all Prelude types +hashToMax :: (Hashable a) => Int -> a -> Int -- hash into interval [0..x-1] +\end{verbatim} + +\item[\tr{NameSupply}:] +\index{NameSupply module (HBC library)}% +Functions to generate unique names (Int). +\begin{verbatim} +type Name = Int +initialNameSupply :: NameSupply + -- The initial name supply (may be different every + -- time the program is run. +splitNameSupply :: NameSupply -> (NameSupply,NameSupply) + -- split the namesupply into two +getName :: NameSupply -> Name + -- get the name associated with a name supply +\end{verbatim} + +\item[\tr{Parse}:] +\index{Parse module (HBC library)}% +Higher order functions to build parsers. With a little care these +combinators can be used to build efficient parsers with good error +messages. +\begin{verbatim} +infixr 8 +.+ , ..+ , +.. +infix 6 `act` , >>> , `into` , .> +infixr 4 ||| , ||! , |!! +data ParseResult a b +type Parser a b = a -> Int -> ParseResult a b +(|||) :: Parser a b -> Parser a b -> Parser a b + -- Alternative +(||!) :: Parser a b -> Parser a b -> Parser a b + -- Alternative, but with committed choice +(|!!) :: Parser a b -> Parser a b -> Parser a b + -- Alternative, but with committed choice +(+.+) :: Parser a b -> Parser a c -> Parser a (b,c) + -- Sequence +(..+) :: Parser a b -> Parser a c -> Parser a c + -- Sequence, throw away first part +(+..) :: Parser a b -> Parser a c -> Parser a b + -- Sequence, throw away second part +act :: Parser a b -> (b->c) -> Parser a c + -- Action +(>>>) :: Parser a (b,c) -> (b->c->d) -> Parser a d + -- Action on two items +(.>) :: Parser a b -> c -> Parse a c + -- Action ignoring value +into :: Parser a b -> (b -> Parser a c) -> Parser a c + -- Use a produced value in a parser. +succeed b :: Parser a b + -- Always succeeds without consuming a token +failP :: Parser a b + -- Always fails. +many :: Parser a b -> Parser a [b] + -- Kleene star +many1 :: Parser a b -> Parser a [b] + -- Kleene plus +count :: Parser a b -> Int -> Parser a [b] + -- Parse an exact number of items +sepBy1 :: Parser a b -> Parser a c -> Parser a [b] + -- Non-empty sequence of items separated by something +sepBy :: Parser a b -> Parser a c -> Parser a [b] + -- Sequence of items separated by something +lit :: (Eq a, Show a) => a -> Parser [a] a + -- Recognise a literal token from a list of tokens +litp :: String -> (a->Bool) -> Parser [a] a + -- Recognise a token with a predicate. + -- The string is a description for error messages. +testp :: String -> (a -> Bool) -> (Parser b a) -> Parser b a + -- Test a semantic value. +token :: (a -> Either String (b, a)) -> Parser a b + -- General token recogniser. +parse :: Parser a b -> a -> Either ([String], a) [(b, a)] + -- Do a parse. Return either error (possible tokens and rest + -- of tokens) or all possible parses. +sParse :: (Show a) => (Parser [a] b) -> [a] -> Either String b + -- Simple parse. Return error message or result. +\end{verbatim} + +%%%simpleLex :: String -> [String] -- A simple (but useful) lexical analyzer + +\item[\tr{Native}:] +\index{Native module (HBC library)}% +Functions to convert the primitive types \tr{Int}, \tr{Float}, and \tr{Double} to +their native representation as a list of bytes (\tr{Char}). If such a list +is read/written to a file it will have the same format as when, e.g., +C read/writes the same kind of data. +\begin{verbatim} +type Bytes = [Char] -- A byte stream is just a list of characters + +class Native a where + showBytes :: a -> Bytes -> Bytes + -- prepend the representation of an item the a byte stream + listShowBytes :: [a] -> Bytes -> Bytes + -- prepend the representation of a list of items to a stream + -- (may be more efficient than repeating showBytes). + readBytes :: Bytes -> Maybe (a, Bytes) + -- get an item from the stream and return the rest, + -- or fail if the stream is to short. + listReadBytes :: Int -> Bytes -> Maybe ([a], Bytes) + -- read n items from a stream. + +instance Native Int +instance Native Float +instance Native Double +instance (Native a, Native b) => Native (a,b) + -- juxtaposition of the two items +instance (Native a, Native b, Native c) => Native (a, b, c) + -- juxtaposition of the three items +instance (Native a) => Native [a] + -- an item count in an Int followed by the items + +shortIntToBytes :: Int -> Bytes -> Bytes + -- Convert an Int to what corresponds to a short in C. +bytesToShortInt :: Bytes -> Maybe (Int, Bytes) + -- Get a short from a byte stream and convert to an Int. + +showB :: (Native a) => a -> Bytes -- Simple interface to showBytes. +readB :: (Native a) => Bytes -> a -- Simple interface to readBytes. +\end{verbatim} + +\item[\tr{Number}:] +\index{Number module (HBC library)}% +Simple numbers that belong to all numeric classes and behave like +a naive user would expect (except that printing is still ugly). +(NB: GHC does not provide a magic way to use \tr{Numbers} everywhere, +but you should be able to do it with normal \tr{import}ing and +\tr{default}ing.) +\begin{verbatim} +data Number -- The type itself. +instance ... -- All reasonable instances. +isInteger :: Number -> Bool -- Test if a Number is an integer. +\end{verbatim} +\end{description} + +%************************************************************************ +%* * +\subsection[contrib-library]{The `contrib' system library} +\index{contrib system library} +\index{system library, contrib} +%* * +%************************************************************************ + +Just for a bit of fun, we took all the old contributed ``Haskell +library'' code---Stephen J.~Bevan the main hero, converted it to +Haskell~1.3 and heaved it into a \tr{contrib} system library. It is +mostly code for numerical methods (@SetMap@ is an exception); we have +{\em no idea} whether it is any good or not. + +The modules provided are: +@Adams_Bashforth_Approx@, +@Adams_Predictor_Corrector_Approx@, +@Choleski_Factorization@, +@Crout_Reduction@, +@Cubic_Spline@, +@Fixed_Point_Approx@, +@Gauss_Seidel_Iteration@, +@Hermite_Interpolation@, +@Horner@, +@Jacobi_Iteration@, +@LLDecompMethod@, +@Least_Squares_Fit@, +@Matrix_Ops@, +@Neville_Iterated_Interpolation@, +@Newton_Cotes@, +@Newton_Interpolatory_Divided_Difference@, +@Newton_Raphson_Approx@, +@Runge_Kutta_Approx@, +@SOR_Iteration@, +@Secant_Approx@, +@SetMap@, +@Steffensen_Approx@, +@Taylor_Approx@, and +@Vector_Ops@. diff --git a/ghc/lib/misc/tests/finite-maps/Main.hs b/ghc/lib/misc/tests/finite-maps/Main.hs new file mode 100644 index 0000000..b5ceae4 --- /dev/null +++ b/ghc/lib/misc/tests/finite-maps/Main.hs @@ -0,0 +1,77 @@ +-- Test module for Finite Maps + +module Main where + +import IO +import FiniteMap +import Util + +main = hGetContents stdin >>= \ input -> + let (s1, rest1) = rd_int input + r1 = test1 s1 + + (s2, rest2) = rd_int rest1 + r2 = test2 s2 + in + putStr r1 >> + putStr r2 + +rd_int = \ i -> (head (reads i)) :: (Int,String) + + +------------------------------------------------------------- +--Test 1 creates two big maps with the same domain, mapping +--each domain elt to 1. + +test1 :: Int -- Size of maps + -> String + +test1 size + = "Test 1" ++ "\n" ++ + "N = " ++ show size ++ "\n" ++ + "Tot sum = " ++ +-- show (fmToList fm1) ++ show (fmToList fm2) ++ show (fmToList sum_fm) ++ + show tot_sum ++ "\n" ++ + "Differences: " ++ diff ++ "\n" ++ + "Sum intersection:" ++ show sum_int ++ "\n\n" + where + fm1,fm2 :: FiniteMap Int Int + fm1 = listToFM [(i,1) | i <- [1..size-1]] + fm2 = listToFM [(i,1) | i <- [size,size-1..2]] + + -- Take their sum + sum_fm = plusFM_C (+) fm1 fm2 + tot_sum = sum (map get [1..size]) + get n = lookupWithDefaultFM sum_fm (error ("lookup" ++ show n)) n + -- Should be 1 + (size-2)*2 + 1 = 2*size - 2 + + + -- Take their difference + diff_fm1 = fm1 `minusFM` fm2 -- Should be a singleton + diff_fm2 = fm2 `minusFM` fm1 -- Should be a singleton + diff = show (fmToList diff_fm1) ++ "; " ++ show (fmToList diff_fm2) + + -- Take their intersection + int_fm = intersectFM_C (+) fm1 fm2 + sum_int = foldFM (\k n tot -> n+tot) 0 int_fm + + +test2 :: Int -- No of maps + -> String + +test2 size + = "Test 2" ++ "\n" ++ + "N = " ++ show size ++ "\n" ++ + "Sizes =" ++ show [sizeFM fm1,sizeFM fm2] ++ "\n" ++ + "Sums = " ++ show [sum1,sum2] ++ "\n\n" + where + fm1,fm2 :: FiniteMap Int Int + + fms1 = [unitFM i 1 | i <- [1..size]] + fm1 = foldr (plusFM_C (+)) emptyFM fms1 + + fms2 = [unitFM 1 i | i <- [1..size]] + fm2 = foldr (plusFM_C (+)) emptyFM fms2 + + sum1 = foldr (+) 0 (eltsFM fm1) + sum2 = foldr (+) 0 (eltsFM fm2) diff --git a/ghc/lib/misc/tests/finite-maps/Makefile b/ghc/lib/misc/tests/finite-maps/Makefile new file mode 100644 index 0000000..05055dd --- /dev/null +++ b/ghc/lib/misc/tests/finite-maps/Makefile @@ -0,0 +1,5 @@ +TOP = ../../.. +include $(TOP)/mk/boilerplate.mk +SRC_HC_OPTS += -syslib ghc +SRC_RUNTEST_OPTS += +RTS -H25m -RTS +include $(TOP)/mk/target.mk diff --git a/ghc/lib/misc/tests/finite-maps/ghclib001.stdin b/ghc/lib/misc/tests/finite-maps/ghclib001.stdin new file mode 100644 index 0000000..628db6e --- /dev/null +++ b/ghc/lib/misc/tests/finite-maps/ghclib001.stdin @@ -0,0 +1,2 @@ +13133 +9798 diff --git a/ghc/lib/misc/tests/finite-maps/ghclib001.stdout b/ghc/lib/misc/tests/finite-maps/ghclib001.stdout new file mode 100644 index 0000000..e989373 --- /dev/null +++ b/ghc/lib/misc/tests/finite-maps/ghclib001.stdout @@ -0,0 +1,11 @@ +Test 1 +N = 13133 +Tot sum = 26264 +Differences: [(1, 1)]; [(13133, 1)] +Sum intersection:26262 + +Test 2 +N = 9798 +Sizes =[9798, 1] +Sums = [9798, 48005301] + diff --git a/ghc/lib/posix/Makefile b/ghc/lib/posix/Makefile new file mode 100644 index 0000000..b9c86f1 --- /dev/null +++ b/ghc/lib/posix/Makefile @@ -0,0 +1,77 @@ +# +# Makefile for hslibs subdir +# +TOP=../.. +include $(TOP)/mk/boilerplate.mk + +WAYS=$(GhcLibWays) + +ifeq "$(way)" "" +SUBDIRS = cbits +else +SUBDIRS= +endif + +#----------------------------------------------------------------------------- +# Setting the standard variables +# + +LIBRARY = libHSposix$(_way).a +HS_SRCS = $(wildcard *.lhs) +LIBOBJS = $(HS_OBJS) +HS_IFACES= $(HS_SRCS:.lhs=.$(way_)hi) GHC.$(way_)hi + +#----------------------------------------------------------------------------- +# Setting the GHC compile options + +SRC_HC_OPTS += -i../misc -recomp -cpp -fglasgow-exts -fvia-C -Rghc-timing $(GhcLibHcOpts) + +# +# Profiling options +WAY_p_HC_OPTS += -GPrelude +WAY_mr_HC_OPTS += -GPrelude + +# +# Object and interface files have suffixes tagged with their ways +# +ifneq "$(way)" "" +SRC_HC_OPTS += -hisuf $(way_)hi +endif + +# +# Specific flags +# +PosixUtil_HC_OPTS ='-\#include"cbits/libposix.h"' -monly-3-regs +PosixDB_HC_OPTS ='-\#include"cbits/libposix.h"' +PosixErr_HC_OPTS ='-\#include"cbits/libposix.h"' +PosixFiles_HC_OPTS ='-\#include"cbits/libposix.h"' +PosixIO_HC_OPTS ='-\#include"cbits/libposix.h"' +PosixProcEnv_HC_OPTS ='-\#include"cbits/libposix.h"' +PosixProcPrim_HC_OPTS ='-\#include"cbits/libposix.h"' +PosixTTY_HC_OPTS ='-\#include"cbits/libposix.h"' -monly-2-regs +Posix_HC_OPTS ='-\#include"cbits/libposix.h"' + +#----------------------------------------------------------------------------- +# Dependency generation + +SRC_MKDEPENDHS_OPTS += -I$(GHC_INCLUDE_DIR) + +#----------------------------------------------------------------------------- +# Installation; need to install .hi files as well as libraries +# +# The interface files are put inside the $(libdir), since they +# might (potentially) be platform specific.. +# +# override is used here because for binary distributions, datadir is +# set on the command line. sigh. +# +override datadir:=$(libdir)/imports/posix + +# +# Files to install from here +# +INSTALL_LIBS += $(LIBRARY) +INSTALL_DATAS += $(HS_IFACES) + +include $(TOP)/mk/target.mk + diff --git a/ghc/lib/posix/Posix.lhs b/ghc/lib/posix/Posix.lhs new file mode 100644 index 0000000..fa3c233 --- /dev/null +++ b/ghc/lib/posix/Posix.lhs @@ -0,0 +1,91 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1995-1996 +% +\section[Posix]{Haskell 1.3 POSIX bindings} + +\begin{code} +module Posix ( + module PosixDB, + module PosixErr, + module PosixFiles, + module PosixIO, + module PosixProcEnv, + module PosixProcPrim, + module PosixTTY, + + runProcess, + + ByteCount, + Fd, intToFd, + ClockTick, + EpochTime, + FileOffset, + GroupID, + Limit, + LinkCount, + ProcessID, + ProcessGroupID, + UserID, + + ExitCode + + ) where + +import PrelBase +import PrelIOBase +import IO +import PrelHandle + +import PosixDB +import PosixErr +import PosixFiles +import PosixIO +import PosixProcEnv +import PosixProcPrim +import PosixTTY +import PosixUtil + +-- [OLD COMMENT:] +-- runProcess is our candidate for the high-level OS-independent primitive +-- If accepted, it will be moved out of Posix into LibSystem. + +import Directory ( setCurrentDirectory ) + + +runProcess :: FilePath -- Command + -> [String] -- Arguments + -> Maybe [(String, String)] -- Environment + -> Maybe FilePath -- Working directory + -> Maybe Handle -- stdin + -> Maybe Handle -- stdout + -> Maybe Handle -- stderr + -> IO () +runProcess path args env dir stdin stdout stderr = + forkProcess >>= \ pid -> + case pid of + Nothing -> doTheBusiness + Just x -> return () + where + doTheBusiness :: IO () + doTheBusiness = + maybeChangeWorkingDirectory >> + maybeDup2 0 stdin >> + maybeDup2 1 stdout >> + maybeDup2 2 stderr >> + executeFile path True args env >> + syserr "runProcess" + + maybeChangeWorkingDirectory :: IO () + maybeChangeWorkingDirectory = + case dir of + Nothing -> return () + Just x -> setCurrentDirectory x + + maybeDup2 :: Int -> Maybe Handle -> IO () + maybeDup2 dest h = + case h of Nothing -> return () + Just x -> handleToFd x >>= \ src -> + dupTo src (intToFd dest) >> + return () + +\end{code} diff --git a/ghc/lib/posix/PosixDB.lhs b/ghc/lib/posix/PosixDB.lhs new file mode 100644 index 0000000..a05e30d --- /dev/null +++ b/ghc/lib/posix/PosixDB.lhs @@ -0,0 +1,111 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1995-1997 +% +\section[PosixDB]{Haskell 1.4 POSIX System Databases} + +\begin{code} +module PosixDB ( + GroupEntry(..), + UserEntry(..), + + getUserEntryForID, -- :: UserID -> IO UserEntry + getUserEntryForName, -- :: String -> IO UserEntry + + getGroupEntryForID, -- :: GroupID -> IO GroupEntry + getGroupEntryForName -- :: String -> IO GroupEntry + + ) where + +import ST +import PackedString (psToByteArrayST) +import PrelIOBase +import Addr +import IO +import PosixUtil + +data GroupEntry = + GroupEntry { + groupName :: String, + groupID :: GroupID, + groupMembers :: [String] + } + +data UserEntry = + UserEntry { + userName :: String, + userID :: UserID, + userGroupID :: GroupID, + homeDirectory :: String, + userShell :: String + } + + +getGroupEntryForID :: GroupID -> IO GroupEntry +getGroupEntryForID gid = + _ccall_ getgrgid gid >>= \ ptr -> + if ptr == (``NULL'' :: Addr) then + fail (IOError Nothing NoSuchThing + "getGroupEntryForID: no such group entry") + else + unpackGroupEntry ptr + +getGroupEntryForName :: String -> IO GroupEntry +getGroupEntryForName name = + stToIO (psToByteArrayST name) >>= \ gname -> + _ccall_ getgrnam gname >>= \ ptr -> + if ptr == (``NULL'' :: Addr) then + fail (IOError Nothing NoSuchThing + "getGroupEntryForName: no such group entry") + else + unpackGroupEntry ptr + +getUserEntryForID :: UserID -> IO UserEntry +getUserEntryForID uid = + _ccall_ getpwuid uid >>= \ ptr -> + if ptr == ``NULL'' then + fail (IOError Nothing NoSuchThing + "getUserEntryForID: no such user entry") + else + unpackUserEntry ptr + +getUserEntryForName :: String -> IO UserEntry +getUserEntryForName name = + stToIO (psToByteArrayST name) >>= \ uname -> + _ccall_ getpwnam uname >>= \ ptr -> + if ptr == ``NULL'' then + fail (IOError Nothing NoSuchThing + "getUserEntryForName: no such user entry") + else + unpackUserEntry ptr +\end{code} + +Local utility functions + +\begin{code} +-- Copy the static structure returned by getgr* into a Haskell structure + +unpackGroupEntry :: Addr -> IO GroupEntry +unpackGroupEntry ptr = + do + str <- _casm_ ``%r = ((struct group *)%0)->gr_name;'' ptr + name <- strcpy str + gid <- _casm_ ``%r = ((struct group *)%0)->gr_gid;'' ptr + mem <- _casm_ ``%r = ((struct group *)%0)->gr_mem;'' ptr + members <- unvectorize mem 0 + return (GroupEntry name gid members) + +-- Copy the static structure returned by getpw* into a Haskell structure + +unpackUserEntry :: Addr -> IO UserEntry +unpackUserEntry ptr = + do + str <- _casm_ ``%r = ((struct passwd *)%0)->pw_name;'' ptr + name <- strcpy str + uid <- _casm_ ``%r = ((struct passwd *)%0)->pw_uid;'' ptr + gid <- _casm_ ``%r = ((struct passwd *)%0)->pw_gid;'' ptr + str <- _casm_ ``%r = ((struct passwd *)%0)->pw_dir;'' ptr + home <- strcpy str + str <- _casm_ ``%r = ((struct passwd *)%0)->pw_shell;'' ptr + shell <- strcpy str + return (UserEntry name uid gid home shell) +\end{code} diff --git a/ghc/lib/posix/PosixErr.lhs b/ghc/lib/posix/PosixErr.lhs new file mode 100644 index 0000000..21696d3 --- /dev/null +++ b/ghc/lib/posix/PosixErr.lhs @@ -0,0 +1,162 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1995-1996 +% +\section[PosixErr]{Haskell 1.3 POSIX Error Codes} + +\begin{code} +module PosixErr where + +import ST +import PrelIOBase + +type ErrorCode = Int + +getErrorCode :: IO ErrorCode +getErrorCode = do + errno <- _casm_ ``%r = errno;'' + return errno + +setErrorCode :: ErrorCode -> IO () +setErrorCode errno = do + _casm_ ``errno = %0;'' errno + return () + +noError :: ErrorCode +noError = 0 + +argumentListTooLong, e2BIG :: ErrorCode +argumentListTooLong = ``E2BIG'' +e2BIG = ``E2BIG'' + +badFd, eBADF :: ErrorCode +badFd = ``EBADF'' +eBADF = ``EBADF'' + +brokenPipe, ePIPE :: ErrorCode +brokenPipe = ``EPIPE'' +ePIPE = ``EPIPE'' + +directoryNotEmpty, eNOTEMPTY :: ErrorCode +directoryNotEmpty = ``ENOTEMPTY'' +eNOTEMPTY = ``ENOTEMPTY'' + +execFormatError, eNOEXEC :: ErrorCode +execFormatError = ``ENOEXEC'' +eNOEXEC = ``ENOEXEC'' + +fileAlreadyExists, eEXIST :: ErrorCode +fileAlreadyExists = ``EEXIST'' +eEXIST = ``EEXIST'' + +fileTooLarge, eFBIG :: ErrorCode +fileTooLarge = ``EFBIG'' +eFBIG = ``EFBIG'' + +filenameTooLong, eNAMETOOLONG :: ErrorCode +filenameTooLong = ``ENAMETOOLONG'' +eNAMETOOLONG = ``ENAMETOOLONG'' + +improperLink, eXDEV :: ErrorCode +improperLink = ``EXDEV'' +eXDEV = ``EXDEV'' + +inappropriateIOControlOperation, eNOTTY :: ErrorCode +inappropriateIOControlOperation = ``ENOTTY'' +eNOTTY = ``ENOTTY'' + +inputOutputError, eIO :: ErrorCode +inputOutputError = ``EIO'' +eIO = ``EIO'' + +interruptedOperation, eINTR :: ErrorCode +interruptedOperation = ``EINTR'' +eINTR = ``EINTR'' + +invalidArgument, eINVAL :: ErrorCode +invalidArgument = ``EINVAL'' +eINVAL = ``EINVAL'' + +invalidSeek, eSPIPE :: ErrorCode +invalidSeek = ``ESPIPE'' +eSPIPE = ``ESPIPE'' + +isADirectory, eISDIR :: ErrorCode +isADirectory = ``EISDIR'' +eISDIR = ``EISDIR'' + +noChildProcess, eCHILD :: ErrorCode +noChildProcess = ``ECHILD'' +eCHILD = ``ECHILD'' + +noLocksAvailable, eNOLCK :: ErrorCode +noLocksAvailable = ``ENOLCK'' +eNOLCK = ``ENOLCK'' + +noSpaceLeftOnDevice, eNOSPC :: ErrorCode +noSpaceLeftOnDevice = ``ENOSPC'' +eNOSPC = ``ENOSPC'' + +noSuchOperationOnDevice, eNODEV :: ErrorCode +noSuchOperationOnDevice = ``ENODEV'' +eNODEV = ``ENODEV'' + +noSuchDeviceOrAddress, eNXIO :: ErrorCode +noSuchDeviceOrAddress = ``ENXIO'' +eNXIO = ``ENXIO'' + +noSuchFileOrDirectory, eNOENT :: ErrorCode +noSuchFileOrDirectory = ``ENOENT'' +eNOENT = ``ENOENT'' + +noSuchProcess, eSRCH :: ErrorCode +noSuchProcess = ``ESRCH'' +eSRCH = ``ESRCH'' + +notADirectory, eNOTDIR :: ErrorCode +notADirectory = ``ENOTDIR'' +eNOTDIR = ``ENOTDIR'' + +notEnoughMemory, eNOMEM :: ErrorCode +notEnoughMemory = ``ENOMEM'' +eNOMEM = ``ENOMEM'' + +operationNotImplemented, eNOSYS :: ErrorCode +operationNotImplemented = ``ENOSYS'' +eNOSYS = ``ENOSYS'' + +operationNotPermitted, ePERM :: ErrorCode +operationNotPermitted = ``EPERM'' +ePERM = ``EPERM'' + +permissionDenied, eACCES :: ErrorCode +permissionDenied = ``EACCES'' +eACCES = ``EACCES'' + +readOnlyFileSystem, eROFS :: ErrorCode +readOnlyFileSystem = ``EROFS'' +eROFS = ``EROFS'' + +resourceBusy, eBUSY :: ErrorCode +resourceBusy = ``EBUSY'' +eBUSY = ``EBUSY'' + +resourceDeadlockAvoided, eDEADLK :: ErrorCode +resourceDeadlockAvoided = ``EDEADLK'' +eDEADLK = ``EDEADLK'' + +resourceTemporarilyUnavailable, eAGAIN :: ErrorCode +resourceTemporarilyUnavailable = ``EAGAIN'' +eAGAIN = ``EAGAIN'' + +tooManyLinks, eMLINK :: ErrorCode +tooManyLinks = ``EMLINK'' +eMLINK = ``EMLINK'' + +tooManyOpenFiles, eMFILE :: ErrorCode +tooManyOpenFiles = ``EMFILE'' +eMFILE = ``EMFILE'' + +tooManyOpenFilesInSystem, eNFILE :: ErrorCode +tooManyOpenFilesInSystem = ``ENFILE'' +eNFILE = ``ENFILE'' +\end{code} diff --git a/ghc/lib/posix/PosixFiles.lhs b/ghc/lib/posix/PosixFiles.lhs new file mode 100644 index 0000000..07d01ab --- /dev/null +++ b/ghc/lib/posix/PosixFiles.lhs @@ -0,0 +1,559 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1995-1996 +% +\section[PosixFiles]{Haskell 1.3 POSIX File and Directory Operations} + +\begin{code} +module PosixFiles ( + + -- Directory streams + DirStream, + openDirStream, closeDirStream, + readDirStream, rewindDirStream, + + -- set/get process' working directory. + getWorkingDirectory, changeWorkingDirectory, + + -- File modes/permissions + FileMode, + nullFileMode, + ownerReadMode, ownerWriteMode, ownerExecuteMode, ownerModes, + groupReadMode, groupWriteMode, groupExecuteMode, groupModes, + otherReadMode, otherWriteMode, otherExecuteMode, otherModes, + setUserIDMode, setGroupIDMode, + stdFileMode, accessModes, + + unionFileModes, intersectFileModes, + + -- File operations on descriptors + stdInput, stdOutput, stdError, + OpenMode(..), + OpenFileFlags(..), defaultFileFlags, + openFd, createFile, + + -- other file&directory operations + setFileCreationMask, + createLink, removeLink, + createDirectory, removeDirectory, + createNamedPipe, + rename, + + -- FileStatus + FileStatus, + getFileStatus, getFdStatus, + fileExist, + fileAccess, + setFileMode, + + fileMode, + fileID, FileID, + deviceID, DeviceID, + linkCount, + fileOwner, fileGroup, + fileSize, + accessTime, modificationTime, statusChangeTime, + isDirectory, isCharacterDevice, + isBlockDevice, isRegularFile, + isNamedPipe, + + setOwnerAndGroup, -- chown (might be restricted) + setFileTimes, -- set access and modification time + touchFile, -- set access and modification time to current time. + + -- run-time limit & POSIX feature testing + PathVar(..), + getPathVar, + getFileVar + + ) where + +import PrelST +import ST +import PrelIOBase +import IO +import IOExts (unsafePerformIO) +import PackedString (psToByteArrayST) +import Addr +import CCall +import PrelBase +import ByteArray + +import PosixErr +import PosixUtil +import Directory ( removeDirectory, -- re-use its code + getCurrentDirectory, + setCurrentDirectory + ) + +\end{code} + +%************************************************************ +%* * +\subsection[DirStream]{POSIX Directory streams} +%* * +%************************************************************ + +Accessing directories is done in POSIX via @DIR@ streams, with +operations for opening, closing, reading and rewinding the current +pointer in a directory. + +{\bf Note:} The standard interface @Directory@ provides the +operation @getDirectoryContents@ which returns the directory contents of a +specified file path, which supplants some of the raw @DirStream@ operations +defined here. + +\begin{code} + +data DirStream = DirStream# Addr# +instance CCallable DirStream +instance CReturnable DirStream + +openDirStream :: FilePath -> IO DirStream +openDirStream name = + psToByteArrayIO name >>= \dir -> + _ccall_ opendir dir >>= \dirp@(A# dirp#) -> + if dirp /= (``NULL''::Addr) + then return (DirStream# dirp#) + else syserr "openDirStream" + +readDirStream :: DirStream -> IO String +readDirStream dirp = do + setErrorCode noError + dirent <- _ccall_ readdir dirp + if dirent /= (``NULL''::Addr) + then do + str <- _casm_ ``%r = ((struct dirent *)%0)->d_name;'' dirent + name <- strcpy str + return name + else do + errno <- getErrorCode + if errno == noError + then fail (IOError Nothing EOF "EOF") + else syserr "readDirStream" + +rewindDirStream :: DirStream -> IO () +rewindDirStream dirp = do + _ccall_ rewinddir dirp + return () + +closeDirStream :: DirStream -> IO () +closeDirStream dirp = do + rc <- _ccall_ closedir dirp + if rc == 0 + then return () + else syserr "closeDirStream" + +{- + Renamings of functionality provided via Directory interface, + kept around for b.wards compatibility and for having more POSIXy + names +-} +getWorkingDirectory :: IO FilePath +getWorkingDirectory = getCurrentDirectory + +changeWorkingDirectory :: FilePath -> IO () +changeWorkingDirectory name = setCurrentDirectory name +\end{code} + +%************************************************************ +%* * +\subsection[FileMode]{POSIX File modes} +%* * +%************************************************************ + +The abstract type @FileMode@ and constants and operators for manipulating the +file modes defined by POSIX. + +\begin{code} + +data FileMode = FileMode# Word# +instance CCallable FileMode +instance CReturnable FileMode + +nullFileMode :: FileMode +nullFileMode = FileMode# (case ``0'' of { W# x -> x}) + +ownerReadMode :: FileMode +ownerReadMode = FileMode# (case ``S_IRUSR'' of { W# x -> x}) + +ownerWriteMode :: FileMode +ownerWriteMode = FileMode# (case ``S_IWUSR'' of { W# x -> x}) + +ownerExecuteMode :: FileMode +ownerExecuteMode = FileMode# (case ``S_IXUSR'' of { W# x -> x}) + +groupReadMode :: FileMode +groupReadMode = FileMode# (case ``S_IRGRP'' of { W# x -> x}) + +groupWriteMode :: FileMode +groupWriteMode = FileMode# (case ``S_IWGRP'' of { W# x -> x}) + +groupExecuteMode :: FileMode +groupExecuteMode = FileMode# (case ``S_IXGRP'' of { W# x -> x}) + +otherReadMode :: FileMode +otherReadMode = FileMode# (case ``S_IROTH'' of { W# x -> x}) + +otherWriteMode :: FileMode +otherWriteMode = FileMode# (case ``S_IWOTH'' of { W# x -> x}) + +otherExecuteMode :: FileMode +otherExecuteMode = FileMode# (case ``S_IXOTH'' of { W# x -> x}) + +setUserIDMode :: FileMode +setUserIDMode = FileMode# (case ``S_ISUID'' of { W# x -> x}) + +setGroupIDMode :: FileMode +setGroupIDMode = FileMode# (case ``S_ISGID'' of { W# x -> x}) + +stdFileMode :: FileMode +stdFileMode = FileMode# (case ``(S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)'' of { W# x -> x}) + +ownerModes :: FileMode +ownerModes = FileMode# (case ``S_IRWXU'' of { W# x -> x}) + +groupModes :: FileMode +groupModes = FileMode# (case ``S_IRWXG'' of { W# x -> x}) + +otherModes :: FileMode +otherModes = FileMode# (case ``S_IRWXO'' of { W# x -> x}) + +accessModes :: FileMode +accessModes = FileMode# (case ``(S_IRWXU|S_IRWXG|S_IRWXO)'' of { W# x -> x}) + +unionFileModes :: FileMode -> FileMode -> FileMode +unionFileModes (FileMode# m1#) (FileMode# m2#) = FileMode# (m1# `or#` m2#) + +intersectFileModes :: FileMode -> FileMode -> FileMode +intersectFileModes (FileMode# m1#) (FileMode# m2#) = FileMode# (m1# `and#` m2#) + +\end{code} + +%************************************************************ +%* * +\subsection[FileDescriptor]{POSIX File descriptors} +%* * +%************************************************************ + +File descriptors (formerly @Channel@s) are the lowest level +handles to file objects. + +\begin{code} +stdInput, stdOutput, stdError :: Fd +stdInput = intToFd 0 +stdOutput = intToFd 1 +stdError = intToFd 2 + +data OpenMode = ReadOnly | WriteOnly | ReadWrite + +data OpenFileFlags = + OpenFileFlags { + append :: Bool, + exclusive :: Bool, + noctty :: Bool, + nonBlock :: Bool, + trunc :: Bool + } + +defaultFileFlags :: OpenFileFlags +defaultFileFlags = + OpenFileFlags { + append = False, + exclusive = False, + noctty = False, + nonBlock = False, + trunc = False + } + +openFd :: FilePath + -> OpenMode + -> Maybe FileMode -- Just x => O_CREAT, Nothing => must exist + -> OpenFileFlags + -> IO Fd +openFd name how maybe_mode (OpenFileFlags append exclusive noctty nonBlock truncate) = + psToByteArrayIO name >>= \file -> + _ccall_ open file flags mode_w >>= \fd@(I# fd#) -> + if fd /= -1 + then return (FD# fd#) + else syserr "openFd" + where + mode_w = case maybe_mode of { Nothing -> ``0'' ; Just x -> x } + flags = W# (creat# `or#` flags# `or#` how#) + + or (W# x#) (W# y#) = W# (x# `or#` y#) + + (W# flags#) = + (if append then ``O_APPEND'' else zero) `or` + (if exclusive then ``O_EXCL'' else zero) `or` + (if noctty then ``O_NOCTTY'' else zero) `or` + (if nonBlock then ``O_NONBLOCK'' else zero) `or` + (if truncate then ``O_TRUNC'' else zero) + + zero = W# (int2Word# 0#) + + creat# = + case (case maybe_mode of { + Nothing -> zero ; + Just _ -> ``O_CREAT'' }) of { + W# x -> x } + + how# = + case + (case how of { ReadOnly -> ``O_RDONLY''; + WriteOnly -> ``O_WRONLY''; + ReadWrite -> ``O_RDWR''}) of { + W# x -> x } + +createFile :: FilePath -> FileMode -> IO Fd +createFile name mode = + psToByteArrayIO name >>= \file -> + _ccall_ creat file mode >>= \fd@(I# fd#) -> + if fd /= -1 + then return (FD# fd#) + else syserr "createFile" + +setFileCreationMask :: FileMode -> IO FileMode +setFileCreationMask mask = _ccall_ umask mask + +createLink :: FilePath -> FilePath -> IO () +createLink name1 name2 = do + path1 <- psToByteArrayIO name1 + path2 <- psToByteArrayIO name2 + rc <- _ccall_ link path1 path2 + if rc == 0 + then return () + else syserr "createLink" + +createDirectory :: FilePath -> FileMode -> IO () +createDirectory name mode = do -- NB: diff signature from LibDirectory one! + dir <- psToByteArrayIO name + rc <- _ccall_ mkdir dir mode + if rc == 0 + then return () + else syserr "createDirectory" + +createNamedPipe :: FilePath -> FileMode -> IO () +createNamedPipe name mode = do + pipe <- psToByteArrayIO name + rc <-_ccall_ mkfifo pipe mode + if rc == 0 + then return () + else syserr "createNamedPipe" + +removeLink :: FilePath -> IO () +removeLink name = do + path <- psToByteArrayIO name + rc <-_ccall_ unlink path + if rc == 0 + then return () + else syserr "removeLink" + +rename :: FilePath -> FilePath -> IO () +rename name1 name2 = do + path1 <- psToByteArrayIO name1 + path2 <- psToByteArrayIO name2 + rc <- _ccall_ rename path1 path2 + if rc == 0 + then return () + else syserr "rename" + +type FileStatus = ByteArray () +type FileID = Int +type DeviceID = Int + +fileMode :: FileStatus -> FileMode +fileMode stat = unsafePerformIO $ + _casm_ ``%r = ((struct stat *)%0)->st_mode;'' stat + +fileID :: FileStatus -> FileID +fileID stat = unsafePerformIO $ + _casm_ ``%r = ((struct stat *)%0)->st_ino;'' stat + +deviceID :: FileStatus -> DeviceID +deviceID stat = unsafePerformIO $ + _casm_ ``%r = ((struct stat *)%0)->st_dev;'' stat + +linkCount :: FileStatus -> LinkCount +linkCount stat = unsafePerformIO $ + _casm_ ``%r = ((struct stat *)%0)->st_nlink;'' stat + +fileOwner :: FileStatus -> UserID +fileOwner stat = unsafePerformIO $ + _casm_ ``%r = ((struct stat *)%0)->st_uid;'' stat + +fileGroup :: FileStatus -> GroupID +fileGroup stat = unsafePerformIO $ + _casm_ ``%r = ((struct stat *)%0)->st_gid;'' stat + +fileSize :: FileStatus -> FileOffset +fileSize stat = unsafePerformIO $ + _casm_ ``%r = ((struct stat *)%0)->st_size;'' stat + +accessTime :: FileStatus -> EpochTime +accessTime stat = unsafePerformIO $ + _casm_ ``%r = ((struct stat *)%0)->st_atime;'' stat + +modificationTime :: FileStatus -> EpochTime +modificationTime stat = unsafePerformIO $ + _casm_ ``%r = ((struct stat *)%0)->st_mtime;'' stat + +statusChangeTime :: FileStatus -> EpochTime +statusChangeTime stat = unsafePerformIO $ + _casm_ ``%r = ((struct stat *)%0)->st_ctime;'' stat + +isDirectory :: FileStatus -> Bool +isDirectory stat = unsafePerformIO $ + _casm_ ``%r = S_ISDIR(((struct stat *)%0)->st_mode);'' stat >>= \ rc -> + return (rc /= 0) + +isCharacterDevice :: FileStatus -> Bool +isCharacterDevice stat = unsafePerformIO $ + _casm_ ``%r = S_ISCHR(((struct stat *)%0)->st_mode);'' stat >>= \ rc -> + return (rc /= 0) + +isBlockDevice :: FileStatus -> Bool +isBlockDevice stat = unsafePerformIO $ + _casm_ ``%r = S_ISBLK(((struct stat *)%0)->st_mode);'' stat >>= \ rc -> + return (rc /= 0) + +isRegularFile :: FileStatus -> Bool +isRegularFile stat = unsafePerformIO $ + _casm_ ``%r = S_ISREG(((struct stat *)%0)->st_mode);'' stat >>= \ rc -> + return (rc /= 0) + +isNamedPipe :: FileStatus -> Bool +isNamedPipe stat = unsafePerformIO $ + _casm_ ``%r = S_ISFIFO(((struct stat *)%0)->st_mode);'' stat >>= \ rc -> + return (rc /= 0) + +getFileStatus :: FilePath -> IO FileStatus +getFileStatus name = do + path <- psToByteArrayIO name + bytes <- allocChars ``sizeof(struct stat)'' + rc <- _casm_ ``%r = stat(%0,(struct stat *)%1);'' path bytes + if rc == 0 + then do + stat <- freeze bytes + return stat + else syserr "getFileStatus" + +getFdStatus :: Fd -> IO FileStatus +getFdStatus fd = do + bytes <- allocChars ``sizeof(struct stat)'' + rc <- _casm_ ``%r = fstat(%0,(struct stat *)%1);'' fd bytes + if rc == 0 + then do + stat <- freeze bytes + return stat + else syserr "getFdStatus" + +fileAccess :: FilePath -> Bool -> Bool -> Bool -> IO Bool +fileAccess name read write exec = do + path <- psToByteArrayIO name + rc <- _ccall_ access path flags + return (rc == 0) + where + flags = I# (word2Int# (read# `or#` write# `or#` exec#)) + read# = case (if read then ``R_OK'' else ``0'') of { W# x -> x } + write# = case (if write then ``W_OK'' else ``0'') of { W# x -> x } + exec# = case (if exec then ``X_OK'' else ``0'') of { W# x -> x } + +fileExist :: FilePath -> IO Bool +fileExist name = do + path <- psToByteArrayIO name + rc <- _ccall_ access path (``F_OK''::Int) + return (rc == 0) + +setFileMode :: FilePath -> FileMode -> IO () +setFileMode name mode = do + path <- psToByteArrayIO name + rc <- _ccall_ chmod path mode + if rc == 0 + then return () + else syserr "setFileMode" + +setOwnerAndGroup :: FilePath -> UserID -> GroupID -> IO () +setOwnerAndGroup name uid gid = do + path <- psToByteArrayIO name + rc <- _ccall_ chown path uid gid + if rc == 0 + then return () + else syserr "setOwnerAndGroup" + +setFileTimes :: FilePath -> EpochTime -> EpochTime -> IO () +setFileTimes name atime mtime = do + path <- psToByteArrayIO name + rc <- _casm_ ``do {struct utimbuf ub; ub.actime = (time_t) %0; + ub.modtime = (time_t) %1; + %r = utime(%2, &ub);} while(0);'' atime mtime path + if rc == 0 + then return () + else syserr "setFileTimes" + +{- Set access and modification time to current time -} +touchFile :: FilePath -> IO () +touchFile name = do + path <- psToByteArrayIO name + rc <- _ccall_ utime path (``NULL''::Addr) + if rc == 0 + then return () + else syserr "touchFile" + +data PathVar = LinkLimit {- _PC_LINK_MAX -} + | InputLineLimit {- _PC_MAX_CANON -} + | InputQueueLimit {- _PC_MAX_INPUT -} + | FileNameLimit {- _PC_NAME_MAX -} + | PathNameLimit {- _PC_PATH_MAX -} + | PipeBufferLimit {- _PC_PIPE_BUF -} + | SetOwnerAndGroupIsRestricted {- _PC_CHOWN_RESTRICTED -} + | FileNamesAreNotTruncated {- _PC_NO_TRUNC -} + +getPathVar :: PathVar -> FilePath -> IO Limit +getPathVar v name = + (case v of + LinkLimit -> pathconf ``_PC_LINK_MAX'' + InputLineLimit -> pathconf ``_PC_MAX_CANON'' + InputQueueLimit -> pathconf ``_PC_MAX_INPUT'' + FileNameLimit -> pathconf ``_PC_NAME_MAX'' + PathNameLimit -> pathconf ``_PC_PATH_MAX'' + PipeBufferLimit -> pathconf ``_PC_PIPE_BUF'' + SetOwnerAndGroupIsRestricted -> pathconf ``_PC_CHOWN_RESTRICTED'' + FileNamesAreNotTruncated -> pathconf ``_PC_NO_TRUNC'') name + +pathconf :: Int -> FilePath -> IO Limit +pathconf n name = do + path <- psToByteArrayIO name + rc <- _ccall_ pathconf path n + if rc /= -1 + then return rc + else do + errno <- getErrorCode + if errno == invalidArgument + then fail (IOError Nothing NoSuchThing "PosixFiles.getPathVar: no such path limit or option") + else syserr "PosixFiles.getPathVar" + + +getFileVar :: PathVar -> Fd -> IO Limit +getFileVar v fd = + (case v of + LinkLimit -> fpathconf (``_PC_LINK_MAX''::Int) + InputLineLimit -> fpathconf (``_PC_MAX_CANON''::Int) + InputQueueLimit -> fpathconf ``_PC_MAX_INPUT'' + FileNameLimit -> fpathconf ``_PC_NAME_MAX'' + PathNameLimit -> fpathconf ``_PC_PATH_MAX'' + PipeBufferLimit -> fpathconf ``_PC_PIPE_BUF'' + SetOwnerAndGroupIsRestricted -> fpathconf ``_PC_CHOWN_RESTRICTED'' + FileNamesAreNotTruncated -> fpathconf ``_PC_NO_TRUNC'') fd + +fpathconf :: Int -> Fd -> IO Limit +fpathconf n fd = do + rc <- _ccall_ fpathconf fd n + if rc /= -1 + then return rc + else do + errno <- getErrorCode + if errno == invalidArgument + then fail (IOError Nothing NoSuchThing "getFileVar: no such path limit or option") + else syserr "getFileVar" + +\end{code} diff --git a/ghc/lib/posix/PosixIO.lhs b/ghc/lib/posix/PosixIO.lhs new file mode 100644 index 0000000..6c2ce72 --- /dev/null +++ b/ghc/lib/posix/PosixIO.lhs @@ -0,0 +1,311 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1995-1996 +% +\section[PosixIO]{Haskell 1.3 POSIX Input/Output Primitives} + +\begin{code} +module PosixIO ( + FdOption(..), + FileLock, + LockRequest(..), + + fdClose, + createPipe, + dup, + dupTo, + + fdRead, + fdWrite, + fdSeek, + + queryFdOption, + setFdOption, + + getLock, setLock, + waitToSetLock, + + -- Handle <-> Fd + handleToFd, fdToHandle, + ) where + +import GlaExts +import ST +import PrelIOBase +import PrelHandle (filePtr, readHandle, writeHandle, newHandle) +import IO +import PackedString ( unpackPS, unsafeByteArrayToPS, psToByteArrayST ) +import Addr +import Foreign + +import PosixUtil +import PosixFiles ( stdInput, stdOutput, stdError ) + + +createPipe :: IO (Fd, Fd) +createPipe = do + bytes <- allocChars ``(2*sizeof(int))'' + rc <- _casm_ ``%r = pipe((int *)%0);'' bytes + if rc /= -1 + then do + rd <- _casm_ ``%r = ((int *)%0)[0];'' bytes + wd <- _casm_ ``%r = ((int *)%0)[1];'' bytes + return (rd, wd) + else + syserr "createPipe" + +dup :: Fd -> IO Fd +dup fd = + _ccall_ dup fd >>= \ fd2@(I# fd2#) -> + if fd2 /= -1 then + return (FD# fd2#) + else + syserr "dup" + +dupTo :: Fd -> Fd -> IO () +dupTo fd1 fd2 = minusone_error (_ccall_ dup2 fd1 fd2) "dupTo" + +fdClose :: Fd -> IO () +fdClose fd = minusone_error (_ccall_ close fd) "fdClose" + +handleToFd :: Handle -> IO Fd +handleToFd h = do + h_ <- readHandle h + case h_ of + ErrorHandle ioError -> writeHandle h h_ >> fail ioError + ClosedHandle -> writeHandle h h_ >> + fail (IOError Nothing IllegalOperation + "handle is closed") + SemiClosedHandle _ _ -> writeHandle h h_ >> + fail (IOError Nothing IllegalOperation + "handle is semi-closed") + other -> + let file = filePtr h_ in + _casm_ `` %r=fileno((FILE *)%0); '' file >>= \ fd@(FD# fd#) -> + writeHandle h h_ >> + if fd# /=# (negateInt# 1#) then + return fd + else + syserr "handleToFd" + +-- default is no buffering. +fdToHandle :: Fd -> IO Handle +fdToHandle fd@(FD# fd#) = + _ccall_ fcntl fd (``F_GETFL''::Int) 0 >>= \ flags@(I# flags#) -> + if flags /= -1 then + let + wH = (int2Word# flags# `and#` (case ``O_WRONLY'' of { W# x -> x})) + `neWord#` int2Word# 0# + aH = (int2Word# flags# `and#` (case ``O_APPEND'' of { W# x -> x})) + `neWord#` int2Word# 0# + rwH = (int2Word# flags# `and#` (case ``O_RDWR'' of { W# x -> x })) + `neWord#` int2Word# 0# + (ft,handle_t) = + if wH then + if aH + then ("a",AppendHandle) + else ("w",WriteHandle) + else if rwH then + ("r+",ReadWriteHandle) + else + ("r",ReadHandle) + in + _ccall_ fdopen fd ft >>= \ file_struct@(A# ptr#) -> + if file_struct /= (``NULL''::Addr) then + {- + A distinction is made here between std{Input,Output,Error} Fds + and all others. The standard descriptors have a finaliser + that will not close the underlying fd, the others have one + that will. Or rather, the closing of the standard descriptors is + delayed until the process exits. + -} +#ifndef __PARALLEL_HASKELL__ + (if fd == stdInput || fd == stdOutput || fd == stdError then + makeForeignObj file_struct (``&freeStdFile''::Addr) + else + makeForeignObj file_struct (``&freeFile''::Addr)) >>= \ fp -> + newHandle (handle_t fp Nothing False) +#else + newHandle (handle_t file_struct Nothing False) +#endif + else + syserr "fdToHandle" + else + syserr "fdToHandle" + +fdRead :: Fd -> ByteCount -> IO (String, ByteCount) +fdRead fd 0 = return ("", 0) +fdRead fd nbytes = do + bytes <- allocChars nbytes + rc <- _ccall_ read fd bytes nbytes + case rc of + -1 -> syserr "fdRead" + 0 -> fail (IOError Nothing EOF "EOF") + n | n == nbytes -> do + buf <- freeze bytes + return (unpackPS (unsafeByteArrayToPS buf n), n) + | otherwise -> do + -- Let go of the excessively long ByteArray# by copying to a + -- shorter one. Maybe we need a new primitive, shrinkCharArray#? + bytes' <- allocChars n + _casm_ ``do {I_ i; for(i = 0; i < %2; i++) ((B_)%0)[i] = ((B_)%1)[i]; + } while(0);'' bytes' bytes n + buf <- freeze bytes' + return (unpackPS (unsafeByteArrayToPS buf n), n) + +fdWrite :: Fd -> String -> IO ByteCount +fdWrite fd str = do + buf <- stToIO (psToByteArrayST str) + rc <- _ccall_ write fd buf (length str) + if rc /= -1 + then return rc + else syserr "fdWrite" + +data FdOption = AppendOnWrite + | CloseOnExec + | NonBlockingRead + +queryFdOption :: Fd -> FdOption -> IO Bool +queryFdOption fd CloseOnExec = + _ccall_ fcntl fd (``F_GETFD''::Int) 0 >>= \ (I# flags#) -> + if flags# /=# -1# then + return ((int2Word# flags# `and#` fd_cloexec#) `neWord#` int2Word# 0#) + else + syserr "queryFdOption" + where + fd_cloexec# = case (``FD_CLOEXEC'') of { W# x -> x } +queryFdOption fd other = + _ccall_ fcntl fd (``F_GETFL''::Int) 0 >>= \ (I# flags#) -> + if flags# >=# 0# then + return ((int2Word# flags# `and#` opt#) `neWord#` int2Word# 0#) + else + syserr "queryFdOption" + where + opt# = case ( + case other of + AppendOnWrite -> ``O_APPEND'' + NonBlockingRead -> ``O_NONBLOCK'' ) of { W# x -> x } + +setFdOption :: Fd -> FdOption -> Bool -> IO () +setFdOption fd CloseOnExec val = do + flags <- _ccall_ fcntl fd (``F_GETFD''::Int) 0 + if flags /= -1 then do + rc <- (if val then + _casm_ ``%r = fcntl(%0, F_SETFD, %1 | FD_CLOEXEC);'' fd flags + else do + _casm_ ``%r = fcntl(%0, F_SETFD, %1 & ~FD_CLOEXEC);'' fd flags) + if rc /= -1 + then return () + else fail + else fail + where + fail = syserr "setFdOption" + +setFdOption fd other val = do + flags <- _ccall_ fcntl fd (``F_GETFL''::Int) 0 + if flags >= 0 then do + rc <- (if val then + _casm_ ``%r = fcntl(%0, F_SETFL, %1 | %2);'' fd flags opt + else do + _casm_ ``%r = fcntl(%0, F_SETFL, %1 & ~(%2));'' fd flags opt) + if rc /= -1 + then return () + else fail + else fail + where + fail = syserr "setFdOption" + opt = + case other of + AppendOnWrite -> (``O_APPEND''::Word) + NonBlockingRead -> (``O_NONBLOCK''::Word) + +data LockRequest = ReadLock + | WriteLock + | Unlock + +type FileLock = (LockRequest, SeekMode, FileOffset, FileOffset) + +getLock :: Fd -> FileLock -> IO (Maybe (ProcessID, FileLock)) +getLock fd lock = do + flock <- lock2Bytes lock + rc <- _ccall_ fcntl fd (``F_GETLK''::Int) flock + if rc /= -1 + then do + result <- bytes2ProcessIDAndLock flock + return (maybeResult result) + else syserr "getLock" + where + maybeResult (_, (Unlock, _, _, _)) = Nothing + maybeResult x = Just x + +setLock :: Fd -> FileLock -> IO () +setLock fd lock = do + flock <- lock2Bytes lock + minusone_error (_ccall_ fcntl fd (``F_SETLK''::Int) flock) "setLock" + +waitToSetLock :: Fd -> FileLock -> IO () +waitToSetLock fd lock = do + flock <- lock2Bytes lock + minusone_error (_ccall_ fcntl fd (``F_SETLKW''::Int) flock) "waitToSetLock" + +fdSeek :: Fd -> SeekMode -> FileOffset -> IO FileOffset +fdSeek fd mode offset = do + rc <- _ccall_ lseek fd offset (mode2Int mode) + if rc /= -1 + then return rc + else syserr "fdSeek" + +\end{code} + +Local utility functions + +\begin{code} + +-- Convert a Haskell SeekMode to an int + +mode2Int :: SeekMode -> Int +mode2Int AbsoluteSeek = ``SEEK_SET'' +mode2Int RelativeSeek = ``SEEK_CUR'' +mode2Int SeekFromEnd = ``SEEK_END'' + +-- Convert a Haskell FileLock to an flock structure +lockRequest2Int :: LockRequest -> Int +lockRequest2Int kind = + case kind of + ReadLock -> ``F_RDLCK'' + WriteLock -> ``F_WRLCK'' + Unlock -> ``F_UNLCK'' + +lock2Bytes :: FileLock -> IO (MutableByteArray RealWorld ()) +lock2Bytes (kind, mode, start, len) = do + bytes <- allocChars ``sizeof(struct flock)'' + _casm_ ``do { struct flock *fl = (struct flock *)%0; + fl->l_type = %1; + fl->l_whence = %2; + fl->l_start = %3; + fl->l_len = %4; + } while(0);'' + bytes (lockRequest2Int kind) (mode2Int mode) start len + return bytes +-- where + +bytes2ProcessIDAndLock :: MutableByteArray s () -> IO (ProcessID, FileLock) +bytes2ProcessIDAndLock bytes = do + ltype <- _casm_ ``%r = ((struct flock *)%0)->l_type;'' bytes + lwhence <- _casm_ ``%r = ((struct flock *)%0)->l_whence;'' bytes + lstart <- _casm_ ``%r = ((struct flock *)%0)->l_start;'' bytes + llen <- _casm_ ``%r = ((struct flock *)%0)->l_len;'' bytes + lpid <- _casm_ ``%r = ((struct flock *)%0)->l_pid;'' bytes + return (lpid, (kind ltype, mode lwhence, lstart, llen)) +-- where +kind :: Int -> LockRequest +kind x + | x == ``F_RDLCK'' = ReadLock + | x == ``F_WRLCK'' = WriteLock + | x == ``F_UNLCK'' = Unlock +mode :: Int -> SeekMode +mode x + | x == ``SEEK_SET'' = AbsoluteSeek + | x == ``SEEK_CUR'' = RelativeSeek + | x == ``SEEK_END'' = SeekFromEnd + +\end{code} diff --git a/ghc/lib/posix/PosixProcEnv.lhs b/ghc/lib/posix/PosixProcEnv.lhs new file mode 100644 index 0000000..37627b3 --- /dev/null +++ b/ghc/lib/posix/PosixProcEnv.lhs @@ -0,0 +1,278 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1995-1996 +% +\section[PosixProcEnv]{Haskell 1.3 POSIX Process Environment} + +\begin{code} +module PosixProcEnv ( + ProcessTimes, + SysVar(..), + SystemID, + childSystemTime, + childUserTime, + createProcessGroup, + createSession, + elapsedTime, + epochTime, +#if !defined(cygwin32_TARGET_OS) + getControllingTerminalName, +#endif + getEffectiveGroupID, + getEffectiveUserID, + getEffectiveUserName, +#if !defined(cygwin32_TARGET_OS) + getGroups, +#endif + getLoginName, + getParentProcessID, + getProcessGroupID, + getProcessID, + getProcessTimes, + getRealGroupID, + getRealUserID, + getSysVar, + getSystemID, + getTerminalName, + joinProcessGroup, + machine, + nodeName, + queryTerminal, + release, + setGroupID, + setProcessGroupID, + setUserID, + systemName, + systemTime, + userTime, + version + ) where + +import GlaExts +import PrelArr (ByteArray(..)) -- see internals +import PrelIOBase +import IO + +import PosixErr +import PosixUtil + +getProcessID :: IO ProcessID +getProcessID = _ccall_ getpid + +getParentProcessID :: IO ProcessID +getParentProcessID = _ccall_ getppid + +getRealUserID :: IO UserID +getRealUserID = _ccall_ getuid + +getEffectiveUserID :: IO UserID +getEffectiveUserID = _ccall_ geteuid + +setUserID :: UserID -> IO () +setUserID uid = nonzero_error (_ccall_ setuid uid) "setUserID" + +getLoginName :: IO String +getLoginName = do + str <- _ccall_ getlogin + if str == ``NULL'' + then syserr "getLoginName" + else strcpy str + +getRealGroupID :: IO GroupID +getRealGroupID = _ccall_ getgid + +getEffectiveGroupID :: IO GroupID +getEffectiveGroupID = _ccall_ getegid + +setGroupID :: GroupID -> IO () +setGroupID gid = nonzero_error (_ccall_ setgid gid) "setGroupID" + +-- getgroups() is not supported in beta18 of +-- cygwin32 +#if !defined(cygwin32_TARGET_OS) +getGroups :: IO [GroupID] +getGroups = do + ngroups <- _ccall_ getgroups 0 (``NULL''::Addr) + words <- allocWords ngroups + ngroups <- _casm_ ``%r = getgroups(%0,(gid_t *)%1);'' ngroups words + if ngroups /= -1 + then do + arr <- freeze words + return (map (extract arr) [0..(ngroups-1)]) + else + syserr "getGroups" + where + extract (ByteArray _ barr#) (I# n#) = + case indexIntArray# barr# n# of + r# -> (I# r#) +#endif + +getEffectiveUserName :: IO String +getEffectiveUserName = do + str <- _ccall_ cuserid (``NULL''::Addr) + if str == ``NULL'' + then syserr "getEffectiveUserName" + else strcpy str + +getProcessGroupID :: IO ProcessGroupID +getProcessGroupID = _ccall_ getpgrp + +createProcessGroup :: ProcessID -> IO ProcessGroupID +createProcessGroup pid = do + pgid <- _ccall_ setpgid pid 0 + if pgid == 0 + then return pgid + else syserr "createProcessGroup" + +joinProcessGroup :: ProcessGroupID -> IO () +joinProcessGroup pgid = + nonzero_error (_ccall_ setpgid 0 pgid) "joinProcessGroupID" + +setProcessGroupID :: ProcessID -> ProcessGroupID -> IO () +setProcessGroupID pid pgid = + nonzero_error (_ccall_ setpgid pid pgid) "setProcessGroupID" + +createSession :: IO ProcessGroupID +createSession = do + pgid <- _ccall_ setsid + if pgid /= -1 + then return pgid + else syserr "createSession" + +type SystemID = ByteArray () + +systemName :: SystemID -> String +systemName sid = unsafePerformIO $ do + str <-_casm_ ``%r = ((struct utsname *)%0)->sysname;'' sid + strcpy str + +nodeName :: SystemID -> String +nodeName sid = unsafePerformIO $ do + str <- _casm_ ``%r = ((struct utsname *)%0)->nodename;'' sid + strcpy str + +release :: SystemID -> String +release sid = unsafePerformIO $ do + str <- _casm_ ``%r = ((struct utsname *)%0)->release;'' sid + strcpy str + +version :: SystemID -> String +version sid = unsafePerformIO $ do + str <- _casm_ ``%r = ((struct utsname *)%0)->version;'' sid + strcpy str + +machine :: SystemID -> String +machine sid = unsafePerformIO $ do + str <- _casm_ ``%r = ((struct utsname *)%0)->machine;'' sid + strcpy str + +getSystemID :: IO SystemID +getSystemID = do + bytes <- allocChars (``sizeof(struct utsname)''::Int) + rc <- _casm_ ``%r = uname((struct utsname *)%0);'' bytes + if rc /= -1 + then freeze bytes + else syserr "getSystemID" + +epochTime :: IO EpochTime +epochTime = do + secs <- _ccall_ time (``NULL''::Addr) + if secs /= -1 + then return secs + else syserr "epochTime" + +-- All times in clock ticks (see getClockTick) + +type ProcessTimes = (ClockTick, ByteArray ()) + +elapsedTime :: ProcessTimes -> ClockTick +elapsedTime (realtime, _) = realtime + +userTime :: ProcessTimes -> ClockTick +userTime (_, times) = unsafePerformIO $ + _casm_ ``%r = ((struct tms *)%0)->tms_utime;'' times + +systemTime :: ProcessTimes -> ClockTick +systemTime (_, times) = unsafePerformIO $ + _casm_ ``%r = ((struct tms *)%0)->tms_stime;'' times + +childUserTime :: ProcessTimes -> ClockTick +childUserTime (_, times) = unsafePerformIO $ + _casm_ ``%r = ((struct tms *)%0)->tms_cutime;'' times + +childSystemTime :: ProcessTimes -> ClockTick +childSystemTime (_, times) = unsafePerformIO $ + _casm_ ``%r = ((struct tms *)%0)->tms_cstime;'' times + +getProcessTimes :: IO ProcessTimes +getProcessTimes = do + bytes <- allocChars (``sizeof(struct tms)''::Int) + elapsed <- _casm_ ``%r = times((struct tms *)%0);'' bytes + if elapsed /= -1 + then do + times <- freeze bytes + return (elapsed, times) + else + syserr "getProcessTimes" + +#if !defined(cygwin32_TARGET_OS) +getControllingTerminalName :: IO FilePath +getControllingTerminalName = do + str <- _ccall_ ctermid (``NULL''::Addr) + if str == ``NULL'' + then fail (IOError Nothing NoSuchThing "getControllingTerminalName: no controlling terminal") + else strcpy str +#endif + +getTerminalName :: Fd -> IO FilePath +getTerminalName fd = do + str <- _ccall_ ttyname fd + if str == ``NULL'' + then do + err <- try (queryTerminal fd) + either (\err -> syserr "getTerminalName") + (\succ -> if succ then fail (IOError Nothing NoSuchThing + "getTerminalName: no name") + else fail (IOError Nothing InappropriateType + "getTerminalName: not a terminal")) + err + else strcpy str + +queryTerminal :: Fd -> IO Bool +queryTerminal (FD# fd) = do + rc <- _ccall_ isatty fd + case rc of + -1 -> syserr "queryTerminal" + 0 -> return False + 1 -> return True + +data SysVar = ArgumentLimit + | ChildLimit + | ClockTick + | GroupLimit + | OpenFileLimit + | PosixVersion + | HasSavedIDs + | HasJobControl + +getSysVar :: SysVar -> IO Limit +getSysVar v = + case v of + ArgumentLimit -> sysconf ``_SC_ARG_MAX'' + ChildLimit -> sysconf ``_SC_CHILD_MAX'' + ClockTick -> sysconf ``_SC_CLK_TCK'' + GroupLimit -> sysconf ``_SC_NGROUPS_MAX'' + OpenFileLimit -> sysconf ``_SC_OPEN_MAX'' + PosixVersion -> sysconf ``_SC_VERSION'' + HasSavedIDs -> sysconf ``_SC_SAVED_IDS'' + HasJobControl -> sysconf ``_SC_JOB_CONTROL'' +-- where + +sysconf :: Int -> IO Limit +sysconf n = do + rc <- _ccall_ sysconf n + if rc /= -1 + then return rc + else fail (IOError Nothing NoSuchThing + "getSysVar: no such system limit or option") + +\end{code} diff --git a/ghc/lib/posix/PosixProcPrim.lhs b/ghc/lib/posix/PosixProcPrim.lhs new file mode 100644 index 0000000..d8c1a91 --- /dev/null +++ b/ghc/lib/posix/PosixProcPrim.lhs @@ -0,0 +1,502 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1995-1997 +% +\section[PosixProcPrim]{Haskell 1.3 POSIX Process Primitives} + +\begin{code} + +#include "config.h" + +module PosixProcPrim ( + Handler(..), + SignalSet, + Signal, + ProcessStatus(..), + + addSignal, + awaitSignal, + backgroundRead, + backgroundWrite, + blockSignals, +#ifndef cygwin32_TARGET_OS + continueProcess, +#endif + deleteSignal, + emptySignalSet, + executeFile, + exitImmediately, + floatingPointException, + forkProcess, + fullSignalSet, + getAnyProcessStatus, + getEnvVar, + getEnvironment, + getGroupProcessStatus, + getPendingSignals, + getProcessStatus, + getSignalMask, + illegalInstruction, + inSignalSet, + installHandler, + internalAbort, + keyboardSignal, + keyboardStop, + keyboardTermination, + killProcess, + lostConnection, + nullSignal, + openEndedPipe, + processStatusChanged, + queryStoppedChildFlag, + raiseSignal, + realTimeAlarm, + removeEnvVar, + scheduleAlarm, + segmentationViolation, + setEnvVar, + setEnvironment, + setSignalMask, + setStoppedChildFlag, + sigABRT, + sigALRM, + sigCHLD, +#ifndef cygwin32_TARGET_OS + sigCONT, +#endif + sigFPE, + sigHUP, + sigILL, + sigINT, + sigKILL, + sigPIPE, + sigProcMask, + sigQUIT, + sigSEGV, + sigSTOP, + sigSetSize, + sigTERM, + sigTSTP, + sigTTIN, + sigTTOU, + sigUSR1, + sigUSR2, + signalProcess, + signalProcessGroup, + sleep, + softwareStop, + softwareTermination, + unBlockSignals, + userDefinedSignal1, + userDefinedSignal2, + + ExitCode + + ) where + +import GlaExts +import IO +import PrelIOBase +import PackedString (psToByteArrayST) +import Foreign -- stable pointers +import PosixErr +import PosixUtil + +import System(ExitCode(..)) +import PosixProcEnv (getProcessID) + +forkProcess :: IO (Maybe ProcessID) +forkProcess = do + pid <-_ccall_ fork + case pid of + -1 -> syserr "forkProcess" + 0 -> return Nothing + _ -> return (Just pid) + +executeFile :: FilePath -- Command + -> Bool -- Search PATH? + -> [String] -- Arguments + -> Maybe [(String, String)] -- Environment + -> IO () +executeFile path search args Nothing = do + prog <- psToByteArrayIO path + argv <- vectorize (basename path:args) + (if search then + _casm_ ``execvp(%0,(char **)%1);'' prog argv + else + _casm_ ``execv(%0,(char **)%1);'' prog argv + ) + syserr "executeFile" + +executeFile path search args (Just env) = do + prog <- psToByteArrayIO path + argv <- vectorize (basename path:args) + envp <- vectorize (map (\ (name, val) -> name ++ ('=' : val)) env) + (if search then + _casm_ `` execvpe(%0,(char **)%1,(char **)%2);'' prog argv envp + else + _casm_ `` execve(%0,(char **)%1,(char **)%2);'' prog argv envp + ) + syserr "executeFile" + +data ProcessStatus = Exited ExitCode + | Terminated Signal + | Stopped Signal + deriving (Eq, Ord, Show) + +getProcessStatus :: Bool -> Bool -> ProcessID -> IO (Maybe ProcessStatus) +getProcessStatus block stopped pid = do + wstat <- allocWords 1 + pid <-_casm_ ``%r = waitpid(%0, (int *)%1, %2);'' pid wstat + (waitOptions block stopped) + case pid of + -1 -> syserr "getProcessStatus" + 0 -> return Nothing + _ -> do ps <- decipherWaitStatus wstat + return (Just ps) + +getGroupProcessStatus :: Bool + -> Bool + -> ProcessGroupID + -> IO (Maybe (ProcessID, ProcessStatus)) +getGroupProcessStatus block stopped pgid = do + wstat <- allocWords 1 + pid <-_casm_ ``%r = waitpid(%0, (int *)%1, %2);'' (-pgid) wstat + (waitOptions block stopped) + case pid of + -1 -> syserr "getGroupProcessStatus" + 0 -> return Nothing + _ -> do ps <- decipherWaitStatus wstat + return (Just (pid, ps)) + +getAnyProcessStatus :: Bool -> Bool -> IO (Maybe (ProcessID, ProcessStatus)) +getAnyProcessStatus block stopped = + getGroupProcessStatus block stopped 1 `catch` + \ err -> syserr "getAnyProcessStatus" + +exitImmediately :: ExitCode -> IO () +exitImmediately exitcode = do + _casm_ ``_exit(%0);'' (exitcode2Int exitcode) + syserr "exitImmediately" + where + exitcode2Int ExitSuccess = 0 + exitcode2Int (ExitFailure n) = n + +getEnvironment :: IO [(String, String)] +getEnvironment = do + env <- unvectorize ``environ'' 0 + return (map (split "") env) + where + split :: String -> String -> (String, String) + split x [] = error ("PosixProcPrim.getEnvironment:no `='? in: "++reverse x) + split x ('=' : xs) = (reverse x, xs) + split x (c:cs) = split (c:x) cs + +setEnvironment :: [(String, String)] -> IO () +setEnvironment pairs = do + env <- vectorize (map (\ (var,val) -> var ++ ('=' : val)) pairs) + nonzero_error (_casm_ ``%r = setenviron((char **)%0);'' env) + "setEnvironment" + +getEnvVar :: String -> IO String +getEnvVar name = do + str <- psToByteArrayIO name + str <- _ccall_ getenv str + if str == ``NULL'' + then fail (IOError Nothing NoSuchThing + "getEnvVar: no such environment variable") + else strcpy str + +setEnvVar :: String -> String -> IO () +setEnvVar name value = do + str <- psToByteArrayIO (name ++ ('=' : value)) + nonzero_error (_casm_ ``%r = _setenv(%0);'' str) "setEnvVar" + +removeEnvVar :: String -> IO () +removeEnvVar name = do + str <- psToByteArrayIO name + nonzero_error (_ccall_ delenv str) "removeEnvVar" + +type Signal = Int + +nullSignal :: Signal +nullSignal = 0 + +backgroundRead, sigTTIN :: Signal +backgroundRead = ``SIGTTIN'' +sigTTIN = ``SIGTTIN'' + +backgroundWrite, sigTTOU :: Signal +backgroundWrite = ``SIGTTOU'' +sigTTOU = ``SIGTTOU'' + +#ifndef cygwin32_TARGET_OS +continueProcess, sigCONT :: Signal +continueProcess = ``SIGCONT'' +sigCONT = ``SIGCONT'' +#endif + +floatingPointException, sigFPE :: Signal +floatingPointException = ``SIGFPE'' +sigFPE = ``SIGFPE'' + +illegalInstruction, sigILL :: Signal +illegalInstruction = ``SIGILL'' +sigILL = ``SIGILL'' + +internalAbort, sigABRT ::Signal +internalAbort = ``SIGABRT'' +sigABRT = ``SIGABRT'' + +keyboardSignal, sigINT :: Signal +keyboardSignal = ``SIGINT'' +sigINT = ``SIGINT'' + +keyboardStop, sigTSTP :: Signal +keyboardStop = ``SIGTSTP'' +sigTSTP = ``SIGTSTP'' + +keyboardTermination, sigQUIT :: Signal +keyboardTermination = ``SIGQUIT'' +sigQUIT = ``SIGQUIT'' + +killProcess, sigKILL :: Signal +killProcess = ``SIGKILL'' +sigKILL = ``SIGKILL'' + +lostConnection, sigHUP :: Signal +lostConnection = ``SIGHUP'' +sigHUP = ``SIGHUP'' + +openEndedPipe, sigPIPE :: Signal +openEndedPipe = ``SIGPIPE'' +sigPIPE = ``SIGPIPE'' + +processStatusChanged, sigCHLD :: Signal +processStatusChanged = ``SIGCHLD'' +sigCHLD = ``SIGCHLD'' + +realTimeAlarm, sigALRM :: Signal +realTimeAlarm = ``SIGALRM'' +sigALRM = ``SIGALRM'' + +segmentationViolation, sigSEGV :: Signal +segmentationViolation = ``SIGSEGV'' +sigSEGV = ``SIGSEGV'' + +softwareStop, sigSTOP :: Signal +softwareStop = ``SIGSTOP'' +sigSTOP = ``SIGSTOP'' + +softwareTermination, sigTERM :: Signal +softwareTermination = ``SIGTERM'' +sigTERM = ``SIGTERM'' + +userDefinedSignal1, sigUSR1 :: Signal +userDefinedSignal1 = ``SIGUSR1'' +sigUSR1 = ``SIGUSR1'' + +userDefinedSignal2, sigUSR2 :: Signal +userDefinedSignal2 = ``SIGUSR2'' +sigUSR2 = ``SIGUSR2'' + +signalProcess :: Signal -> ProcessID -> IO () +signalProcess int pid = + nonzero_error (_ccall_ kill pid int) "signalProcess" + +raiseSignal :: Signal -> IO () +raiseSignal int = getProcessID >>= signalProcess int + +signalProcessGroup :: Signal -> ProcessGroupID -> IO () +signalProcessGroup int pgid = signalProcess int (-pgid) + +setStoppedChildFlag :: Bool -> IO Bool +setStoppedChildFlag b = do + rc <- _casm_ ``%r = nocldstop; nocldstop = %0;'' x + return (rc == 0) + where + x = case b of {True -> 0; False -> 1} + +queryStoppedChildFlag :: IO Bool +queryStoppedChildFlag = do + rc <- _casm_ ``%r = nocldstop;'' + return (rc == 0) + +data Handler = Default + | Ignore + | Catch (IO ()) + +type SignalSet = ByteArray () + +sigSetSize :: Int +sigSetSize = ``sizeof(sigset_t)'' + +emptySignalSet :: SignalSet +emptySignalSet = unsafePerformPrimIO $ do + bytes <- allocChars sigSetSize + _casm_ ``(void) sigemptyset((sigset_t *)%0);'' bytes + freeze bytes + +fullSignalSet :: SignalSet +fullSignalSet = unsafePerformPrimIO $ do + bytes <- allocChars sigSetSize + _casm_ ``(void) sigfillset((sigset_t *)%0);'' bytes + freeze bytes + +addSignal :: Signal -> SignalSet -> SignalSet +addSignal int oldset = unsafePerformPrimIO $ do + bytes <- allocChars sigSetSize + _casm_ ``*(sigset_t *)%0 = *(sigset_t *)%1; + (void) sigaddset((sigset_t *)%0, %2);'' + bytes oldset int + freeze bytes + +inSignalSet :: Signal -> SignalSet -> Bool +inSignalSet int sigset = unsafePerformPrimIO $ do + rc <- _casm_ ``%r = sigismember((sigset_t *)%0, %1);'' sigset int + return (rc == 1) + +deleteSignal :: Signal -> SignalSet -> SignalSet +deleteSignal int oldset = unsafePerformPrimIO $ do + bytes <- allocChars sigSetSize + _casm_ ``*(sigset_t *)%0 = *(sigset_t *)%1; + (void) sigdelset((sigset_t *)%0, %2);'' + bytes oldset int + freeze bytes + +installHandler :: Signal + -> Handler + -> Maybe SignalSet -- other signals to block + -> IO Handler -- old handler + +#ifdef __PARALLEL_HASKELL__ +installHandler = error "installHandler: not available for Parallel Haskell" +#else +installHandler int handler maybe_mask = ( + case handler of + Default -> _ccall_ stg_sig_default int mask + Ignore -> _ccall_ stg_sig_ignore int mask + Catch m -> do + sptr <- makeStablePtr (ioToPrimIO m) + _ccall_ stg_sig_catch int sptr mask + ) >>= \rc -> + + if rc >= 0 then do + osptr <- _casm_ ``%r = (StgStablePtr) (%0);'' rc + m <- deRefStablePtr osptr + return (Catch m) + else if rc == ``STG_SIG_DFL'' then + return Default + else if rc == ``STG_SIG_IGN'' then + return Ignore + else + syserr "installHandler" + where + mask = case maybe_mask of + Nothing -> emptySignalSet + Just x -> x + +#endif {-!__PARALLEL_HASKELL__-} + +getSignalMask :: IO SignalSet +getSignalMask = do + bytes <- allocChars sigSetSize + rc <- _casm_ ``%r = sigprocmask(0, NULL, (sigset_t *)%0);'' bytes + if rc == 0 + then freeze bytes + else syserr "getSignalMask" + +sigProcMask :: String -> Int -> SignalSet -> IO SignalSet +sigProcMask name how sigset = do + bytes <- allocChars sigSetSize + rc <- _casm_ ``%r = sigprocmask(%0, (sigset_t *)%1, (sigset_t *)%2);'' + how sigset bytes + if rc == 0 + then freeze bytes + else syserr name + +setSignalMask :: SignalSet -> IO SignalSet +setSignalMask = sigProcMask "setSignalMask" ``SIG_SETMASK'' + +blockSignals :: SignalSet -> IO SignalSet +blockSignals = sigProcMask "blockSignals" ``SIG_BLOCK'' + +unBlockSignals :: SignalSet -> IO SignalSet +unBlockSignals = sigProcMask "unBlockSignals" ``SIG_UNBLOCK'' + +getPendingSignals :: IO SignalSet +getPendingSignals = do + bytes <- allocChars sigSetSize + rc <- _casm_ ``%r = sigpending((sigset_t *)%0);'' bytes + if rc == 0 + then freeze bytes + else syserr "getPendingSignals" + +awaitSignal :: Maybe SignalSet -> IO () +awaitSignal maybe_sigset = do + pause maybe_sigset + err <- getErrorCode + if err == interruptedOperation + then return () + else syserr "awaitSignal" +-- where + +pause :: Maybe SignalSet -> IO () +pause maybe_sigset = + case maybe_sigset of + Nothing -> _casm_ ``(void) pause();'' + Just sigset -> _casm_ ``(void) sigsuspend((sigset_t *)%0);'' sigset + +scheduleAlarm :: Int -> IO Int +scheduleAlarm (I# secs#) = + _ccall_ alarm (W# (int2Word# secs#)) >>= \ (W# w#) -> + return (I# (word2Int# w#)) + +sleep :: Int -> IO () +sleep 0 = return () +sleep (I# secs#) = do + _ccall_ sleep (W# (int2Word# secs#)) + return () +\end{code} + +Local utility functions + +\begin{code} + +-- Get the trailing component of a path + +basename :: String -> String +basename "" = "" +basename (c:cs) + | c == '/' = basename cs + | otherwise = c : basename cs + +-- Convert wait options to appropriate set of flags + +waitOptions :: Bool -> Bool -> Int +-- block stopped +waitOptions False False = ``WNOHANG'' +waitOptions False True = ``(WNOHANG|WUNTRACED)'' +waitOptions True False = 0 +waitOptions True True = ``WUNTRACED'' + +-- Turn a (ptr to a) wait status into a ProcessStatus + +decipherWaitStatus :: MutableByteArray s x -> IO ProcessStatus +decipherWaitStatus wstat = do + exited <- _casm_ ``%r = WIFEXITED(*(int *)%0);'' wstat + if exited /= 0 + then do + exitstatus <- _casm_ ``%r = WEXITSTATUS(*(int *)%0);'' wstat + if exitstatus == 0 + then return (Exited ExitSuccess) + else return (Exited (ExitFailure exitstatus)) + else do + signalled <- _casm_ ``%r = WIFSIGNALED(*(int *)%0);'' wstat + if signalled /= 0 + then do + termsig <- _casm_ ``%r = WTERMSIG(*(int *)%0);'' wstat + return (Terminated termsig) + else do + stopsig <-_casm_ ``%r = WSTOPSIG(*(int *)%0);'' wstat + return (Stopped stopsig) +\end{code} diff --git a/ghc/lib/posix/PosixTTY.lhs b/ghc/lib/posix/PosixTTY.lhs new file mode 100644 index 0000000..36bee8d --- /dev/null +++ b/ghc/lib/posix/PosixTTY.lhs @@ -0,0 +1,523 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1995-1996 +% +\section[PosixTTY]{Haskell 1.3 POSIX Device-Specific Functions} + +\begin{code} +module PosixTTY ( + BaudRate(..), + ControlCharacter(..), + FlowAction(..), + QueueSelector(..), + TerminalAttributes, + TerminalMode(..), + TerminalState(..), + bitsPerByte, + controlChar, + controlFlow, + discardData, + drainOutput, + getTerminalAttributes, + getTerminalProcessGroupID, + inputSpeed, + inputTime, + minInput, + outputSpeed, + sendBreak, + setTerminalAttributes, + setTerminalProcessGroupID, + terminalMode, + withBits, + withCC, + withInputSpeed, + withMinInput, + withMode, + withOutputSpeed, + withTime, + withoutCC, + withoutMode + ) where + +import GlaExts +import IOExts ( unsafePerformIO ) + +import IO +import Foreign + +import PosixUtil +import PosixErr + +type TerminalAttributes = ByteArray () + +data TerminalMode = InterruptOnBreak + | MapCRtoLF + | IgnoreBreak + | IgnoreCR + | IgnoreParityErrors + | MapLFtoCR + | CheckParity + | StripHighBit + | StartStopInput + | StartStopOutput + | MarkParityErrors + | ProcessOutput + | LocalMode + | ReadEnable + | TwoStopBits + | HangupOnClose + | EnableParity + | OddParity + | EnableEcho + | EchoErase + | EchoKill + | EchoLF + | ProcessInput + | ExtendedFunctions + | KeyboardInterrupts + | NoFlushOnInterrupt + | BackgroundWriteInterrupt + +withoutMode :: TerminalAttributes -> TerminalMode -> TerminalAttributes +withoutMode termios InterruptOnBreak = clearInputFlag ``BRKINT'' termios +withoutMode termios MapCRtoLF = clearInputFlag ``ICRNL'' termios +withoutMode termios IgnoreBreak = clearInputFlag ``IGNBRK'' termios +withoutMode termios IgnoreCR = clearInputFlag ``IGNCR'' termios +withoutMode termios IgnoreParityErrors = clearInputFlag ``IGNPAR'' termios +withoutMode termios MapLFtoCR = clearInputFlag ``INLCR'' termios +withoutMode termios CheckParity = clearInputFlag ``INPCK'' termios +withoutMode termios StripHighBit = clearInputFlag ``ISTRIP'' termios +withoutMode termios StartStopInput = clearInputFlag ``IXOFF'' termios +withoutMode termios StartStopOutput = clearInputFlag ``IXON'' termios +withoutMode termios MarkParityErrors = clearInputFlag ``PARMRK'' termios +withoutMode termios ProcessOutput = unsafePerformIO $ + allocChars ``sizeof(struct termios)'' >>= \ bytes -> + _casm_ ``*(struct termios *)%0 = *(struct termios *)%1; + ((struct termios *)%0)->c_oflag &= ~OPOST;'' bytes termios + >>= \ () -> + freeze bytes +withoutMode termios LocalMode = clearControlFlag ``CLOCAL'' termios +withoutMode termios ReadEnable = clearControlFlag ``CREAD'' termios +withoutMode termios TwoStopBits = clearControlFlag ``CSTOPB'' termios +withoutMode termios HangupOnClose = clearControlFlag ``HUPCL'' termios +withoutMode termios EnableParity = clearControlFlag ``PARENB'' termios +withoutMode termios OddParity = clearControlFlag ``PARODD'' termios +withoutMode termios EnableEcho = clearLocalFlag ``ECHO'' termios +withoutMode termios EchoErase = clearLocalFlag ``ECHOE'' termios +withoutMode termios EchoKill = clearLocalFlag ``ECHOK'' termios +withoutMode termios EchoLF = clearLocalFlag ``ECHONL'' termios +withoutMode termios ProcessInput = clearLocalFlag ``ICANON'' termios +withoutMode termios ExtendedFunctions = clearLocalFlag ``IEXTEN'' termios +withoutMode termios KeyboardInterrupts = clearLocalFlag ``ISIG'' termios +withoutMode termios NoFlushOnInterrupt = setLocalFlag ``NOFLSH'' termios +withoutMode termios BackgroundWriteInterrupt = clearLocalFlag ``TOSTOP'' termios + +withMode :: TerminalAttributes -> TerminalMode -> TerminalAttributes +withMode termios InterruptOnBreak = setInputFlag ``BRKINT'' termios +withMode termios MapCRtoLF = setInputFlag ``ICRNL'' termios +withMode termios IgnoreBreak = setInputFlag ``IGNBRK'' termios +withMode termios IgnoreCR = setInputFlag ``IGNCR'' termios +withMode termios IgnoreParityErrors = setInputFlag ``IGNPAR'' termios +withMode termios MapLFtoCR = setInputFlag ``INLCR'' termios +withMode termios CheckParity = setInputFlag ``INPCK'' termios +withMode termios StripHighBit = setInputFlag ``ISTRIP'' termios +withMode termios StartStopInput = setInputFlag ``IXOFF'' termios +withMode termios StartStopOutput = setInputFlag ``IXON'' termios +withMode termios MarkParityErrors = setInputFlag ``PARMRK'' termios +withMode termios ProcessOutput = unsafePerformIO $ do + bytes <- allocChars ``sizeof(struct termios)'' + _casm_ ``*(struct termios *)%0 = *(struct termios *)%1; + ((struct termios *)%0)->c_oflag |= OPOST;'' bytes termios + freeze bytes +withMode termios LocalMode = setControlFlag ``CLOCAL'' termios +withMode termios ReadEnable = setControlFlag ``CREAD'' termios +withMode termios TwoStopBits = setControlFlag ``CSTOPB'' termios +withMode termios HangupOnClose = setControlFlag ``HUPCL'' termios +withMode termios EnableParity = setControlFlag ``PARENB'' termios +withMode termios OddParity = setControlFlag ``PARODD'' termios +withMode termios EnableEcho = setLocalFlag ``ECHO'' termios +withMode termios EchoErase = setLocalFlag ``ECHOE'' termios +withMode termios EchoKill = setLocalFlag ``ECHOK'' termios +withMode termios EchoLF = setLocalFlag ``ECHONL'' termios +withMode termios ProcessInput = setLocalFlag ``ICANON'' termios +withMode termios ExtendedFunctions = setLocalFlag ``IEXTEN'' termios +withMode termios KeyboardInterrupts = setLocalFlag ``ISIG'' termios +withMode termios NoFlushOnInterrupt = clearLocalFlag ``NOFLSH'' termios +withMode termios BackgroundWriteInterrupt = setLocalFlag ``TOSTOP'' termios + +terminalMode :: TerminalMode -> TerminalAttributes -> Bool +terminalMode InterruptOnBreak = testInputFlag ``BRKINT'' +terminalMode MapCRtoLF = testInputFlag ``ICRNL'' +terminalMode IgnoreBreak = testInputFlag ``IGNBRK'' +terminalMode IgnoreCR = testInputFlag ``IGNCR'' +terminalMode IgnoreParityErrors = testInputFlag ``IGNPAR'' +terminalMode MapLFtoCR = testInputFlag ``INLCR'' +terminalMode CheckParity = testInputFlag ``INPCK'' +terminalMode StripHighBit = testInputFlag ``ISTRIP'' +terminalMode StartStopInput = testInputFlag ``IXOFF'' +terminalMode StartStopOutput = testInputFlag ``IXON'' +terminalMode MarkParityErrors = testInputFlag ``PARMRK'' +terminalMode ProcessOutput = \ termios -> unsafePerformIO $ + _casm_ ``%r = ((struct termios *)%0)->c_oflag & OPOST;'' termios + >>= \ (W# flags#) -> + return (flags# `neWord#` int2Word# 0#) +terminalMode LocalMode = testControlFlag ``CLOCAL'' +terminalMode ReadEnable = testControlFlag ``CREAD'' +terminalMode TwoStopBits = testControlFlag ``CSTOPB'' +terminalMode HangupOnClose = testControlFlag ``HUPCL'' +terminalMode EnableParity = testControlFlag ``PARENB'' +terminalMode OddParity = testControlFlag ``PARODD'' +terminalMode EnableEcho = testLocalFlag ``ECHO'' +terminalMode EchoErase = testLocalFlag ``ECHOE'' +terminalMode EchoKill = testLocalFlag ``ECHOK'' +terminalMode EchoLF = testLocalFlag ``ECHONL'' +terminalMode ProcessInput = testLocalFlag ``ICANON'' +terminalMode ExtendedFunctions = testLocalFlag ``IEXTEN'' +terminalMode KeyboardInterrupts = testLocalFlag ``ISIG'' +terminalMode NoFlushOnInterrupt = not . testLocalFlag ``NOFLSH'' +terminalMode BackgroundWriteInterrupt = testLocalFlag ``TOSTOP'' + +bitsPerByte :: TerminalAttributes -> Int +bitsPerByte termios = unsafePerformIO $ do + w <- _casm_ ``%r = ((struct termios *)%0)->c_cflag & CSIZE;'' termios + return (word2Bits w) + where + word2Bits :: Word -> Int + word2Bits x = + if x == ``CS5'' then 5 + else if x == ``CS6'' then 6 + else if x == ``CS7'' then 7 + else if x == ``CS8'' then 8 + else 0 + +withBits :: TerminalAttributes -> Int -> TerminalAttributes +withBits termios bits = unsafePerformIO $ do + bytes <- allocChars ``sizeof(struct termios)'' + _casm_ ``*(struct termios *)%0 = *(struct termios *)%1; + ((struct termios *)%0)->c_cflag = + (((struct termios *)%1)->c_cflag & ~CSIZE) | %2;'' + bytes termios (mask bits) + freeze bytes + where + mask :: Int -> Word + mask 5 = ``CS5'' + mask 6 = ``CS6'' + mask 7 = ``CS7'' + mask 8 = ``CS8'' + mask _ = error "withBits bit value out of range [5..8]" + +data ControlCharacter = EndOfFile + | EndOfLine + | Erase + | Interrupt + | Kill + | Quit + | Suspend + | Start + | Stop + +controlChar :: TerminalAttributes -> ControlCharacter -> Maybe Char +controlChar termios cc = unsafePerformIO $ do + val <- _casm_ ``%r = ((struct termios *)%0)->c_cc[%1];'' + termios (cc2Word cc) + if val == ``_POSIX_VDISABLE'' + then return Nothing + else return (Just (toEnum val)) + +withCC :: TerminalAttributes + -> (ControlCharacter, Char) + -> TerminalAttributes +withCC termios (cc, c) = unsafePerformIO $ do + bytes <- allocChars ``sizeof(struct termios)'' + _casm_ ``*(struct termios *)%0 = *(struct termios *)%1; + ((struct termios *)%0)->c_cc[%2] = %3;'' + bytes termios (cc2Word cc) c + freeze bytes + +withoutCC :: TerminalAttributes + -> ControlCharacter + -> TerminalAttributes +withoutCC termios cc = unsafePerformIO $ do + bytes <- allocChars ``sizeof(struct termios)'' + _casm_ ``*(struct termios *)%0 = *(struct termios *)%1; + ((struct termios *)%0)->c_cc[%2] = _POSIX_VDISABLE;'' + bytes termios (cc2Word cc) + freeze bytes + +inputTime :: TerminalAttributes -> Int +inputTime termios = unsafePerformIO $ do + _casm_ ``%r = ((struct termios *)%0)->c_cc[VTIME];'' termios + +withTime :: TerminalAttributes -> Int -> TerminalAttributes +withTime termios time = unsafePerformIO $ do + bytes <- allocChars ``sizeof(struct termios)'' + _casm_ ``*(struct termios *)%0 = *(struct termios *)%1; + ((struct termios *)%0)->c_cc[VTIME] = %2;'' bytes termios time + freeze bytes + +minInput :: TerminalAttributes -> Int +minInput termios = unsafePerformIO $ do + _casm_ ``%r = ((struct termios *)%0)->c_cc[VMIN];'' termios + +withMinInput :: TerminalAttributes -> Int -> TerminalAttributes +withMinInput termios count = unsafePerformIO $ do + bytes <- allocChars ``sizeof(struct termios)'' + _casm_ ``*(struct termios *)%0 = *(struct termios *)%1; + ((struct termios *)%0)->c_cc[VMIN] = %2;'' bytes termios count + freeze bytes + +data BaudRate = B0 + | B50 + | B75 + | B110 + | B134 + | B150 + | B200 + | B300 + | B600 + | B1200 + | B1800 + | B2400 + | B4800 + | B9600 + | B19200 + | B38400 + +inputSpeed :: TerminalAttributes -> BaudRate +inputSpeed termios = unsafePerformIO $ do + w <-_casm_ ``%r = cfgetispeed((struct termios *)%0);'' termios + return (word2Baud w) + +withInputSpeed :: TerminalAttributes -> BaudRate -> TerminalAttributes +withInputSpeed termios br = unsafePerformIO $ do + bytes <- allocChars ``sizeof(struct termios)'' + _casm_ ``*(struct termios *)%0 = *(struct termios *)%1; + cfsetispeed((struct termios *)%0, %2);'' bytes termios (baud2Word br) + freeze bytes + +outputSpeed :: TerminalAttributes -> BaudRate +outputSpeed termios = unsafePerformIO $ do + w <- _casm_ ``%r = cfgetospeed((struct termios *)%0);'' termios + return (word2Baud w) + +withOutputSpeed :: TerminalAttributes -> BaudRate -> TerminalAttributes +withOutputSpeed termios br = unsafePerformIO $ do + bytes <- allocChars ``sizeof(struct termios)'' + _casm_ ``*(struct termios *)%0 = *(struct termios *)%1; + cfsetospeed((struct termios *)%0, %2);'' bytes termios (baud2Word br) + freeze bytes + +getTerminalAttributes :: Fd -> IO TerminalAttributes +getTerminalAttributes (FD# fd) = do + bytes <- allocChars ``sizeof(struct termios)'' + rc <- _casm_ ``%r = tcgetattr(%0,(struct termios *)%1);'' fd bytes + if rc /= -1 + then freeze bytes + else syserr "getTerminalAttributes" + +data TerminalState = Immediately + | WhenDrained + | WhenFlushed + +setTerminalAttributes :: Fd + -> TerminalAttributes + -> TerminalState + -> IO () +setTerminalAttributes (FD# fd) termios state = do + rc <- _casm_ ``%r = tcsetattr(%0,%1,(struct termios *)%2);'' + fd (state2Int state) termios + if rc /= -1 + then return () + else syserr "setTerminalAttributes" + where + state2Int :: TerminalState -> Int + state2Int Immediately = ``TCSANOW'' + state2Int WhenDrained = ``TCSADRAIN'' + state2Int WhenFlushed = ``TCSAFLUSH'' + +sendBreak :: Fd -> Int -> IO () +sendBreak (FD# fd) duration = + nonzero_error (_ccall_ tcsendbreak fd duration) "sendBreak" + +drainOutput :: Fd -> IO () +drainOutput (FD# fd) = + nonzero_error (_ccall_ tcdrain fd) "drainOutput" + +data QueueSelector = InputQueue + | OutputQueue + | BothQueues + +discardData :: Fd -> QueueSelector -> IO () +discardData (FD# fd) queue = + minusone_error (_ccall_ tcflush fd (queue2Int queue)) "discardData" + where + queue2Int :: QueueSelector -> Int + queue2Int InputQueue = ``TCIFLUSH'' + queue2Int OutputQueue = ``TCOFLUSH'' + queue2Int BothQueues = ``TCIOFLUSH'' + +data FlowAction = SuspendOutput + | RestartOutput + | TransmitStop + | TransmitStart + +controlFlow :: Fd -> FlowAction -> IO () +controlFlow (FD# fd) action = + minusone_error (_ccall_ tcflow fd (action2Int action)) "controlFlow" + where + action2Int :: FlowAction -> Int + action2Int SuspendOutput = ``TCOOFF'' + action2Int RestartOutput = ``TCOON'' + action2Int TransmitStop = ``TCIOFF'' + action2Int TransmitStart = ``TCION'' + +getTerminalProcessGroupID :: Fd -> IO ProcessGroupID +getTerminalProcessGroupID (FD# fd) = do + pgid <- _ccall_ tcgetpgrp fd + if pgid /= -1 + then return pgid + else syserr "getTerminalProcessGroupID" + +setTerminalProcessGroupID :: Fd -> ProcessGroupID -> IO () +setTerminalProcessGroupID (FD# fd) pgid = + nonzero_error (_ccall_ tcsetpgrp fd pgid) "setTerminalProcessGroupID" + +\end{code} + +Local utility functions + +\begin{code} + +-- Convert Haskell ControlCharacter to Int + +cc2Word :: ControlCharacter -> Word +cc2Word EndOfFile = ``VEOF'' +cc2Word EndOfLine = ``VEOL'' +cc2Word Erase = ``VERASE'' +cc2Word Interrupt = ``VINTR'' +cc2Word Kill = ``VKILL'' +cc2Word Quit = ``VQUIT'' +cc2Word Suspend = ``VSUSP'' +cc2Word Start = ``VSTART'' +cc2Word Stop = ``VSTOP'' + +-- Convert Haskell BaudRate to unsigned integral type (Word) + +baud2Word :: BaudRate -> Word +baud2Word B0 = ``B0'' +baud2Word B50 = ``B50'' +baud2Word B75 = ``B75'' +baud2Word B110 = ``B110'' +baud2Word B134 = ``B134'' +baud2Word B150 = ``B150'' +baud2Word B200 = ``B200'' +baud2Word B300 = ``B300'' +baud2Word B600 = ``B600'' +baud2Word B1200 = ``B1200'' +baud2Word B1800 = ``B1800'' +baud2Word B2400 = ``B2400'' +baud2Word B4800 = ``B4800'' +baud2Word B9600 = ``B9600'' +baud2Word B19200 = ``B19200'' +baud2Word B38400 = ``B38400'' + +-- And convert a word back to a baud rate +-- We really need some cpp macros here. + +word2Baud :: Word -> BaudRate +word2Baud x = + if x == ``B0'' then B0 + else if x == ``B50'' then B50 + else if x == ``B75'' then B75 + else if x == ``B110'' then B110 + else if x == ``B134'' then B134 + else if x == ``B150'' then B150 + else if x == ``B200'' then B200 + else if x == ``B300'' then B300 + else if x == ``B600'' then B600 + else if x == ``B1200'' then B1200 + else if x == ``B1800'' then B1800 + else if x == ``B2400'' then B2400 + else if x == ``B4800'' then B4800 + else if x == ``B9600'' then B9600 + else if x == ``B19200'' then B19200 + else if x == ``B38400'' then B38400 + else error "unknown baud rate" + +-- Clear termios i_flag + +clearInputFlag :: Word -> TerminalAttributes -> TerminalAttributes +clearInputFlag flag termios = unsafePerformIO $ do + bytes <- allocChars ``sizeof(struct termios)'' + _casm_ ``*(struct termios *)%0 = *(struct termios *)%1; + ((struct termios *)%0)->c_iflag &= ~%2;'' bytes termios flag + freeze bytes + +-- Set termios i_flag + +setInputFlag :: Word -> TerminalAttributes -> TerminalAttributes +setInputFlag flag termios = unsafePerformIO $ do + bytes <- allocChars ``sizeof(struct termios)'' + _casm_ ``*(struct termios *)%0 = *(struct termios *)%1; + ((struct termios *)%0)->c_iflag |= %2;'' bytes termios flag + freeze bytes + +-- Examine termios i_flag + +testInputFlag :: Word -> TerminalAttributes -> Bool +testInputFlag flag termios = unsafePerformIO $ + _casm_ ``%r = ((struct termios *)%0)->c_iflag & %1;'' termios flag + >>= \ (W# flags#) -> + return (flags# `neWord#` int2Word# 0#) + +-- Clear termios c_flag + +clearControlFlag :: Word -> TerminalAttributes -> TerminalAttributes +clearControlFlag flag termios = unsafePerformIO $ do + bytes <- allocChars ``sizeof(struct termios)'' + _casm_ ``*(struct termios *)%0 = *(struct termios *)%1; + ((struct termios *)%0)->c_cflag &= ~%2;'' bytes termios flag + freeze bytes + +-- Set termios c_flag + +setControlFlag :: Word -> TerminalAttributes -> TerminalAttributes +setControlFlag flag termios = unsafePerformIO $ do + bytes <- allocChars ``sizeof(struct termios)'' + _casm_ ``*(struct termios *)%0 = *(struct termios *)%1; + ((struct termios *)%0)->c_cflag |= %2;'' bytes termios flag + freeze bytes + +-- Examine termios c_flag + +testControlFlag :: Word -> TerminalAttributes -> Bool +testControlFlag flag termios = unsafePerformIO $ + _casm_ ``%r = ((struct termios *)%0)->c_cflag & %1;'' termios flag + >>= \ (W# flags#) -> + return (flags# `neWord#` int2Word# 0#) + +-- Clear termios l_flag + +clearLocalFlag :: Word -> TerminalAttributes -> TerminalAttributes +clearLocalFlag flag termios = unsafePerformIO $ do + bytes <- allocChars ``sizeof(struct termios)'' + _casm_ ``*(struct termios *)%0 = *(struct termios *)%1; + ((struct termios *)%0)->c_lflag &= ~%2;'' bytes termios flag + freeze bytes + +-- Set termios l_flag + +setLocalFlag :: Word -> TerminalAttributes -> TerminalAttributes +setLocalFlag flag termios = unsafePerformIO $ do + bytes <- allocChars ``sizeof(struct termios)'' + _casm_ ``*(struct termios *)%0 = *(struct termios *)%1; + ((struct termios *)%0)->c_lflag |= %2;'' bytes termios flag + freeze bytes + +-- Examine termios l_flag + +testLocalFlag :: Word -> TerminalAttributes -> Bool +testLocalFlag flag termios = unsafePerformIO $ + _casm_ ``%r = ((struct termios *)%0)->c_iflag & %1;'' termios flag + >>= \ (W# flags#) -> + return (flags# `neWord#` int2Word# 0#) +\end{code} diff --git a/ghc/lib/posix/PosixUtil.lhs b/ghc/lib/posix/PosixUtil.lhs new file mode 100644 index 0000000..047b9bc --- /dev/null +++ b/ghc/lib/posix/PosixUtil.lhs @@ -0,0 +1,155 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1995-1996 +% +\section[PosixUtil]{Haskell 1.3 POSIX utilities} + +\begin{code} +module PosixUtil where + +import ST +import PrelST -- ST representation +import PrelIOBase -- IOError representation +import Foreign +import CCall +import PrelBase +import MutableArray +import ByteArray +import Array +import PackedString ( packCBytesST, psToByteArrayST, unpackPS ) +import Ix +import PrelArr (StateAndMutableByteArray#(..), StateAndByteArray#(..)) +\end{code} + +First, all of the major Posix data types, to avoid any recursive dependencies + +\begin{code} +type ByteCount = Int +type ClockTick = Int +type EpochTime = Int +type FileOffset = Int +type GroupID = Int +type Limit = Int +type LinkCount = Int +type ProcessID = Int +type ProcessGroupID = ProcessID +type UserID = Int +data Fd = FD# Int# +instance CCallable Fd +instance CReturnable Fd + +instance Eq Fd where + (FD# x#) == (FD# y#) = x# ==# y# + +-- use with care. +intToFd :: Int -> Fd +intToFd (I# fd#) = FD# fd# +\end{code} + +Now some local functions that shouldn't go outside this library. + +Fail with a SystemError. Normally, we do not try to re-interpret +POSIX error numbers, so most routines in this file will only fail +with SystemError. The only exceptions are (1) those routines where +failure of some kind may be considered ``normal''...e.g. getpwnam() +for a non-existent user, or (2) those routines which do not set +errno. + +\begin{code} +syserr :: String -> IO a +syserr str = fail (IOError Nothing -- ToDo: better + SystemError + str) + +-- Allocate a mutable array of characters with no indices. + +allocChars :: Int -> IO (MutableByteArray RealWorld ()) +allocChars (I# size#) = IO $ \ s# -> + case newCharArray# size# s# of + StateAndMutableByteArray# s2# barr# -> + IOok s2# (MutableByteArray bot barr#) + where + bot = error "PosixUtil.allocChars" + +-- Allocate a mutable array of words with no indices + +allocWords :: Int -> IO (MutableByteArray RealWorld ()) +allocWords (I# size#) = IO $ \ s# -> + case newIntArray# size# s# of + StateAndMutableByteArray# s2# barr# -> + IOok s2# (MutableByteArray bot barr#) + where + bot = error "PosixUtil.allocWords" + +-- Freeze these index-free mutable arrays + +freeze :: MutableByteArray RealWorld () -> IO (ByteArray ()) +freeze (MutableByteArray ixs arr#) = IO $ \ s# -> + case unsafeFreezeByteArray# arr# s# of + StateAndByteArray# s2# frozen# -> + IOok s2# (ByteArray ixs frozen#) + +-- Copy a null-terminated string from outside the heap to +-- Haskellized nonsense inside the heap + +strcpy :: Addr -> IO String +strcpy str + | str == ``NULL'' = return "" + | otherwise = + _ccall_ strlen str >>= \ len -> + stToIO (packCBytesST len str) >>= \ ps -> + return (unpackPS ps) + +-- Turn a string list into a NULL-terminated vector of null-terminated +-- strings No indices...I hate indices. Death to Ix. + +vectorize :: [String] -> IO (ByteArray ()) +vectorize xs = do + arr <- allocWords (len + 1) + fill arr 0 xs + freeze arr + where + len :: Int + len = length xs + + fill :: MutableByteArray RealWorld () -> Int -> [String] -> IO () + fill arr n [] = + _casm_ ``((PP_)%0)[%1] = NULL;'' arr n + fill arr n (x:xs) = + stToIO (psToByteArrayST x) >>= \ barr -> + _casm_ ``((PP_)%0)[%1] = (P_)%2;'' arr n barr + >>= \ () -> + fill arr (n+1) xs + +-- Turn a NULL-terminated vector of null-terminated strings into a string list + +unvectorize :: Addr -> Int -> IO [String] +unvectorize ptr n + | str == ``NULL'' = return [] + | otherwise = + strcpy str >>= \ x -> + unvectorize ptr (n+1) >>= \ xs -> + return (x : xs) + where + str = indexAddrOffAddr ptr n + +-- common templates for system calls + +nonzero_error :: IO Int -> String -> IO () +nonzero_error io err = do + rc <- io + if rc == 0 + then return () + else syserr err + +minusone_error :: IO Int -> String -> IO () +minusone_error io err = do + rc <- io + if rc /= -1 + then return () + else syserr err + +-- IO versions of a few ST functions. + +psToByteArrayIO = stToIO . psToByteArrayST + +\end{code} diff --git a/ghc/lib/posix/cbits/Makefile b/ghc/lib/posix/cbits/Makefile new file mode 100644 index 0000000..ed4cefc --- /dev/null +++ b/ghc/lib/posix/cbits/Makefile @@ -0,0 +1,17 @@ +# +# Makefile for cbits subdirectory +# +TOP=../../.. +include $(TOP)/mk/boilerplate.mk +override WAYS= + +# Hack! +SRC_CC_OPTS += -I$(GHC_INCLUDE_DIR) + +CC=$(HC) +C_SRCS=$(wildcard *.c) +LIBRARY=libHSposix_cbits.a +LIBOBJS=$(C_OBJS) +INSTALL_LIBS += $(LIBRARY) + +include $(TOP)/mk/target.mk diff --git a/ghc/lib/posix/cbits/env.c b/ghc/lib/posix/cbits/env.c new file mode 100644 index 0000000..936039c --- /dev/null +++ b/ghc/lib/posix/cbits/env.c @@ -0,0 +1,165 @@ +/* +% +% (c) The GRASP/AQUA Project, Glasgow University, 1995-1996 +% +\subsection[env.lc]{Environment Handling for LibPosix} + +Many useful environment functions are not necessarily provided by libc. +To get around this problem, we introduce our own. The first time that +you modify your environment, we copy the environment wholesale into +malloc'ed locations, so that subsequent modifications can do proper +memory management. The $environ$ variable is updated with a pointer +to the current environment so that the normal $getenv$ and $exec*$ functions +should continue to work properly. + +\begin{code} +*/ + +#include "rtsdefs.h" +#include "libposix.h" + +/* Switch this on once we've moved the environment to the malloc arena */ +int dirtyEnv = 0; + +/* + * For some reason, OSF turns off the prototype for this if we're + * _POSIX_SOURCE. Seems to me that this ought to be an ANSI-ism + * rather than a POSIX-ism, but no matter. (JSM(?)) + */ + +char * +strDup(const char *src) +{ + int len = strlen(src) + 1; + char *dst; + + if ((dst = malloc(len)) != NULL) + memcpy(dst, src, len); + return dst; +} + +/* Replace the entire environment */ +int +setenviron(char **envp) +{ + char **old = environ; + int dirtyOld = dirtyEnv; + int i; + + /* A quick hack to move the strings out of the heap */ + environ = envp; + if (copyenv() != 0) { + environ = old; + return -1; + } + /* Release the old space if we allocated it ourselves earlier */ + if (dirtyOld) { + for (i = 0; old[i] != NULL; i++) + free(old[i]); + free(old); + } + return 0; +} + +/* Copy initial environment into malloc arena */ +int +copyenv(void) +{ + char **new; + int i; + + for (i = 0; environ[i] != NULL; i++); + + if ((new = (char **) malloc((i + 1) * sizeof(char *))) == NULL) + return -1; + + new[i] = NULL; + + while (--i >= 0) { + if ((new[i] = strDup(environ[i])) == NULL) { + while (new[++i] != NULL) + free(new[i]); + free(new); + return -1; + } + } + environ = new; + dirtyEnv = 1; + return 0; +} + +/* Set or replace an environment variable + * simonm 14/2/96 - this is different to the standard C library + * implementation and the prototypes clash, so I'm calling it _setenv. + */ +int +_setenv(char *mapping) +{ + int i, keylen; + char *p; + char **new; + + /* We must have a non-empty key and an '=' */ + if (mapping[0] == '=' || (p = strchr(mapping, '=')) == NULL) { + errno = EINVAL; + return -1; + } + /* Include through the '=' for matching */ + keylen = p - mapping + 1; + + if (!dirtyEnv && copyenv() != 0) + return -1; + + if ((p = strDup(mapping)) == NULL) + return -1; + + /* Look for an existing key that matches */ + for (i = 0; environ[i] != NULL && strncmp(environ[i], p, keylen) != 0; i++); + + if (environ[i] != NULL) { + free(environ[i]); + environ[i] = p; + } else { + if ((new = (char **) realloc(environ, (i + 1) * sizeof(char *))) == NULL) { + free(p); + return -1; + } + new[i] = p; + new[i + 1] = NULL; + environ = new; + } + return 0; +} + +/* Delete a variable from the environment */ +int +delenv(char *name) +{ + int i, keylen; + + if (strchr(name, '=') != NULL) { + errno = EINVAL; + return -1; + } + keylen = strlen(name); + + if (!dirtyEnv && copyenv() != 0) + return -1; + + /* Look for a matching key */ + for (i = 0; environ[i] != NULL && + (strncmp(environ[i], name, keylen) != 0 || environ[i][keylen] != '='); i++); + + /* Don't complain if it wasn't there to begin with */ + if (environ[i] == NULL) { + return 0; + } + free(environ[i]); + + do { + environ[i] = environ[i + 1]; + i++; + } while (environ[i] != NULL); + + return 0; +} diff --git a/ghc/lib/posix/cbits/execvpe.c b/ghc/lib/posix/cbits/execvpe.c new file mode 100644 index 0000000..ab50ccd --- /dev/null +++ b/ghc/lib/posix/cbits/execvpe.c @@ -0,0 +1,153 @@ +/* +% +% (c) The GRASP/AQUA Project, Glasgow University, 1995-1996 +% +\subsection[posix.lc]{executeFile Runtime Support} + +\begin{code} +*/ +#if !defined(_AIX) +#define NON_POSIX_SOURCE +#endif + +#include "rtsdefs.h" +#include "libposix.h" + +/* + * We want the search semantics of execvp, but we want to provide our + * own environment, like execve. The following copyright applies to + * this code, as it is a derivative of execvp: + *- + * Copyright (c) 1991 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +int +execvpe(char *name, char **argv, char **envp) +{ + register int lp, ln; + register char *p; + int eacces, etxtbsy; + char *bp, *cur, *path, *buf; + + /* If it's an absolute or relative path name, it's easy. */ + if (strchr(name, '/')) { + bp = (char *) name; + cur = path = buf = NULL; + goto retry; + } + + /* Get the path we're searching. */ + if (!(path = getenv("PATH"))) { +#ifdef HAVE_CONFSTR + ln = confstr(_CS_PATH, NULL, 0); + if ((cur = path = malloc(ln + 1)) != NULL) { + path[0] = ':'; + (void) confstr (_CS_PATH, path + 1, ln); + } +#else + if ((cur = path = malloc(1 + 1)) != NULL) { + path[0] = ':'; + path[1] = '\0'; + } +#endif + } else + cur = path = strDup(path); + + if (path == NULL || (bp = buf = malloc(strlen(path)+strlen(name)+2)) == NULL) + goto done; + + eacces = etxtbsy = 0; + while (cur != NULL) { + p = cur; + if ((cur = strchr(cur, ':')) != NULL) + *cur++ = '\0'; + + /* + * It's a SHELL path -- double, leading and trailing colons mean the current + * directory. + */ + if (!*p) { + p = "."; + lp = 1; + } else + lp = strlen(p); + ln = strlen(name); + + memcpy(buf, p, lp); + buf[lp] = '/'; + memcpy(buf + lp + 1, name, ln); + buf[lp + ln + 1] = '\0'; + + retry: + (void) execve(bp, argv, envp); + switch (errno) { + case EACCES: + eacces = 1; + break; + case ENOENT: + break; + case ENOEXEC: + { + register size_t cnt; + register char **ap; + + for (cnt = 0, ap = (char **) argv; *ap; ++ap, ++cnt) + ; + if ((ap = malloc((cnt + 2) * sizeof(char *))) != NULL) { + memcpy(ap + 2, argv + 1, cnt * sizeof(char *)); + + ap[0] = "sh"; + ap[1] = bp; + (void) execve("/bin/sh", ap, envp); + free(ap); + } + goto done; + } + case ETXTBSY: + if (etxtbsy < 3) + (void) sleep(++etxtbsy); + goto retry; + default: + goto done; + } + } + if (eacces) + errno = EACCES; + else if (!errno) + errno = ENOENT; + done: + if (path) + free(path); + if (buf) + free(buf); + return (-1); +} diff --git a/ghc/lib/posix/cbits/libposix.h b/ghc/lib/posix/cbits/libposix.h new file mode 100644 index 0000000..624da76 --- /dev/null +++ b/ghc/lib/posix/cbits/libposix.h @@ -0,0 +1,79 @@ +#ifndef LIBPOSIX_H +#ifdef HAVE_SYS_WAIT_H +#include +#endif /* HAVE_SYS_WAIT_H */ + +#ifdef HAVE_SIGNAL_H +#include +#endif /* HAVE_SIGNAL_H */ + +#ifdef HAVE_SYS_UTSNAME_H +#include +#endif /* HAVE_SYS_UTSNAME_H */ + +#ifdef HAVE_SYS_TIMES_H +#include +#endif /* HAVE_SYS_TIMES_H */ + +#ifdef HAVE_DIRENT_H +#include +#endif /* HAVE_DIRENT_H */ + +#ifdef HAVE_SYS_STAT_H +#include +#endif /* HAVE_SYS_STAT_H */ + +#ifdef HAVE_FCNTL_H +#include +#endif /* HAVE_FCNTL_H */ + +#ifdef HAVE_UNISTD_H +#include +#endif /* HAVE_UNISTD_H */ + +#ifdef HAVE_UTIME_H +#include +#endif /* HAVE_UTIME_H */ + +#ifdef HAVE_TERMIOS_H +#include +#endif /* HAVE_TERMIOS_H */ + +#ifdef HAVE_GRP_H +#include +#endif /* HAVE_GRP_H */ + +#ifdef HAVE_PWD_H +#include +#endif /* HAVE_PWD_H */ + +#if TIME_WITH_SYS_TIME +# include +# include +#else +# if HAVE_SYS_TIME_H +# include +# else +# include +# endif +#endif + +#ifndef _POSIX_VDISABLE +#define _POSIX_VDISABLE '\0' /* Just a guess...but it works for Suns */ +#endif + +/* For PosixIO only (finaliser for (FILE *) contained in Handles) */ +extern void freeStdFile PROTO((StgForeignObj)); +extern void freeFile PROTO((StgForeignObj)); + +extern I_ nocldstop; + +char *strDup PROTO((const char *)); +int setenviron PROTO((char **)); +int copyenv (STG_NO_ARGS); +int _setenv PROTO((char *)); +int delenv PROTO((char *)); +int execvpe PROTO((char *, char **, char **)); + +#define LIBPOSIX_H +#endif diff --git a/ghc/lib/required/Array.lhs b/ghc/lib/std/Array.lhs similarity index 98% rename from ghc/lib/required/Array.lhs rename to ghc/lib/std/Array.lhs index b3d0f4d..390c481 100644 --- a/ghc/lib/required/Array.lhs +++ b/ghc/lib/std/Array.lhs @@ -18,7 +18,7 @@ module Array ( import Ix import PrelList import PrelRead -import ArrBase -- Most of the hard work is done here +import PrelArr -- Most of the hard work is done here import PrelBase infixl 9 !, // diff --git a/ghc/lib/required/CPUTime.lhs b/ghc/lib/std/CPUTime.lhs similarity index 84% rename from ghc/lib/required/CPUTime.lhs rename to ghc/lib/std/CPUTime.lhs index 97ff7df..e0532cc 100644 --- a/ghc/lib/required/CPUTime.lhs +++ b/ghc/lib/std/CPUTime.lhs @@ -4,19 +4,23 @@ \section[CPUTime]{Haskell 1.4 CPU Time Library} \begin{code} +{-# OPTIONS -fno-implicit-prelude #-} + module CPUTime ( getCPUTime, -- :: IO Integer cpuTimePrecision -- :: Integer ) where -import PrelBase ( Int(..), indexIntArray# ) -import ArrBase ( ByteArray(..), newIntArray, unsafeFreezeByteArray ) -import Addr -import IOBase +import PrelBase +import PrelArr ( ByteArray(..), newIntArray, unsafeFreezeByteArray ) +import PrelMaybe +import PrelNum +import PrelAddr +import PrelIOBase import IO -import Unsafe ( unsafePerformIO ) -import STBase +import PrelUnsafe ( unsafePerformIO ) +import PrelST import Ratio \end{code} diff --git a/ghc/lib/required/Char.lhs b/ghc/lib/std/Char.lhs similarity index 97% rename from ghc/lib/required/Char.lhs rename to ghc/lib/std/Char.lhs index 9dcca7e..ce77c98 100644 --- a/ghc/lib/required/Char.lhs +++ b/ghc/lib/std/Char.lhs @@ -27,7 +27,7 @@ module Char import PrelBase import PrelRead (readLitChar) -import GHCerr ( error ) +import PrelErr ( error ) \end{code} diff --git a/ghc/lib/required/Complex.lhs b/ghc/lib/std/Complex.lhs similarity index 100% rename from ghc/lib/required/Complex.lhs rename to ghc/lib/std/Complex.lhs diff --git a/ghc/lib/required/Directory.lhs b/ghc/lib/std/Directory.lhs similarity index 98% rename from ghc/lib/required/Directory.lhs rename to ghc/lib/std/Directory.lhs index 0389cbb..14be0e4 100644 --- a/ghc/lib/required/Directory.lhs +++ b/ghc/lib/std/Directory.lhs @@ -40,13 +40,13 @@ module Directory ) where import PrelBase -import Foreign ( Word(..) ) -import Addr -import IOBase -import STBase -import Unsafe ( unsafePerformIO ) -import ArrBase -import PackBase ( unpackNBytesST ) +import PrelIOBase +import PrelST +import PrelUnsafe ( unsafePerformIO ) +import PrelArr +import PrelPack ( unpackNBytesST ) +import PrelForeign ( Word(..) ) +import PrelAddr import Time ( ClockTime(..) ) \end{code} diff --git a/ghc/lib/required/IO.lhs b/ghc/lib/std/IO.lhs similarity index 98% rename from ghc/lib/required/IO.lhs rename to ghc/lib/std/IO.lhs index 97f8f23..fe58518 100644 --- a/ghc/lib/required/IO.lhs +++ b/ghc/lib/std/IO.lhs @@ -32,24 +32,24 @@ module IO ( try, bracket, bracket_ ) where -import Ix -import STBase -import Unsafe ( unsafePerformIO, unsafeInterleaveIO ) -import IOBase -import ArrBase ( MutableByteArray(..), newCharArray ) -import IOHandle -- much of the real stuff is in here -import PackBase ( unpackNBytesST ) +import PrelST +import PrelUnsafe ( unsafePerformIO, unsafeInterleaveIO ) +import PrelIOBase +import PrelArr ( MutableByteArray(..), newCharArray ) +import PrelHandle -- much of the real stuff is in here +import PrelPack ( unpackNBytesST ) import PrelBase import PrelRead ( readParen, Read(..), reads, lex ) import PrelMaybe import PrelEither -import GHC -import Addr +import PrelAddr +import PrelGHC #ifndef __PARALLEL_HASKELL__ -import Foreign ( ForeignObj, makeForeignObj, writeForeignObj ) +import PrelForeign ( ForeignObj, makeForeignObj, writeForeignObj ) #endif +import Ix import Char ( ord, chr ) \end{code} @@ -135,7 +135,8 @@ instance Eq Handle where (AppendHandle v1 _ _ , AppendHandle v2 _ _) -> v1 == v2 (ReadWriteHandle v1 _ _ , ReadWriteHandle v2 _ _) -> v1 == v2 _ -> False)) -#endif {- __CONCURRENT_HASKELL__ -} + +#endif instance Show Handle where {showsPrec p h = showString "<>"} diff --git a/ghc/lib/required/Ix.lhs b/ghc/lib/std/Ix.lhs similarity index 99% rename from ghc/lib/required/Ix.lhs rename to ghc/lib/std/Ix.lhs index 50bc163..af16fda 100644 --- a/ghc/lib/required/Ix.lhs +++ b/ghc/lib/std/Ix.lhs @@ -13,7 +13,7 @@ module Ix rangeSize ) where -import {-# SOURCE #-} GHCerr ( error ) +import {-# SOURCE #-} PrelErr ( error ) import PrelTup import PrelBase \end{code} diff --git a/ghc/lib/required/List.lhs b/ghc/lib/std/List.lhs similarity index 89% rename from ghc/lib/required/List.lhs rename to ghc/lib/std/List.lhs index 08952a6..1e133a6 100644 --- a/ghc/lib/required/List.lhs +++ b/ghc/lib/std/List.lhs @@ -34,9 +34,9 @@ module List ( ) where import Prelude -import Maybe (listToMaybe) +import Maybe ( listToMaybe ) import PrelBase ( Int(..) ) -import GHC ( (+#) ) +import PrelGHC ( (+#) ) infix 5 \\ \end{code} @@ -62,15 +62,16 @@ findIndex p = listToMaybe . findIndices p findIndices :: (a -> Bool) -> [a] -> [Int] --- One line definition --- findIndices p xs = [ i | (x,i) <- zip xs [0..], p x] - +#ifdef USE_REPORT_PRELUDE +findIndices p xs = [ i | (x,i) <- zip xs [0..], p x] +#else -- Efficient definition findIndices p xs = loop 0# p xs where loop n p [] = [] loop n p (x:xs) | p x = I# n : loop (n +# 1#) p xs | otherwise = loop (n +# 1#) p xs +#endif isPrefixOf :: (Eq a) => [a] -> [a] -> Bool isPrefixOf [] _ = True @@ -196,12 +197,6 @@ mapAccumR f s (x:xs) = (s'', y:ys) \end{code} \begin{code} -sort :: (Ord a) => [a] -> [a] -sort = sortBy compare - -sortBy :: (a -> a -> Ordering) -> [a] -> [a] -sortBy cmp = foldr (insertBy cmp) [] - insertBy :: (a -> a -> Ordering) -> a -> [a] -> [a] insertBy cmp x [] = [x] insertBy cmp x ys@(y:ys') @@ -339,3 +334,50 @@ tails [] = [[]] tails xxs@(_:xs) = xxs : tails xs \end{code} + +%----------------------------------------------------------------------------- +Quick Sort algorithm taken from HBC's QSort library. + +\begin{code} +sort :: (Ord a) => [a] -> [a] +sortBy :: (a -> a -> Ordering) -> [a] -> [a] + +#ifdef USE_REPORT_PRELUDE +sort = sortBy compare +sortBy cmp = foldr (insertBy cmp) [] +#else + +sortBy cmp l = qsort cmp l [] +sort l = qsort compare l [] + +-- rest is not exported: + +-- qsort is stable and does not concatenate. +qsort cmp [] r = r +qsort cmp [x] r = x:r +qsort cmp (x:xs) r = qpart cmp x xs [] [] r + +-- qpart partitions and sorts the sublists +qpart cmp x [] rlt rge r = + -- rlt and rge are in reverse order and must be sorted with an + -- anti-stable sorting + rqsort cmp rlt (x:rqsort cmp rge r) +qpart cmp x (y:ys) rlt rge r = + case cmp x y of + GT -> qpart cmp x ys (y:rlt) rge r + _ -> qpart cmp x ys rlt (y:rge) r + +-- rqsort is as qsort but anti-stable, i.e. reverses equal elements +rqsort cmp [] r = r +rqsort cmp [x] r = x:r +rqsort cmp (x:xs) r = rqpart cmp x xs [] [] r + +rqpart cmp x [] rle rgt r = + qsort cmp rle (x:qsort cmp rgt r) +rqpart cmp x (y:ys) rle rgt r = + case cmp y x of + GT -> rqpart cmp x ys rle (y:rgt) r + _ -> rqpart cmp x ys (y:rle) rgt r + +#endif /* USE_REPORT_PRELUDE */ +\end{code} diff --git a/ghc/lib/required/Locale.lhs b/ghc/lib/std/Locale.lhs similarity index 100% rename from ghc/lib/required/Locale.lhs rename to ghc/lib/std/Locale.lhs diff --git a/ghc/lib/ghc/Main.hi-boot b/ghc/lib/std/Main.hi-boot similarity index 91% rename from ghc/lib/ghc/Main.hi-boot rename to ghc/lib/std/Main.hi-boot index 5eba82e..d4bd8ff 100644 --- a/ghc/lib/ghc/Main.hi-boot +++ b/ghc/lib/std/Main.hi-boot @@ -10,4 +10,4 @@ _interface_ Main 1 _exports_ Main main ; _declarations_ -1 main _:_ IOBase.IO PrelBase.();; +1 main _:_ PrelIOBase.IO PrelBase.();; diff --git a/ghc/lib/required/Maybe.lhs b/ghc/lib/std/Maybe.lhs similarity index 98% rename from ghc/lib/required/Maybe.lhs rename to ghc/lib/std/Maybe.lhs index acecd04..3c86e91 100644 --- a/ghc/lib/required/Maybe.lhs +++ b/ghc/lib/std/Maybe.lhs @@ -20,7 +20,7 @@ module Maybe unfoldr ) where -import GHCerr ( error ) +import PrelErr ( error ) import Monad ( filter ) import PrelList import PrelMaybe diff --git a/ghc/lib/required/Monad.lhs b/ghc/lib/std/Monad.lhs similarity index 100% rename from ghc/lib/required/Monad.lhs rename to ghc/lib/std/Monad.lhs diff --git a/ghc/lib/required/Numeric.lhs b/ghc/lib/std/Numeric.lhs similarity index 99% rename from ghc/lib/required/Numeric.lhs rename to ghc/lib/std/Numeric.lhs index 4226863..067c672 100644 --- a/ghc/lib/required/Numeric.lhs +++ b/ghc/lib/std/Numeric.lhs @@ -32,7 +32,7 @@ module Numeric import PrelBase import PrelMaybe -import ArrBase +import PrelArr import PrelNum import PrelRead @@ -80,7 +80,6 @@ showInt n r in if n' == 0 then r' else showInt n' r' }} - \end{code} Controlling the format and precision of floats. The code that diff --git a/ghc/lib/glaExts/Addr.lhs b/ghc/lib/std/PrelAddr.lhs similarity index 96% rename from ghc/lib/glaExts/Addr.lhs rename to ghc/lib/std/PrelAddr.lhs index c592d50..6543bfb 100644 --- a/ghc/lib/glaExts/Addr.lhs +++ b/ghc/lib/std/PrelAddr.lhs @@ -2,21 +2,21 @@ % (c) The AQUA Project, Glasgow University, 1994-1996 % -\section[Addr]{Module @Addr@} +\section[PrelAddr]{Module @PrelAddr@} \begin{code} {-# OPTIONS -fno-implicit-prelude #-} -module Addr ( +module PrelAddr ( Addr(..), nullAddr, -- :: Addr plusAddr, -- :: Addr -> Int -> Addr ) where -import GHC +import PrelGHC import PrelBase -import STBase -import CCall +import PrelST +import PrelCCall \end{code} \begin{code} diff --git a/ghc/lib/ghc/ArrBase.lhs b/ghc/lib/std/PrelArr.lhs similarity index 99% rename from ghc/lib/ghc/ArrBase.lhs rename to ghc/lib/std/PrelArr.lhs index b80c0cd..806b932 100644 --- a/ghc/lib/ghc/ArrBase.lhs +++ b/ghc/lib/std/PrelArr.lhs @@ -1,25 +1,25 @@ % % (c) The AQUA Project, Glasgow University, 1994-1996 % -\section[ArrBase]{Module @ArrBase@} +\section[PrelArr]{Module @PrelArr@} -Array implementation, @ArrBase@ exports the basic array +Array implementation, @PrelArr@ exports the basic array types and operations. \begin{code} {-# OPTIONS -fno-implicit-prelude #-} -module ArrBase where +module PrelArr where -import {-# SOURCE #-} GHCerr ( error ) +import {-# SOURCE #-} PrelErr ( error ) import Ix import PrelList (foldl) -import STBase +import PrelST import PrelBase -import CCall -import Addr -import UnsafeST ( runST ) -import GHC +import PrelCCall +import PrelAddr +import PrelUnsafeST ( runST ) +import PrelGHC infixl 9 !, // \end{code} diff --git a/ghc/lib/ghc/PrelBase.lhs b/ghc/lib/std/PrelBase.lhs similarity index 99% rename from ghc/lib/ghc/PrelBase.lhs rename to ghc/lib/std/PrelBase.lhs index c8b4da1..3f5bc1d 100644 --- a/ghc/lib/ghc/PrelBase.lhs +++ b/ghc/lib/std/PrelBase.lhs @@ -9,12 +9,12 @@ module PrelBase( module PrelBase, - module GHC -- Re-export GHC, to avoid lots of people having - -- to import it explicitly + module PrelGHC -- Re-export PrelGHC, to avoid lots of people + -- having to import it explicitly ) where -import {-# SOURCE #-} GHCerr ( error ) -import GHC +import {-# SOURCE #-} PrelErr ( error ) +import PrelGHC infixr 9 . infixl 9 !! @@ -791,7 +791,7 @@ intToDigit :: Int -> Char intToDigit i | i >= 0 && i <= 9 = toEnum (fromEnum '0' + i) | i >= 10 && i <= 15 = toEnum (fromEnum 'a' + i -10) - | otherwise = error ("Char.intToDigit: not a digit: " ++ show i) -- .... + | otherwise = error "Char.intToDigit: not a digit" ++ show i) \end{code} diff --git a/ghc/lib/ghc/PrelBounded.lhs b/ghc/lib/std/PrelBounded.lhs similarity index 81% rename from ghc/lib/ghc/PrelBounded.lhs rename to ghc/lib/std/PrelBounded.lhs index 59dc439..3d1d0fd 100644 --- a/ghc/lib/ghc/PrelBounded.lhs +++ b/ghc/lib/std/PrelBounded.lhs @@ -21,6 +21,6 @@ instance Bounded Char where maxBound = '\255' instance Bounded Int where - minBound = I# -2147483648# -- GHC <= 2.09 had this at -2147483647 - maxBound = 2147483647 + minBound = -2147483648 -- GHC <= 2.09 had this at -2147483647 + maxBound = 2147483647 \end{code} diff --git a/ghc/lib/glaExts/CCall.lhs b/ghc/lib/std/PrelCCall.lhs similarity index 93% rename from ghc/lib/glaExts/CCall.lhs rename to ghc/lib/std/PrelCCall.lhs index f1205e8..6f886ff 100644 --- a/ghc/lib/glaExts/CCall.lhs +++ b/ghc/lib/std/PrelCCall.lhs @@ -2,18 +2,18 @@ % (c) The AQUA Project, Glasgow University, 1994-1996 % -\section[CCall]{Module @CCall@} +\section[PrelCCall]{Module @PrelCCall@} \begin{code} {-# OPTIONS -fno-implicit-prelude #-} -module CCall ( +module PrelCCall ( CCallable(..), CReturnable(..), Word(..) ) where import PrelBase -import GHC +import PrelGHC \end{code} %********************************************************* diff --git a/ghc/lib/ghc/ConcBase.lhs b/ghc/lib/std/PrelConc.lhs similarity index 91% rename from ghc/lib/ghc/ConcBase.lhs rename to ghc/lib/std/PrelConc.lhs index 96ebe29..04d6d60 100644 --- a/ghc/lib/ghc/ConcBase.lhs +++ b/ghc/lib/std/PrelConc.lhs @@ -2,13 +2,13 @@ % (c) The AQUA Project, Glasgow University, 1994-1996 % -\section[ConcBase]{Module @ConcBase@} +\section[PrelConc]{Module @PrelConc@} Basic concurrency stuff \begin{code} {-# OPTIONS -fno-implicit-prelude #-} -module ConcBase( +module PrelConc( -- Forking and suchlike ST, forkST, IO, forkIO, @@ -20,14 +20,14 @@ module ConcBase( ) where import PrelBase -import STBase ( ST(..), STret(..), StateAndPtr#(..) ) -import IOBase ( IO(..), IOResult(..), MVar(..) ) -import GHCerr ( parError ) -import PrelBase ( Int(..) ) -import GHC ( fork#, delay#, waitRead#, waitWrite#, - SynchVar#, newSynchVar#, takeMVar#, putMVar#, - State#, RealWorld, par# - ) +import PrelST ( ST(..), STret(..), StateAndPtr#(..) ) +import PrelIOBase ( IO(..), IOResult(..), MVar(..) ) +import PrelErr ( parError ) +import PrelBase ( Int(..) ) +import PrelGHC ( fork#, delay#, waitRead#, waitWrite#, + SynchVar#, newSynchVar#, takeMVar#, putMVar#, + State#, RealWorld, par# + ) infixr 0 `par`, `fork` \end{code} diff --git a/ghc/lib/ghc/PrelEither.lhs b/ghc/lib/std/PrelEither.lhs similarity index 100% rename from ghc/lib/ghc/PrelEither.lhs rename to ghc/lib/std/PrelEither.lhs diff --git a/ghc/lib/ghc/GHCerr.hi-boot b/ghc/lib/std/PrelErr.hi-boot similarity index 76% rename from ghc/lib/ghc/GHCerr.hi-boot rename to ghc/lib/std/PrelErr.hi-boot index 78bf038..6290349 100644 --- a/ghc/lib/ghc/GHCerr.hi-boot +++ b/ghc/lib/std/PrelErr.hi-boot @@ -1,12 +1,12 @@ --------------------------------------------------------------------------- --- GHCerr.hi-boot +-- PrelErr.hi-boot -- -- This hand-written interface file is the initial bootstrap version --- for GHCerr.hi. +-- for PrelErr.hi. -- It doesn't need to give "error" a type signature, -- because it's wired into the compiler --------------------------------------------------------------------------- -_interface_ GHerr 1 +_interface_ PrelErr 1 _exports_ -GHCerr error; +PrelErr error; diff --git a/ghc/lib/ghc/GHCerr.lhs b/ghc/lib/std/PrelErr.lhs similarity index 91% rename from ghc/lib/ghc/GHCerr.lhs rename to ghc/lib/std/PrelErr.lhs index 578fcac..643900e 100644 --- a/ghc/lib/ghc/GHCerr.lhs +++ b/ghc/lib/std/PrelErr.lhs @@ -2,18 +2,18 @@ % (c) The AQUA Project, Glasgow University, 1994-1996 % -\section[GHCerr]{Module @GHCerr@} +\section[PrelErr]{Module @PrelErr@} -The GHCerr module defines the code for the wired-in error functions, +The PrelErr module defines the code for the wired-in error functions, which have a special type in the compiler (with "open tyvars"). - + We cannot define these functions in a module where they might be used -(e.g., GHCbase), because the magical wired-in type will get confused +(e.g., PrelBase), because the magical wired-in type will get confused with what the typechecker figures out. \begin{code} {-# OPTIONS -fno-implicit-prelude #-} -module GHCerr +module PrelErr ( irrefutPatError @@ -33,10 +33,10 @@ module GHCerr --import Prelude import PrelBase -import IOBase -import Addr -import Foreign ( StablePtr, deRefStablePtr ) -import PrelList ( span ) +import PrelIOBase +import PrelAddr +import PrelForeign ( StablePtr, deRefStablePtr ) +import PrelList ( span ) --------------------------------------------------------------- @@ -123,12 +123,12 @@ seqError = error "Oops! Entered seqError (a GHC bug -- please report it!)\n" \begin{code} irrefutPatError - , noMethodBindingError + , noMethodBindingError --, noExplicitMethodError - , nonExhaustiveGuardsError - , patError - , recConError - , recUpdError :: String -> a + , nonExhaustiveGuardsError + , patError + , recConError + , recUpdError :: String -> a --noDefaultMethodError s = error ("noDefaultMethodError:"++s) --noExplicitMethodError s = error ("No default method for class operation "++s) diff --git a/ghc/lib/glaExts/Foreign.lhs b/ghc/lib/std/PrelForeign.lhs similarity index 96% rename from ghc/lib/glaExts/Foreign.lhs rename to ghc/lib/std/PrelForeign.lhs index 34d0990..7a5c6d2 100644 --- a/ghc/lib/glaExts/Foreign.lhs +++ b/ghc/lib/std/PrelForeign.lhs @@ -7,8 +7,8 @@ \begin{code} {-# OPTIONS -fno-implicit-prelude #-} -module Foreign ( - module Foreign, +module PrelForeign ( + module PrelForeign, #ifndef __PARALLEL_HASKELL__ ForeignObj(..), #endif @@ -22,13 +22,13 @@ module Foreign ( #endif ) where -import IOBase -import STBase -import Unsafe +import PrelIOBase +import PrelST +import PrelUnsafe import PrelBase -import CCall -import Addr -import GHC +import PrelCCall +import PrelAddr +import PrelGHC \end{code} diff --git a/ghc/lib/ghc/GHC.hi-boot b/ghc/lib/std/PrelGHC.hi-boot similarity index 99% rename from ghc/lib/ghc/GHC.hi-boot rename to ghc/lib/std/PrelGHC.hi-boot index bf73fbe..9d8a1b2 100644 --- a/ghc/lib/ghc/GHC.hi-boot +++ b/ghc/lib/std/PrelGHC.hi-boot @@ -5,9 +5,9 @@ -- primitive operations and types that GHC knows about. --------------------------------------------------------------------------- -_interface_ GHC 2 +_interface_ PrelGHC 2 _exports_ -GHC +PrelGHC -> All -- Pseudo class used for universal quantification diff --git a/ghc/lib/ghc/IOHandle.lhs b/ghc/lib/std/PrelHandle.lhs similarity index 98% rename from ghc/lib/ghc/IOHandle.lhs rename to ghc/lib/std/PrelHandle.lhs index a0d4f14..a597284 100644 --- a/ghc/lib/ghc/IOHandle.lhs +++ b/ghc/lib/std/PrelHandle.lhs @@ -2,7 +2,7 @@ % (c) The AQUA Project, Glasgow University, 1994-1996 % -\section[IOHandle]{Module @IOHandle@} +\section[PrelHandle]{Module @PrelHandle@} This module defines Haskell {\em handles} and the basic operations which are supported for them. @@ -12,30 +12,27 @@ which are supported for them. #include "error.h" -module IOHandle where +module PrelHandle where -import ST -import STBase -import ArrBase ( ByteArray(..), newVar, readVar, writeVar ) -import PrelRead ( Read ) -import PrelList (span) -import Ix -import IOBase -import Unsafe ( unsafePerformIO ) +import PrelST +import PrelArr ( ByteArray(..), newVar, readVar, writeVar ) +import PrelRead ( Read ) +import PrelList ( span ) +import PrelIOBase +import PrelUnsafe ( unsafePerformIO ) import PrelTup import PrelMaybe import PrelBase -import GHC -import Addr -import GHCerr ( error ) +import PrelAddr +import PrelErr ( error ) +import PrelGHC +import Ix #ifndef __PARALLEL_HASKELL__ -import Foreign ( ForeignObj, makeForeignObj, writeForeignObj ) +import PrelForeign ( ForeignObj, makeForeignObj, writeForeignObj ) #endif -#if defined(__CONCURRENT_HASKELL__) -import ConcBase -#endif +import PrelConc -- concurrent only \end{code} diff --git a/ghc/lib/ghc/PrelIO.lhs b/ghc/lib/std/PrelIO.lhs similarity index 98% rename from ghc/lib/ghc/PrelIO.lhs rename to ghc/lib/std/PrelIO.lhs index 24bf95b..2de7d3b 100644 --- a/ghc/lib/ghc/PrelIO.lhs +++ b/ghc/lib/std/PrelIO.lhs @@ -18,8 +18,8 @@ module PrelIO ( ) where import IO -import IOHandle -import IOBase +import PrelHandle +import PrelIOBase import PrelBase import PrelRead diff --git a/ghc/lib/ghc/IOBase.lhs b/ghc/lib/std/PrelIOBase.lhs similarity index 97% rename from ghc/lib/ghc/IOBase.lhs rename to ghc/lib/std/PrelIOBase.lhs index f23a25a..bcf6d7d 100644 --- a/ghc/lib/ghc/IOBase.lhs +++ b/ghc/lib/std/PrelIOBase.lhs @@ -2,7 +2,7 @@ % (c) The AQUA Project, Glasgow University, 1994-1996 % -\section[IOBase]{Module @IOBase@} +\section[PrelIOBase]{Module @PrelIOBase@} Definitions for the @IO@ monad and its friends. Everything is exported concretely; the @IO@ module itself exports abstractly. @@ -11,18 +11,17 @@ concretely; the @IO@ module itself exports abstractly. {-# OPTIONS -fno-implicit-prelude #-} #include "error.h" -module IOBase where +module PrelIOBase where -import {-# SOURCE #-} GHCerr ( error ) -import STBase +import {-# SOURCE #-} PrelErr ( error ) +import PrelST import PrelTup import PrelMaybe -import Addr -import PackBase ( unpackCString ) +import PrelAddr +import PrelPack ( unpackCString ) import PrelBase -import ArrBase ( ByteArray(..), MutableVar(..) ) - -import GHC +import PrelArr ( ByteArray(..), MutableVar(..) ) +import PrelGHC \end{code} diff --git a/ghc/lib/ghc/PrelList.lhs b/ghc/lib/std/PrelList.lhs similarity index 99% rename from ghc/lib/ghc/PrelList.lhs rename to ghc/lib/std/PrelList.lhs index df0e4fb..cae955e 100644 --- a/ghc/lib/ghc/PrelList.lhs +++ b/ghc/lib/std/PrelList.lhs @@ -22,7 +22,7 @@ module PrelList ( zip, zip3, zipWith, zipWith3, unzip, unzip3 ) where -import {-# SOURCE #-} GHCerr ( error ) +import {-# SOURCE #-} PrelErr ( error ) import PrelTup import PrelMaybe import PrelBase diff --git a/ghc/lib/ghc/GHCmain.lhs b/ghc/lib/std/PrelMain.lhs similarity index 81% rename from ghc/lib/ghc/GHCmain.lhs rename to ghc/lib/std/PrelMain.lhs index fa143b6..a64b361 100644 --- a/ghc/lib/ghc/GHCmain.lhs +++ b/ghc/lib/std/PrelMain.lhs @@ -2,14 +2,14 @@ % (c) The AQUA Project, Glasgow University, 1994-1997 % -\section[GHCmain]{Module @GHCmain@} +\section[PrelMain]{Module @PrelMain@} \begin{code} -module GHCmain( mainIO ) where +module PrelMain( mainIO ) where import Prelude import {-# SOURCE #-} qualified Main -- for type of "Main.main" -import GHCerr ( ioError ) +import PrelErr ( ioError ) \end{code} \begin{code} diff --git a/ghc/lib/ghc/PrelMaybe.lhs b/ghc/lib/std/PrelMaybe.lhs similarity index 100% rename from ghc/lib/ghc/PrelMaybe.lhs rename to ghc/lib/std/PrelMaybe.lhs diff --git a/ghc/lib/ghc/PrelNum.lhs b/ghc/lib/std/PrelNum.lhs similarity index 99% rename from ghc/lib/ghc/PrelNum.lhs rename to ghc/lib/std/PrelNum.lhs index 3c1e4fe..a562fac 100644 --- a/ghc/lib/ghc/PrelNum.lhs +++ b/ghc/lib/std/PrelNum.lhs @@ -20,16 +20,16 @@ It's rather big! module PrelNum where import PrelBase -import GHC -import {-# SOURCE #-} GHCerr ( error ) +import PrelGHC +import {-# SOURCE #-} PrelErr ( error ) import PrelList import PrelMaybe -import ArrBase ( Array, array, (!) ) -import Unsafe ( unsafePerformIO ) -import Ix ( Ix(..) ) -import CCall () -- we need the definitions of CCallable and CReturnable - -- for the _ccall_s herein. +import PrelArr ( Array, array, (!) ) +import PrelUnsafe ( unsafePerformIO ) +import Ix ( Ix(..) ) +import PrelCCall () -- we need the definitions of CCallable and + -- CReturnable for the _ccall_s herein. infixr 8 ^, ^^, ** diff --git a/ghc/lib/ghc/PackBase.lhs b/ghc/lib/std/PrelPack.lhs similarity index 97% rename from ghc/lib/ghc/PackBase.lhs rename to ghc/lib/std/PrelPack.lhs index 0f9dd04..39b4a23 100644 --- a/ghc/lib/ghc/PackBase.lhs +++ b/ghc/lib/std/PrelPack.lhs @@ -1,19 +1,19 @@ % % (c) The GRASP/AQUA Project, Glasgow University, 1997 % -\section[PackBase]{Packing/unpacking bytes} +\section[PrelPack]{Packing/unpacking bytes} This module provides a small set of low-level functions for packing and unpacking a chunk of bytes. Used by code emitted by the compiler plus the prelude libraries. -The programmer level view of packed strings is provided by a GHC system library -PackedString. +The programmer level view of packed strings is provided by a GHC +system library PackedString. \begin{code} {-# OPTIONS -fno-implicit-prelude #-} -module PackBase +module PrelPack ( -- (**) - emitted by compiler. @@ -47,12 +47,12 @@ module PackBase where import PrelBase -import {-# SOURCE #-} GHCerr ( error ) +import {-# SOURCE #-} PrelErr ( error ) import PrelList ( length ) -import STBase -import ArrBase -import Addr -import UnsafeST ( runST ) +import PrelST +import PrelArr +import PrelAddr +import PrelUnsafeST ( runST ) \end{code} diff --git a/ghc/lib/ghc/PrelRead.lhs b/ghc/lib/std/PrelRead.lhs similarity index 99% rename from ghc/lib/ghc/PrelRead.lhs rename to ghc/lib/std/PrelRead.lhs index 3b3e4c8..fd5ffaf 100644 --- a/ghc/lib/ghc/PrelRead.lhs +++ b/ghc/lib/std/PrelRead.lhs @@ -11,7 +11,7 @@ Instances of the Read class. module PrelRead where -import {-# SOURCE #-} GHCerr ( error ) +import {-# SOURCE #-} PrelErr ( error ) import PrelNum import PrelList import PrelTup diff --git a/ghc/lib/ghc/STBase.lhs b/ghc/lib/std/PrelST.lhs similarity index 96% rename from ghc/lib/ghc/STBase.lhs rename to ghc/lib/std/PrelST.lhs index 68dd330..b3b5411 100644 --- a/ghc/lib/ghc/STBase.lhs +++ b/ghc/lib/std/PrelST.lhs @@ -1,16 +1,16 @@ % % (c) The GRASP/AQUA Project, Glasgow University, 1992-1996 % -\section[STBase]{The @ST@ monad} +\section[PrelST]{The @ST@ monad} \begin{code} {-# OPTIONS -fno-implicit-prelude #-} -module STBase where +module PrelST where import Monad import PrelBase -import GHC +import PrelGHC \end{code} %********************************************************* diff --git a/ghc/lib/ghc/PrelTup.lhs b/ghc/lib/std/PrelTup.lhs similarity index 99% rename from ghc/lib/ghc/PrelTup.lhs rename to ghc/lib/std/PrelTup.lhs index 951d46d..daccfb8 100644 --- a/ghc/lib/ghc/PrelTup.lhs +++ b/ghc/lib/std/PrelTup.lhs @@ -11,7 +11,7 @@ This modules defines the typle data types. module PrelTup where -import {-# SOURCE #-} GHCerr ( error ) +import {-# SOURCE #-} PrelErr ( error ) import PrelBase \end{code} diff --git a/ghc/lib/ghc/Unsafe.lhs b/ghc/lib/std/PrelUnsafe.lhs similarity index 89% rename from ghc/lib/ghc/Unsafe.lhs rename to ghc/lib/std/PrelUnsafe.lhs index 653a5d0..775582c 100644 --- a/ghc/lib/ghc/Unsafe.lhs +++ b/ghc/lib/std/PrelUnsafe.lhs @@ -2,7 +2,7 @@ % (c) The AQUA Project, Glasgow University, 1994-1996 % -\section[Unsafe]{Module @Unsafe@} +\section[PrelUnsafe]{Module @PrelUnsafe@} These functions have their own module because we definitely don't want them to be inlined. @@ -10,7 +10,7 @@ them to be inlined. \begin{code} {-# OPTIONS -fno-implicit-prelude #-} -module Unsafe +module PrelUnsafe ( unsafePerformIO, unsafeInterleaveIO, trace, @@ -19,9 +19,9 @@ module Unsafe \begin{code} import PrelBase -import IOBase -import Addr -import {-# SOURCE #-} GHCerr ( error ) +import PrelIOBase +import PrelAddr +import {-# SOURCE #-} PrelErr ( error ) \end{code} %********************************************************* diff --git a/ghc/lib/ghc/UnsafeST.lhs b/ghc/lib/std/PrelUnsafeST.lhs similarity index 95% rename from ghc/lib/ghc/UnsafeST.lhs rename to ghc/lib/std/PrelUnsafeST.lhs index 5565178..17feed9 100644 --- a/ghc/lib/ghc/UnsafeST.lhs +++ b/ghc/lib/std/PrelUnsafeST.lhs @@ -10,9 +10,9 @@ them to be inlined. \begin{code} {-# OPTIONS -fno-implicit-prelude #-} -module UnsafeST (unsafeInterleaveST, runST) where +module PrelUnsafeST (unsafeInterleaveST, runST) where -import STBase +import PrelST import PrelBase \end{code} diff --git a/ghc/lib/required/Prelude.lhs b/ghc/lib/std/Prelude.lhs similarity index 98% rename from ghc/lib/required/Prelude.lhs rename to ghc/lib/std/Prelude.lhs index b386d66..000dd10 100644 --- a/ghc/lib/required/Prelude.lhs +++ b/ghc/lib/std/Prelude.lhs @@ -71,7 +71,7 @@ import PrelEither import PrelBounded import Monad import Maybe -import GHCerr ( error, seqError ) +import PrelErr ( error, seqError ) -- These can't conveniently be defined in PrelBase because they use numbers, -- or I/O, so here's a convenient place to do them. diff --git a/ghc/lib/required/Random.lhs b/ghc/lib/std/Random.lhs similarity index 100% rename from ghc/lib/required/Random.lhs rename to ghc/lib/std/Random.lhs diff --git a/ghc/lib/required/Ratio.lhs b/ghc/lib/std/Ratio.lhs similarity index 100% rename from ghc/lib/required/Ratio.lhs rename to ghc/lib/std/Ratio.lhs diff --git a/ghc/lib/required/System.lhs b/ghc/lib/std/System.lhs similarity index 96% rename from ghc/lib/required/System.lhs rename to ghc/lib/std/System.lhs index 806442e..ad0b66c 100644 --- a/ghc/lib/required/System.lhs +++ b/ghc/lib/std/System.lhs @@ -11,10 +11,10 @@ module System ( ) where import Prelude -import Addr -import IOBase ( IOError(..), IOErrorType(..), constructErrorAndFail ) -import ArrBase ( indexAddrOffAddr ) -import PackBase ( unpackCString ) +import PrelAddr +import PrelIOBase ( IOError(..), IOErrorType(..), constructErrorAndFail ) +import PrelArr ( indexAddrOffAddr ) +import PrelPack ( unpackCString ) \end{code} diff --git a/ghc/lib/required/Time.lhs b/ghc/lib/std/Time.lhs similarity index 98% rename from ghc/lib/required/Time.lhs rename to ghc/lib/std/Time.lhs index 1866a17..4ce9925 100644 --- a/ghc/lib/required/Time.lhs +++ b/ghc/lib/std/Time.lhs @@ -31,16 +31,15 @@ module Time ) where import PrelBase -import ST -import IOBase -import ArrBase -import STBase -import Unsafe ( unsafePerformIO ) -import ST +import PrelIOBase +import PrelArr +import PrelST +import PrelUnsafe ( unsafePerformIO ) +import PrelAddr +import PrelPack ( unpackCString ) + import Ix -import Addr -import Char ( intToDigit ) -import PackBase ( unpackCString ) +import Char ( intToDigit ) import Locale \end{code} diff --git a/ghc/lib/cbits/Makefile b/ghc/lib/std/cbits/Makefile similarity index 87% rename from ghc/lib/cbits/Makefile rename to ghc/lib/std/cbits/Makefile index 6759634..b330b62 100644 --- a/ghc/lib/cbits/Makefile +++ b/ghc/lib/std/cbits/Makefile @@ -1,6 +1,6 @@ -# $Id: Makefile,v 1.5 1997/08/25 22:40:59 sof Exp $ +# $Id: Makefile,v 1.1 1998/02/02 17:34:22 simonm Exp $ -TOP = ../.. +TOP = ../../.. include $(TOP)/mk/boilerplate.mk override WAYS= diff --git a/ghc/lib/cbits/closeFile.lc b/ghc/lib/std/cbits/closeFile.lc similarity index 100% rename from ghc/lib/cbits/closeFile.lc rename to ghc/lib/std/cbits/closeFile.lc diff --git a/ghc/lib/cbits/createDirectory.lc b/ghc/lib/std/cbits/createDirectory.lc similarity index 100% rename from ghc/lib/cbits/createDirectory.lc rename to ghc/lib/std/cbits/createDirectory.lc diff --git a/ghc/lib/cbits/errno.lc b/ghc/lib/std/cbits/errno.lc similarity index 100% rename from ghc/lib/cbits/errno.lc rename to ghc/lib/std/cbits/errno.lc diff --git a/ghc/lib/cbits/fileEOF.lc b/ghc/lib/std/cbits/fileEOF.lc similarity index 100% rename from ghc/lib/cbits/fileEOF.lc rename to ghc/lib/std/cbits/fileEOF.lc diff --git a/ghc/lib/cbits/fileGetc.lc b/ghc/lib/std/cbits/fileGetc.lc similarity index 100% rename from ghc/lib/cbits/fileGetc.lc rename to ghc/lib/std/cbits/fileGetc.lc diff --git a/ghc/lib/cbits/fileLookAhead.lc b/ghc/lib/std/cbits/fileLookAhead.lc similarity index 100% rename from ghc/lib/cbits/fileLookAhead.lc rename to ghc/lib/std/cbits/fileLookAhead.lc diff --git a/ghc/lib/cbits/filePosn.lc b/ghc/lib/std/cbits/filePosn.lc similarity index 100% rename from ghc/lib/cbits/filePosn.lc rename to ghc/lib/std/cbits/filePosn.lc diff --git a/ghc/lib/cbits/filePutc.lc b/ghc/lib/std/cbits/filePutc.lc similarity index 100% rename from ghc/lib/cbits/filePutc.lc rename to ghc/lib/std/cbits/filePutc.lc diff --git a/ghc/lib/cbits/fileSize.lc b/ghc/lib/std/cbits/fileSize.lc similarity index 100% rename from ghc/lib/cbits/fileSize.lc rename to ghc/lib/std/cbits/fileSize.lc diff --git a/ghc/lib/cbits/floatExtreme.h b/ghc/lib/std/cbits/floatExtreme.h similarity index 100% rename from ghc/lib/cbits/floatExtreme.h rename to ghc/lib/std/cbits/floatExtreme.h diff --git a/ghc/lib/cbits/floatExtreme.lc b/ghc/lib/std/cbits/floatExtreme.lc similarity index 100% rename from ghc/lib/cbits/floatExtreme.lc rename to ghc/lib/std/cbits/floatExtreme.lc diff --git a/ghc/lib/cbits/flushFile.lc b/ghc/lib/std/cbits/flushFile.lc similarity index 100% rename from ghc/lib/cbits/flushFile.lc rename to ghc/lib/std/cbits/flushFile.lc diff --git a/ghc/lib/cbits/freeFile.lc b/ghc/lib/std/cbits/freeFile.lc similarity index 100% rename from ghc/lib/cbits/freeFile.lc rename to ghc/lib/std/cbits/freeFile.lc diff --git a/ghc/lib/cbits/getBufferMode.lc b/ghc/lib/std/cbits/getBufferMode.lc similarity index 100% rename from ghc/lib/cbits/getBufferMode.lc rename to ghc/lib/std/cbits/getBufferMode.lc diff --git a/ghc/lib/cbits/getCPUTime.lc b/ghc/lib/std/cbits/getCPUTime.lc similarity index 100% rename from ghc/lib/cbits/getCPUTime.lc rename to ghc/lib/std/cbits/getCPUTime.lc diff --git a/ghc/lib/cbits/getClockTime.lc b/ghc/lib/std/cbits/getClockTime.lc similarity index 100% rename from ghc/lib/cbits/getClockTime.lc rename to ghc/lib/std/cbits/getClockTime.lc diff --git a/ghc/lib/cbits/getCurrentDirectory.lc b/ghc/lib/std/cbits/getCurrentDirectory.lc similarity index 100% rename from ghc/lib/cbits/getCurrentDirectory.lc rename to ghc/lib/std/cbits/getCurrentDirectory.lc diff --git a/ghc/lib/cbits/getDirectoryContents.lc b/ghc/lib/std/cbits/getDirectoryContents.lc similarity index 100% rename from ghc/lib/cbits/getDirectoryContents.lc rename to ghc/lib/std/cbits/getDirectoryContents.lc diff --git a/ghc/lib/cbits/getLock.lc b/ghc/lib/std/cbits/getLock.lc similarity index 100% rename from ghc/lib/cbits/getLock.lc rename to ghc/lib/std/cbits/getLock.lc diff --git a/ghc/lib/cbits/inputReady.lc b/ghc/lib/std/cbits/inputReady.lc similarity index 100% rename from ghc/lib/cbits/inputReady.lc rename to ghc/lib/std/cbits/inputReady.lc diff --git a/ghc/lib/cbits/openFile.lc b/ghc/lib/std/cbits/openFile.lc similarity index 100% rename from ghc/lib/cbits/openFile.lc rename to ghc/lib/std/cbits/openFile.lc diff --git a/ghc/lib/cbits/readFile.lc b/ghc/lib/std/cbits/readFile.lc similarity index 100% rename from ghc/lib/cbits/readFile.lc rename to ghc/lib/std/cbits/readFile.lc diff --git a/ghc/lib/cbits/removeDirectory.lc b/ghc/lib/std/cbits/removeDirectory.lc similarity index 100% rename from ghc/lib/cbits/removeDirectory.lc rename to ghc/lib/std/cbits/removeDirectory.lc diff --git a/ghc/lib/cbits/removeFile.lc b/ghc/lib/std/cbits/removeFile.lc similarity index 100% rename from ghc/lib/cbits/removeFile.lc rename to ghc/lib/std/cbits/removeFile.lc diff --git a/ghc/lib/cbits/renameDirectory.lc b/ghc/lib/std/cbits/renameDirectory.lc similarity index 100% rename from ghc/lib/cbits/renameDirectory.lc rename to ghc/lib/std/cbits/renameDirectory.lc diff --git a/ghc/lib/cbits/renameFile.lc b/ghc/lib/std/cbits/renameFile.lc similarity index 100% rename from ghc/lib/cbits/renameFile.lc rename to ghc/lib/std/cbits/renameFile.lc diff --git a/ghc/lib/cbits/seekFile.lc b/ghc/lib/std/cbits/seekFile.lc similarity index 100% rename from ghc/lib/cbits/seekFile.lc rename to ghc/lib/std/cbits/seekFile.lc diff --git a/ghc/lib/cbits/setBuffering.lc b/ghc/lib/std/cbits/setBuffering.lc similarity index 100% rename from ghc/lib/cbits/setBuffering.lc rename to ghc/lib/std/cbits/setBuffering.lc diff --git a/ghc/lib/cbits/setCurrentDirectory.lc b/ghc/lib/std/cbits/setCurrentDirectory.lc similarity index 100% rename from ghc/lib/cbits/setCurrentDirectory.lc rename to ghc/lib/std/cbits/setCurrentDirectory.lc diff --git a/ghc/lib/cbits/showTime.lc b/ghc/lib/std/cbits/showTime.lc similarity index 100% rename from ghc/lib/cbits/showTime.lc rename to ghc/lib/std/cbits/showTime.lc diff --git a/ghc/lib/cbits/stgio.h b/ghc/lib/std/cbits/stgio.h similarity index 100% rename from ghc/lib/cbits/stgio.h rename to ghc/lib/std/cbits/stgio.h diff --git a/ghc/lib/cbits/system.lc b/ghc/lib/std/cbits/system.lc similarity index 69% rename from ghc/lib/cbits/system.lc rename to ghc/lib/std/cbits/system.lc index ce99a11..013f111 100644 --- a/ghc/lib/cbits/system.lc +++ b/ghc/lib/std/cbits/system.lc @@ -24,21 +24,6 @@ StgInt systemCmd(cmd) StgByteArray cmd; { -#if defined(cygwin32_TARGET_OS) - /* The implementation of std. fork() has its problems - under cygwin32-b18, so we fall back on using libc's - system() instead. (It in turn has problems, as it - does not wait until the sub shell has finished before - returning. Using sleep() works around that.) - */ - if (system(cmd) < 0) { - cvtErrno(); - stdErrno(); - return -1; - } - sleep(1); - return 0; -#else int pid; int wstat; @@ -75,7 +60,6 @@ StgByteArray cmd; ghc_errstr = "internal error (process neither exited nor signalled)"; } return -1; -#endif /* ! cygwin32_TARGET_OS */ } \end{code} diff --git a/ghc/lib/cbits/timezone.h b/ghc/lib/std/cbits/timezone.h similarity index 100% rename from ghc/lib/cbits/timezone.h rename to ghc/lib/std/cbits/timezone.h diff --git a/ghc/lib/cbits/toClockSec.lc b/ghc/lib/std/cbits/toClockSec.lc similarity index 100% rename from ghc/lib/cbits/toClockSec.lc rename to ghc/lib/std/cbits/toClockSec.lc diff --git a/ghc/lib/cbits/toLocalTime.lc b/ghc/lib/std/cbits/toLocalTime.lc similarity index 100% rename from ghc/lib/cbits/toLocalTime.lc rename to ghc/lib/std/cbits/toLocalTime.lc diff --git a/ghc/lib/cbits/toUTCTime.lc b/ghc/lib/std/cbits/toUTCTime.lc similarity index 100% rename from ghc/lib/cbits/toUTCTime.lc rename to ghc/lib/std/cbits/toUTCTime.lc diff --git a/ghc/lib/cbits/writeFile.lc b/ghc/lib/std/cbits/writeFile.lc similarity index 100% rename from ghc/lib/cbits/writeFile.lc rename to ghc/lib/std/cbits/writeFile.lc diff --git a/ghc/runtime/main/TopClosure.lc b/ghc/runtime/main/TopClosure.lc index 1ffa934..2e3605e 100644 --- a/ghc/runtime/main/TopClosure.lc +++ b/ghc/runtime/main/TopClosure.lc @@ -2,7 +2,7 @@ \begin{code} #include "rtsdefs.h" -EXTDATA(GHCmain_mainIO_closure); +EXTDATA(PrelMain_mainIO_closure); -P_ TopClosure = GHCmain_mainIO_closure; +P_ TopClosure = PrelMain_mainIO_closure; \end{code} diff --git a/ghc/utils/mkdependHS/Makefile b/ghc/utils/mkdependHS/Makefile index 9b39b35..feffac4 100644 --- a/ghc/utils/mkdependHS/Makefile +++ b/ghc/utils/mkdependHS/Makefile @@ -48,7 +48,7 @@ install :: $(RM) $(SCRIPT_PROG) $(MAKE) $(MFLAGS) INSTALLING=1 $(SCRIPT_PROG) -CLEAN_FILES += $(SCRIPT_PROG) +CLEAN_FILES += $(SCRIPT_PROG) $(SCRIPT_LINK) include $(TOP)/mk/target.mk diff --git a/ghc/utils/mkdependHS/mkdependHS.prl b/ghc/utils/mkdependHS/mkdependHS.prl index 79ca346..c8f6daf 100644 --- a/ghc/utils/mkdependHS/mkdependHS.prl +++ b/ghc/utils/mkdependHS/mkdependHS.prl @@ -117,11 +117,8 @@ push(@Defines, # set up array of ignored modules local(@dirs) = ($INSTALLING) ? - ("$InstLibDirGhc/imports") - : ("$TopPwd/ghc/lib/ghc", - "$TopPwd/ghc/lib/required", - "$TopPwd/ghc/lib/glaExts", - "$TopPwd/ghc/lib/concurrent"); + ("$InstLibDirGhc/imports/std") + : ("$TopPwd/ghc/lib/std"); if (!$Include_prelude) { push(@Ignore_dirs, @dirs); } else { @@ -130,8 +127,8 @@ if (!$Include_prelude) { foreach $lib ( @Syslibs ) { local($dir) = - ($INSTALLING) ? "${InstHsLibDirGhc}/${lib}/imports" - : "${TopPwd}/hslibs/${lib}/src"; + ($INSTALLING) ? "${InstLibDirGhc}/imports/${lib}" + : "${TopPwd}/ghc/lib/${lib}"; if (!$Include_prelude) { push(@Ignore_dirs,$dir); } else { @@ -248,6 +245,8 @@ sub mangle_command_line_args { $Include_dirs .= " $_"; } elsif ( /^-syslib$/ ) { push(@Syslibs, &grab_arg_arg($_,'')); + } elsif ( /^-fglasgow-exts$/ ) { + push(@Syslibs, 'exts'); } elsif ($Dashdashes_seen != 1) { # not between -- ... -- if ( /^-v$/ ) { $Verbose++;